net/aws/transcriber: translate: optional experimental translation tokenization
This commit adds an optional experimental translation tokenization feature. It can be activated using the `translation_src_%u` pads' `tokenization-method` property. For the moment, the feature is deactivated by default.

The Translate web service accepts '<span></span>' tags in the input and adds matching tags in the output. When an 'id' is also provided as an attribute of the 'span', the matching output tag uses the same 'id'. In the context of closed captions, the 'id's are of little use. However, we can take advantage of the spans in the output to identify translation chunks, which more or less reflect the rhythm of the input transcript.

This commit adds simple spans (no 'id') to the input Transcript Items and parses the resulting spans in the translated output, assigning the timestamps and durations sequentially from the input Transcript Items. Edge cases such as the absence of spans or nested spans were observed and are handled here. Similarly, mismatches between the number of input and output items are taken care of by some sort of reconciliation.

Note that this is still experimental and requires further testing.

Part-of: <https://gitlab.freedesktop.org/gstreamer/gst-plugins-rs/-/merge_requests/1109>
parent 743e97738f
commit 299e25ab3c
5 changed files with 542 additions and 92 deletions
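To illustrate the idea before the diff: transcript items are wrapped in id-less spans, the Translate web service mirrors those spans in its output, and each output span is then paired with the timestamp and duration of the input item of the same rank. A minimal sketch, with invented texts and a hypothetical pairing loop (none of this is part of the commit):

    // Hypothetical round-trip through the Translate web service:
    //   input  (en): "<span>Hello,</span> <span>how are you?</span>"
    //   output (fr): "<span>Bonjour,</span> <span>comment allez-vous ?</span>"
    fn main() {
        // (pts, duration) of each input transcript item, in milliseconds.
        let ts_duration_list = [(0u64, 700u64), (700, 900)];
        // Contents parsed from the output spans, in order of appearance.
        let parsed_spans = ["Bonjour,", "comment allez-vous ?"];

        // Pair each translated chunk with the timing of the input item
        // at the same rank, preserving the pacing of the transcript.
        let translated: Vec<_> = ts_duration_list
            .into_iter()
            .zip(parsed_spans)
            .map(|((pts, duration), content)| (pts, duration, content))
            .collect();

        assert_eq!(translated[1], (700, 900, "comment allez-vous ?"));
    }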
@@ -650,6 +650,12 @@
             "direction": "src",
             "presence": "request",
             "type": "GstTranslationSrcPad"
+        },
+        "translation_src_%%u": {
+            "caps": "text/x-raw:\n format: utf8\n",
+            "direction": "src",
+            "presence": "request",
+            "type": "GstTranslationSrcPad"
         }
     },
     "properties": {
@@ -773,7 +779,7 @@
             "construct": false,
             "construct-only": false,
             "controllable": false,
-            "default": "3000",
+            "default": "5000",
             "max": "-1",
             "min": "0",
             "mutable": "ready",
@@ -858,6 +864,21 @@
                 }
             ]
         },
+        "GstAwsTranscriberTranslationTokenizationMethod": {
+            "kind": "enum",
+            "values": [
+                {
+                    "desc": "None: don't tokenize translations",
+                    "name": "none",
+                    "value": "0"
+                },
+                {
+                    "desc": "Span based: insert spans in the transript text and use the resulting spans in the translations to reproduce speech pacing.",
+                    "name": "span-based",
+                    "value": "1"
+                }
+            ]
+        },
         "GstAwsTranscriberVocabularyFilterMethod": {
             "kind": "enum",
             "values": [
@@ -919,6 +940,18 @@
                 "readable": true,
                 "type": "gchararray",
                 "writable": true
+            },
+            "tokenization-method": {
+                "blurb": "The tokenization method to apply to translations",
+                "conditionally-available": false,
+                "construct": false,
+                "construct-only": false,
+                "controllable": false,
+                "default": "none (0)",
+                "mutable": "ready",
+                "readable": true,
+                "type": "GstAwsTranscriberTranslationTokenizationMethod",
+                "writable": true
             }
         }
     }
@@ -35,7 +35,10 @@ use once_cell::sync::Lazy;
 
 use super::transcribe::{TranscriberLoop, TranscriptEvent, TranscriptItem, TranscriptionSettings};
 use super::translate::{TranslatedItem, TranslationLoop, TranslationQueue};
-use super::{AwsTranscriberResultStability, AwsTranscriberVocabularyFilterMethod, CAT};
+use super::{
+    AwsTranscriberResultStability, AwsTranscriberVocabularyFilterMethod,
+    TranslationTokenizationMethod, CAT,
+};
 
 static RUNTIME: Lazy<runtime::Runtime> = Lazy::new(|| {
     runtime::Builder::new_multi_thread()
@@ -73,6 +76,8 @@ pub const GRANULARITY: gst::ClockTime = gst::ClockTime::from_mseconds(100);
 const OUTPUT_LANG_CODE_PROPERTY: &str = "language-code";
 const DEFAULT_OUTPUT_LANG_CODE: Option<&str> = None;
 
+const TRANSLATION_TOKENIZATION_PROPERTY: &str = "tokenization-method";
+
 #[derive(Debug, Clone)]
 pub(super) struct Settings {
     transcribe_latency: gst::ClockTime,
@@ -850,8 +855,8 @@ struct TranslationPadTask {
     needs_translate: bool,
     translation_queue: TranslationQueue,
     translation_loop_handle: Option<task::JoinHandle<Result<(), gst::ErrorMessage>>>,
-    to_translation_tx: Option<mpsc::Sender<TranscriptItem>>,
-    from_translation_rx: Option<mpsc::Receiver<TranslatedItem>>,
+    to_translation_tx: Option<mpsc::Sender<Vec<TranscriptItem>>>,
+    from_translation_rx: Option<mpsc::Receiver<Vec<TranslatedItem>>>,
     translate_latency: gst::ClockTime,
     transcript_lookahead: gst::ClockTime,
     send_events: bool,
@@ -991,14 +996,14 @@ impl TranslationPadTask {
         // before current latency budget is exhausted.
         futures::select_biased! {
             _ = timeout => return Ok(()),
-            translated_item = from_translation_rx.next() => {
-                let Some(translated_item) = translated_item else {
+            translated_items = from_translation_rx.next() => {
+                let Some(translated_items) = translated_items else {
                     const ERR: &str = "translation chan terminated";
                     gst::debug!(CAT, imp: self.pad, "{ERR}");
                     return Err(gst::error_msg!(gst::StreamError::Failed, ["{ERR}"]));
                 };
 
-                self.translated_items.push_back(translated_item);
+                self.translated_items.extend(translated_items);
                 self.pending_translations = self.pending_translations.saturating_sub(1);
 
                 return Ok(());
@@ -1027,9 +1032,9 @@ impl TranslationPadTask {
             }
         };
 
-        for item in transcript_items.iter() {
-            if let Some(ready_item) = self.translation_queue.push(item) {
-                self.send_for_translation(ready_item).await?;
+        for items in transcript_items.iter() {
+            if let Some(ready_items) = self.translation_queue.push(items) {
+                self.send_for_translation(ready_items).await?;
             }
         }
 
@@ -1072,19 +1077,12 @@ impl TranslationPadTask {
 
         let deadline = translation_eta.saturating_sub(max_delay);
 
-        if let Some(ready_item) = self
+        if let Some(ready_items) = self
             .translation_queue
             .dequeue(deadline, self.transcript_lookahead)
         {
-            gst::debug!(
-                CAT,
-                imp: self.pad,
-                "Forcing transcript at pts {} with duration {} to translation",
-                ready_item.pts,
-                ready_item.duration,
-            );
-
-            if self.send_for_translation(ready_item).await.is_err() {
+            gst::debug!(CAT, imp: self.pad, "Forcing {} transcripts to translation", ready_items.len());
+            if self.send_for_translation(ready_items).await.is_err() {
                 return false;
             }
         }
@@ -1240,13 +1238,13 @@ impl TranslationPadTask {
 
     async fn send_for_translation(
         &mut self,
-        transcript_item: TranscriptItem,
+        transcript_items: Vec<TranscriptItem>,
     ) -> Result<(), gst::ErrorMessage> {
         let res = self
             .to_translation_tx
             .as_mut()
             .expect("to_translation chan must be available in translation mode")
-            .send(transcript_item)
+            .send(transcript_items)
             .await;
 
         if res.is_err() {
@@ -1346,6 +1344,7 @@ impl TranslationPadTask {
             &self.pad,
             &elem_settings.language_code,
             pad_settings.language_code.as_deref().unwrap(),
+            pad_settings.tokenization_method,
             to_translation_rx,
             from_translation_tx,
         ));
@@ -1384,6 +1383,7 @@ impl Default for TranslationPadState {
 #[derive(Debug, Default, Clone)]
 struct TranslationPadSettings {
     language_code: Option<String>,
+    tokenization_method: TranslationTokenizationMethod,
 }
 
 #[derive(Debug, Default)]
@@ -1566,12 +1566,20 @@ impl ObjectSubclass for TranslationSrcPad {
 impl ObjectImpl for TranslationSrcPad {
     fn properties() -> &'static [glib::ParamSpec] {
         static PROPERTIES: Lazy<Vec<glib::ParamSpec>> = Lazy::new(|| {
-            vec![glib::ParamSpecString::builder(OUTPUT_LANG_CODE_PROPERTY)
-                .nick("Language Code")
-                .blurb("The Language the Stream must be translated to")
-                .default_value(DEFAULT_OUTPUT_LANG_CODE)
-                .mutable_ready()
-                .build()]
+            vec![
+                glib::ParamSpecString::builder(OUTPUT_LANG_CODE_PROPERTY)
+                    .nick("Language Code")
+                    .blurb("The Language the Stream must be translated to")
+                    .default_value(DEFAULT_OUTPUT_LANG_CODE)
+                    .mutable_ready()
+                    .build(),
+                glib::ParamSpecEnum::builder(TRANSLATION_TOKENIZATION_PROPERTY)
+                    .nick("Translations tokenization method")
+                    .blurb("The tokenization method to apply to translations")
+                    .default_value(TranslationTokenizationMethod::default())
+                    .mutable_ready()
+                    .build(),
+            ]
         });
 
         PROPERTIES.as_ref()
@@ -1582,6 +1590,9 @@ impl ObjectImpl for TranslationSrcPad {
             OUTPUT_LANG_CODE_PROPERTY => {
                 self.settings.lock().unwrap().language_code = value.get().unwrap()
             }
+            TRANSLATION_TOKENIZATION_PROPERTY => {
+                self.settings.lock().unwrap().tokenization_method = value.get().unwrap()
+            }
             _ => unimplemented!(),
         }
     }
@@ -1589,6 +1600,9 @@ impl ObjectImpl for TranslationSrcPad {
     fn property(&self, _id: usize, pspec: &glib::ParamSpec) -> glib::Value {
         match pspec.name() {
             OUTPUT_LANG_CODE_PROPERTY => self.settings.lock().unwrap().language_code.to_value(),
+            TRANSLATION_TOKENIZATION_PROPERTY => {
+                self.settings.lock().unwrap().tokenization_method.to_value()
+            }
             _ => unimplemented!(),
         }
     }
@@ -79,6 +79,21 @@ impl From<AwsTranscriberVocabularyFilterMethod> for VocabularyFilterMethod {
     }
 }
 
+#[derive(Debug, Default, Eq, PartialEq, Ord, PartialOrd, Hash, Clone, Copy, glib::Enum)]
+#[repr(u32)]
+#[enum_type(name = "GstAwsTranscriberTranslationTokenizationMethod")]
+#[non_exhaustive]
+pub enum TranslationTokenizationMethod {
+    #[default]
+    #[enum_value(name = "None: don't tokenize translations", nick = "none")]
+    None = 0,
+    #[enum_value(
+        name = "Span based: insert spans in the transript text and use the resulting spans in the translations to reproduce speech pacing.",
+        nick = "span-based"
+    )]
+    SpanBased = 1,
+}
+
 glib::wrapper! {
     pub struct Transcriber(ObjectSubclass<imp::Transcriber>) @extends gst::Element, gst::Object, @implements gst::ChildProxy;
 }
@@ -94,6 +109,8 @@ pub fn register(plugin: &gst::Plugin) -> Result<(), glib::BoolError> {
             .mark_as_plugin_api(gst::PluginAPIFlags::empty());
         AwsTranscriberVocabularyFilterMethod::static_type()
             .mark_as_plugin_api(gst::PluginAPIFlags::empty());
+        TranslationTokenizationMethod::static_type()
+            .mark_as_plugin_api(gst::PluginAPIFlags::empty());
         TranslationSrcPad::static_type().mark_as_plugin_api(gst::PluginAPIFlags::empty());
     }
     gst::Element::register(
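For context, an application would opt into the feature through the `tokenization-method` property of a requested `translation_src_%u` pad. A minimal sketch using standard gstreamer-rs calls; the element factory name is an assumption here, not something this commit defines:

    use gst::prelude::*;

    fn main() -> Result<(), Box<dyn std::error::Error>> {
        gst::init()?;

        // Factory name assumed; check `gst-inspect-1.0` for the actual element.
        let transcriber = gst::ElementFactory::make("awstranscriber").build()?;

        // Request a translation pad and select the experimental method by its nick.
        let pad = transcriber
            .request_pad_simple("translation_src_%u")
            .expect("translation_src request pad");
        pad.set_property("language-code", "fr");
        pad.set_property_from_str("tokenization-method", "span-based");

        Ok(())
    }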
@@ -69,18 +69,6 @@ impl TranscriptItem {
             is_punctuation: matches!(item.r#type, Some(model::ItemType::Punctuation)),
         })
     }
-
-    #[inline]
-    pub fn push(&mut self, item: &TranscriptItem) {
-        self.duration += item.duration;
-
-        self.is_punctuation &= item.is_punctuation;
-        if !item.is_punctuation {
-            self.content.push(' ');
-        }
-
-        self.content.push_str(&item.content);
-    }
 }
 
 #[derive(Clone)]
@@ -18,8 +18,12 @@ use std::collections::VecDeque;
 
 use super::imp::TranslationSrcPad;
 use super::transcribe::TranscriptItem;
-use super::CAT;
+use super::{TranslationTokenizationMethod, CAT};
 
+const SPAN_START: &str = "<span>";
+const SPAN_END: &str = "</span>";
+
+#[derive(Debug)]
 pub struct TranslatedItem {
     pub pts: gst::ClockTime,
     pub duration: gst::ClockTime,
@@ -49,7 +53,7 @@ impl TranslationQueue {
     /// Pushes the provided item.
     ///
     /// Returns `Some(..)` if items are ready for translation.
-    pub fn push(&mut self, transcript_item: &TranscriptItem) -> Option<TranscriptItem> {
+    pub fn push(&mut self, transcript_item: &TranscriptItem) -> Option<Vec<TranscriptItem>> {
         // Keep track of the item individually so we can schedule translation precisely.
         self.items.push_back(transcript_item.clone());
 
@@ -57,16 +61,7 @@ impl TranslationQueue {
             // This makes it a good chunk for translation.
             // Concatenate as a single item for translation
 
-            let mut items = self.items.drain(..);
-
-            let mut item_acc = items.next()?;
-            for item in items {
-                item_acc.push(&item);
-            }
-
-            item_acc.push(transcript_item);
-
-            return Some(item_acc);
+            return Some(self.items.drain(..).collect());
         }
 
         // Regular case: no separator detected, don't push transcript items
|
||||||
|
|
||||||
/// Dequeues items from the specified `deadline` up to `lookahead`.
|
/// Dequeues items from the specified `deadline` up to `lookahead`.
|
||||||
///
|
///
|
||||||
/// Returns `Some(..)` with the accumulated items matching the criteria.
|
/// Returns `Some(..)` if some items match the criteria.
|
||||||
pub fn dequeue(
|
pub fn dequeue(
|
||||||
&mut self,
|
&mut self,
|
||||||
deadline: gst::ClockTime,
|
deadline: gst::ClockTime,
|
||||||
lookahead: gst::ClockTime,
|
lookahead: gst::ClockTime,
|
||||||
) -> Option<TranscriptItem> {
|
) -> Option<Vec<TranscriptItem>> {
|
||||||
if self.items.front()?.pts < deadline {
|
if self.items.front()?.pts < deadline {
|
||||||
// First item is too early to be sent to translation now
|
// First item is too early to be sent to translation now
|
||||||
// we can wait for more items to accumulate.
|
// we can wait for more items to accumulate.
|
||||||
|
@ -94,17 +89,16 @@ impl TranslationQueue {
|
||||||
// Try to get up to lookahead more items to improve translation accuracy
|
// Try to get up to lookahead more items to improve translation accuracy
|
||||||
let limit = deadline + lookahead;
|
let limit = deadline + lookahead;
|
||||||
|
|
||||||
let mut item_acc = self.items.pop_front().unwrap();
|
let mut items_acc = vec![self.items.pop_front().unwrap()];
|
||||||
while let Some(item) = self.items.front() {
|
while let Some(item) = self.items.front() {
|
||||||
if item.pts > limit {
|
if item.pts > limit {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
let item = self.items.pop_front().unwrap();
|
items_acc.push(self.items.pop_front().unwrap());
|
||||||
item_acc.push(&item);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Some(item_acc)
|
Some(items_acc)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -113,8 +107,9 @@ pub struct TranslationLoop {
     client: aws_translate::Client,
     input_lang: String,
     output_lang: String,
-    transcript_rx: mpsc::Receiver<TranscriptItem>,
-    translation_tx: mpsc::Sender<TranslatedItem>,
+    tokenization_method: TranslationTokenizationMethod,
+    transcript_rx: mpsc::Receiver<Vec<TranscriptItem>>,
+    translation_tx: mpsc::Sender<Vec<TranslatedItem>>,
 }
 
 impl TranslationLoop {
@@ -123,8 +118,9 @@ impl TranslationLoop {
         pad: &TranslationSrcPad,
         input_lang: &str,
         output_lang: &str,
-        transcript_rx: mpsc::Receiver<TranscriptItem>,
-        translation_tx: mpsc::Sender<TranslatedItem>,
+        tokenization_method: TranslationTokenizationMethod,
+        transcript_rx: mpsc::Receiver<Vec<TranscriptItem>>,
+        translation_tx: mpsc::Sender<Vec<TranslatedItem>>,
     ) -> Self {
         let aws_config = imp.aws_config.lock().unwrap();
         let aws_config = aws_config
@@ -136,6 +132,7 @@ impl TranslationLoop {
             client: aws_sdk_translate::Client::new(aws_config),
             input_lang: input_lang.to_string(),
             output_lang: output_lang.to_string(),
+            tokenization_method,
            transcript_rx,
            translation_tx,
         }
@@ -167,40 +164,70 @@ impl TranslationLoop {
     }
 
     pub async fn run(mut self) -> Result<(), gst::ErrorMessage> {
-        while let Some(transcript_item) = self.transcript_rx.next().await {
-            let TranscriptItem {
-                pts,
-                duration,
-                content,
-                ..
-            } = transcript_item;
-
-            let translated_text = if content.is_empty() {
-                content
-            } else {
-                self.client
-                    .translate_text()
-                    .set_source_language_code(Some(self.input_lang.clone()))
-                    .set_target_language_code(Some(self.output_lang.clone()))
-                    .set_text(Some(content))
-                    .send()
-                    .await
-                    .map_err(|err| {
-                        let err = format!("Failed to call translation service: {err}");
-                        gst::info!(CAT, imp: self.pad, "{err}");
-                        gst::error_msg!(gst::LibraryError::Failed, ["{err}"])
-                    })?
-                    .translated_text
-                    .unwrap_or_default()
+        use TranslationTokenizationMethod as Tokenization;
+
+        while let Some(transcript_items) = self.transcript_rx.next().await {
+            if transcript_items.is_empty() {
+                continue;
+            }
+
+            let (ts_duration_list, content): (Vec<(gst::ClockTime, gst::ClockTime)>, String) =
+                transcript_items
+                    .into_iter()
+                    .map(|item| {
+                        (
+                            (item.pts, item.duration),
+                            match self.tokenization_method {
+                                Tokenization::None => item.content,
+                                Tokenization::SpanBased => {
+                                    format!("{SPAN_START}{}{SPAN_END}", item.content)
+                                }
+                            },
+                        )
+                    })
+                    .unzip();
+
+            gst::trace!(CAT, imp: self.pad, "Translating {content} with {ts_duration_list:?}");
+
+            let translated_text = self
+                .client
+                .translate_text()
+                .set_source_language_code(Some(self.input_lang.clone()))
+                .set_target_language_code(Some(self.output_lang.clone()))
+                .set_text(Some(content))
+                .send()
+                .await
+                .map_err(|err| {
+                    let err = format!("Failed to call translation service: {err}");
+                    gst::info!(CAT, imp: self.pad, "{err}");
+                    gst::error_msg!(gst::LibraryError::Failed, ["{err}"])
+                })?
+                .translated_text
+                .unwrap_or_default();
+
+            gst::trace!(CAT, imp: self.pad, "Got translation {translated_text}");
+
+            let translated_items = match self.tokenization_method {
+                Tokenization::None => {
+                    // Push translation as a single item
+                    let mut ts_duration_iter = ts_duration_list.into_iter().peekable();
+
+                    let &(first_pts, _) = ts_duration_iter.peek().expect("at least one item");
+                    let (last_pts, last_duration) =
+                        ts_duration_iter.last().expect("at least one item");
+
+                    vec![TranslatedItem {
+                        pts: first_pts,
+                        duration: last_pts.saturating_sub(first_pts) + last_duration,
+                        content: translated_text,
+                    }]
+                }
+                Tokenization::SpanBased => span_tokenize_items(&translated_text, ts_duration_list),
             };
 
-            let translated_item = TranslatedItem {
-                pts,
-                duration,
-                content: translated_text,
-            };
-
-            if self.translation_tx.send(translated_item).await.is_err() {
+            gst::trace!(CAT, imp: self.pad, "Sending {translated_items:?}");
+
+            if self.translation_tx.send(translated_items).await.is_err() {
                 gst::info!(
                     CAT,
                     imp: self.pad,
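The `Tokenization::None` arm above merges the whole batch into a single `TranslatedItem` whose duration runs from the first item's pts to the end of the last item. A small worked sketch of that arithmetic (same ClockTime operations as in the diff):

    use gst::prelude::*;

    fn main() {
        // Two input items (pts, duration): one at 2s lasting 1s, one at 5s lasting 2s.
        let (first_pts, _) = (2.seconds(), 1.seconds());
        let (last_pts, last_duration) = (5.seconds(), 2.seconds());

        // duration = last_pts - first_pts + last_duration = 5s - 2s + 2s = 5s
        let duration = last_pts.saturating_sub(first_pts) + last_duration;
        assert_eq!(duration, 5.seconds());
    }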
@@ -213,3 +240,374 @@ impl TranslationLoop {
         Ok(())
     }
 }
+
+/// Parses translated items from the `translation` `String` using `span` tags.
+///
+/// The `translation` is expected to have been returned by the `Translate` ws.
+/// It can contain id-less `<span>` and `</span>` tags, matching similar
+/// id-less tags from the content submitted to the `Translate` ws.
+///
+/// This parser accepts both serial `<span></span>` as well as nested
+/// `<span><span></span></span>`.
+///
+/// The parsed items are assigned the ts and duration from `ts_duration_list`
+/// in their order of appearance.
+///
+/// If more parsed items are found, the last item will concatenate the remaining items.
+///
+/// If less parsed items are found, the last item will be assign the remaining
+/// duration from the `ts_duration_list`.
+fn span_tokenize_items(
+    translation: &str,
+    ts_duration_list: impl IntoIterator<Item = (gst::ClockTime, gst::ClockTime)>,
+) -> Vec<TranslatedItem> {
+    const SPAN_START_LEN: usize = SPAN_START.len();
+    const SPAN_END_LEN: usize = SPAN_END.len();
+
+    let mut translated_items = vec![];
+
+    let mut ts_duration_iter = ts_duration_list.into_iter();
+
+    // Content for a translated item
+    let mut content = String::new();
+    // Alleged span chunk
+    let mut chunk = String::new();
+
+    for c in translation.chars() {
+        if content.is_empty() && c.is_whitespace() {
+            // ignore leading whitespaces
+            continue;
+        }
+
+        if chunk.is_empty() {
+            if c == '<' {
+                // Start an alleged span chunk
+                chunk.push(c);
+            } else {
+                content.push(c);
+            }
+
+            continue;
+        }
+
+        chunk.push(c);
+
+        match chunk.len() {
+            len if len < SPAN_START_LEN => continue,
+            SPAN_START_LEN => {
+                if chunk != SPAN_START {
+                    continue;
+                }
+                // Got a <span>
+            }
+            SPAN_END_LEN => {
+                if chunk != SPAN_END {
+                    continue;
+                }
+                // Got a </span>
+            }
+            _ => {
+                // Can no longer be a span
+                content.extend(chunk.drain(..));
+                continue;
+            }
+        }
+
+        // got a span
+        chunk.clear();
+
+        if content.is_empty() {
+            continue;
+        }
+
+        // Add pending content
+        // assign it the next pts and duration from the input list
+        if let Some((pts, duration)) = ts_duration_iter.next() {
+            translated_items.push(TranslatedItem {
+                pts,
+                duration,
+                content,
+            });
+
+            content = String::new();
+        } else if let Some(last_item) = translated_items.last_mut() {
+            // exhausted available pts and duration
+            // add content to last item
+            if !last_item.content.ends_with(' ') {
+                last_item.content.push(' ');
+            }
+            last_item.content.extend(content.drain(..));
+        }
+    }
+
+    content.extend(chunk.drain(..));
+
+    if !content.is_empty() {
+        // Add last content
+        if let Some((pts, mut duration)) = ts_duration_iter.next() {
+            if let Some((last_pts, last_duration)) = ts_duration_iter.last() {
+                // Fix remaining duration
+                duration = last_pts.saturating_sub(pts) + last_duration;
+            }
+
+            translated_items.push(TranslatedItem {
+                pts,
+                duration,
+                content,
+            });
+        } else if let Some(last_item) = translated_items.last_mut() {
+            // No more pts and duration in the index
+            // Add remaining content to the last item pushed
+            if !last_item.content.ends_with(' ') {
+                last_item.content.push(' ');
+            }
+            last_item.content.push_str(&content);
+        }
+    } else if let Some((last_pts, last_duration)) = ts_duration_iter.last() {
+        if let Some(last_item) = translated_items.last_mut() {
+            // No more content, but need to fix last item's duration
+            last_item.duration = last_pts.saturating_sub(last_item.pts) + last_duration;
+        }
+    }
+
+    translated_items
+}
+
+#[cfg(test)]
+mod tests {
+    use super::span_tokenize_items;
+    use gst::prelude::*;
+
+    #[test]
+    fn serial_spans() {
+        let input = "<span>first</span> <span>second</span> <span>third</span>";
+        let ts_duration_list = vec![
+            (0.seconds(), 1.seconds()),
+            (1.seconds(), 2.seconds()),
+            (4.seconds(), 3.seconds()),
+        ];
+
+        let mut items = span_tokenize_items(input, ts_duration_list).into_iter();
+
+        let first = items.next().unwrap();
+        assert_eq!(first.pts, 0.seconds());
+        assert_eq!(first.duration, 1.seconds());
+        assert_eq!(first.content, "first");
+
+        let second = items.next().unwrap();
+        assert_eq!(second.pts, 1.seconds());
+        assert_eq!(second.duration, 2.seconds());
+        assert_eq!(second.content, "second");
+
+        let third = items.next().unwrap();
+        assert_eq!(third.pts, 4.seconds());
+        assert_eq!(third.duration, 3.seconds());
+        assert_eq!(third.content, "third");
+
+        assert!(items.next().is_none());
+    }
+
+    #[test]
+    fn serial_and_nested_spans() {
+        let input = "<span>first</span> <span>second <span>third</span></span> <span>fourth</span>";
+        let ts_duration_list = vec![
+            (0.seconds(), 1.seconds()),
+            (1.seconds(), 2.seconds()),
+            (3.seconds(), 1.seconds()),
+            (4.seconds(), 2.seconds()),
+        ];
+
+        let mut items = span_tokenize_items(input, ts_duration_list).into_iter();
+
+        let first = items.next().unwrap();
+        assert_eq!(first.pts, 0.seconds());
+        assert_eq!(first.duration, 1.seconds());
+        assert_eq!(first.content, "first");
+
+        let second = items.next().unwrap();
+        assert_eq!(second.pts, 1.seconds());
+        assert_eq!(second.duration, 2.seconds());
+        assert_eq!(second.content, "second ");
+
+        let third = items.next().unwrap();
+        assert_eq!(third.pts, 3.seconds());
+        assert_eq!(third.duration, 1.seconds());
+        assert_eq!(third.content, "third");
+
+        let fourth = items.next().unwrap();
+        assert_eq!(fourth.pts, 4.seconds());
+        assert_eq!(fourth.duration, 2.seconds());
+        assert_eq!(fourth.content, "fourth");
+
+        assert!(items.next().is_none());
+    }
+
+    #[test]
+    fn nonspaned_serial_and_nested_spans() {
+        let input = "Initial <span>first</span> <span>second <span>third</span></span> <span>fourth</span> final";
+        let ts_duration_list = vec![
+            (0.seconds(), 1.seconds()),
+            (1.seconds(), 1.seconds()),
+            (2.seconds(), 1.seconds()),
+            (3.seconds(), 1.seconds()),
+            (4.seconds(), 1.seconds()),
+            (5.seconds(), 1.seconds()),
+        ];
+
+        let mut items = span_tokenize_items(input, ts_duration_list).into_iter();
+
+        let init = items.next().unwrap();
+        assert_eq!(init.pts, 0.seconds());
+        assert_eq!(init.duration, 1.seconds());
+        assert_eq!(init.content, "Initial ");
+
+        let first = items.next().unwrap();
+        assert_eq!(first.pts, 1.seconds());
+        assert_eq!(first.duration, 1.seconds());
+        assert_eq!(first.content, "first");
+
+        let second = items.next().unwrap();
+        assert_eq!(second.pts, 2.seconds());
+        assert_eq!(second.duration, 1.seconds());
+        assert_eq!(second.content, "second ");
+
+        let third = items.next().unwrap();
+        assert_eq!(third.pts, 3.seconds());
+        assert_eq!(third.duration, 1.seconds());
+        assert_eq!(third.content, "third");
+
+        let fourth = items.next().unwrap();
+        assert_eq!(fourth.pts, 4.seconds());
+        assert_eq!(fourth.duration, 1.seconds());
+        assert_eq!(fourth.content, "fourth");
+
+        let final_ = items.next().unwrap();
+        assert_eq!(final_.pts, 5.seconds());
+        assert_eq!(final_.duration, 1.seconds());
+        assert_eq!(final_.content, "final");
+
+        assert!(items.next().is_none());
+    }
+
+    #[test]
+    fn more_parsed_items() {
+        let input = "<span>first</span> <span>second</span> <span>third</span> <span>fourth</span>";
+        let ts_duration_list = vec![
+            (0.seconds(), 1.seconds()),
+            (1.seconds(), 2.seconds()),
+            (4.seconds(), 3.seconds()),
+        ];
+
+        let mut items = span_tokenize_items(input, ts_duration_list).into_iter();
+
+        let first = items.next().unwrap();
+        assert_eq!(first.pts, 0.seconds());
+        assert_eq!(first.duration, 1.seconds());
+        assert_eq!(first.content, "first");
+
+        let second = items.next().unwrap();
+        assert_eq!(second.pts, 1.seconds());
+        assert_eq!(second.duration, 2.seconds());
+        assert_eq!(second.content, "second");
+
+        let third = items.next().unwrap();
+        assert_eq!(third.pts, 4.seconds());
+        assert_eq!(third.duration, 3.seconds());
+        assert_eq!(third.content, "third fourth");
+
+        assert!(items.next().is_none());
+    }
+
+    #[test]
+    fn more_parsed_items_nonspan_final() {
+        let input = "<span>first</span> <span>second</span> <span>third</span> final";
+        let ts_duration_list = vec![
+            (0.seconds(), 1.seconds()),
+            (1.seconds(), 2.seconds()),
+            (4.seconds(), 3.seconds()),
+        ];
+
+        let mut items = span_tokenize_items(input, ts_duration_list).into_iter();
+
+        let first = items.next().unwrap();
+        assert_eq!(first.pts, 0.seconds());
+        assert_eq!(first.duration, 1.seconds());
+        assert_eq!(first.content, "first");
+
+        let second = items.next().unwrap();
+        assert_eq!(second.pts, 1.seconds());
+        assert_eq!(second.duration, 2.seconds());
+        assert_eq!(second.content, "second");
+
+        let third = items.next().unwrap();
+        assert_eq!(third.pts, 4.seconds());
+        assert_eq!(third.duration, 3.seconds());
+        assert_eq!(third.content, "third final");
+
+        assert!(items.next().is_none());
+    }
+
+    #[test]
+    fn less_parsed_items() {
+        let input = "<span>first</span> <span>second</span>";
+        let ts_duration_list = vec![
+            (0.seconds(), 1.seconds()),
+            (1.seconds(), 2.seconds()),
+            (4.seconds(), 3.seconds()),
+        ];
+
+        let mut items = span_tokenize_items(input, ts_duration_list).into_iter();
+
+        let first = items.next().unwrap();
+        assert_eq!(first.pts, 0.seconds());
+        assert_eq!(first.duration, 1.seconds());
+        assert_eq!(first.content, "first");
+
+        let second = items.next().unwrap();
+        assert_eq!(second.pts, 1.seconds());
+        assert_eq!(second.duration, 6.seconds());
+        assert_eq!(second.content, "second");
+
+        assert!(items.next().is_none());
+    }
+
+    #[test]
+    fn less_parsed_items_nonspan_final() {
+        let input = "<span>first</span> final";
+        let ts_duration_list = vec![
+            (0.seconds(), 1.seconds()),
+            (1.seconds(), 2.seconds()),
+            (4.seconds(), 3.seconds()),
+        ];
+
+        let mut items = span_tokenize_items(input, ts_duration_list).into_iter();
+
+        let first = items.next().unwrap();
+        assert_eq!(first.pts, 0.seconds());
+        assert_eq!(first.duration, 1.seconds());
+        assert_eq!(first.content, "first");
+
+        let final_ = items.next().unwrap();
+        assert_eq!(final_.pts, 1.seconds());
+        assert_eq!(final_.duration, 6.seconds());
+        assert_eq!(final_.content, "final");
+
+        assert!(items.next().is_none());
+    }
+
+    #[test]
+    fn utf8_input() {
+        let input = "caractères accentués";
+        let ts_duration_list = vec![(0.seconds(), 1.seconds())];
+
+        let mut items = span_tokenize_items(input, ts_duration_list).into_iter();
+
+        let first = items.next().unwrap();
+        assert_eq!(first.pts, 0.seconds());
+        assert_eq!(first.duration, 1.seconds());
+        assert_eq!(first.content, "caractères accentués");
+
+        assert!(items.next().is_none());
+    }
+}