diff --git a/docs/plugins/gst_plugins_cache.json b/docs/plugins/gst_plugins_cache.json index ad008d5b..e544762a 100644 --- a/docs/plugins/gst_plugins_cache.json +++ b/docs/plugins/gst_plugins_cache.json @@ -955,8 +955,32 @@ "type": "gchararray", "writable": true }, + "identify-language": { + "blurb": "Enables automatic language identification, see ", + "conditionally-available": false, + "construct": false, + "construct-only": false, + "controllable": false, + "default": "false", + "mutable": "ready", + "readable": true, + "type": "gboolean", + "writable": true + }, + "identify-multiple-languages": { + "blurb": "Enables automatic multi-language identification, see ", + "conditionally-available": false, + "construct": false, + "construct-only": false, + "controllable": false, + "default": "false", + "mutable": "ready", + "readable": true, + "type": "gboolean", + "writable": true + }, "language-code": { - "blurb": "The Language of the Stream, see for an up to date list of allowed languages", + "blurb": "The Language of the Stream, see for an up to date list of allowed languages", "conditionally-available": false, "construct": false, "construct-only": false, @@ -967,6 +991,18 @@ "type": "gchararray", "writable": true }, + "language-options": { + "blurb": "Two or more language codes that represent the languages which may be present in the media, see ", + "conditionally-available": false, + "construct": false, + "construct-only": false, + "controllable": false, + "default": "NULL", + "mutable": "ready", + "readable": true, + "type": "gchararray", + "writable": true + }, "latency": { "blurb": "Amount of milliseconds to allow AWS transcribe (Deprecated. 
Use transcribe-latency)", "conditionally-available": false, @@ -995,6 +1031,18 @@ "type": "guint", "writable": true }, + "preferred-language": { + "blurb": "Preferred language from the subset of languages codes specified in `language-options`, see ", + "conditionally-available": false, + "construct": false, + "construct-only": false, + "controllable": false, + "default": "NULL", + "mutable": "ready", + "readable": true, + "type": "gchararray", + "writable": true + }, "results-stability": { "blurb": "Defines how fast results should stabilize", "conditionally-available": false, @@ -1109,6 +1157,18 @@ "type": "gchararray", "writable": true }, + "vocabulary-filter-names": { + "blurb": "The names of a custom filter vocabularies to be used with identify-language or identify-multiple-languages, see ", + "conditionally-available": false, + "construct": false, + "construct-only": false, + "controllable": false, + "default": "NULL", + "mutable": "ready", + "readable": true, + "type": "gchararray", + "writable": true + }, "vocabulary-name": { "blurb": "The name of a custom vocabulary, see for more information", "conditionally-available": false, @@ -1120,6 +1180,18 @@ "readable": true, "type": "gchararray", "writable": true + }, + "vocabulary-names": { + "blurb": "The names of a custom vocabularies to be used with identify-language or identify-multiple-languages, see ", + "conditionally-available": false, + "construct": false, + "construct-only": false, + "controllable": false, + "default": "NULL", + "mutable": "ready", + "readable": true, + "type": "gchararray", + "writable": true } }, "rank": "none" diff --git a/net/aws/src/transcriber/imp.rs b/net/aws/src/transcriber/imp.rs index 1f34154d..593b67ba 100644 --- a/net/aws/src/transcriber/imp.rs +++ b/net/aws/src/transcriber/imp.rs @@ -88,13 +88,19 @@ pub(super) struct Settings { translate_lookahead: gst::ClockTime, lateness: gst::ClockTime, pub language_code: String, + pub identify_language: bool, + pub language_options: 
Option, + pub preferred_language: Option, + pub identify_multiple_languages: bool, pub vocabulary: Option, + pub vocabularies: Option, pub session_id: Option, pub results_stability: AwsTranscriberResultStability, access_key: Option, secret_access_key: Option, session_token: Option, pub vocabulary_filter: Option, + pub vocabulary_filters: Option, pub vocabulary_filter_method: AwsTranscriberVocabularyFilterMethod, } @@ -106,13 +112,19 @@ impl Default for Settings { translate_lookahead: DEFAULT_TRANSLATE_LOOKAHEAD, lateness: DEFAULT_LATENESS, language_code: DEFAULT_INPUT_LANG_CODE.to_string(), + identify_language: false, + language_options: None, + preferred_language: None, + identify_multiple_languages: false, vocabulary: None, + vocabularies: None, session_id: None, results_stability: DEFAULT_STABILITY, access_key: None, secret_access_key: None, session_token: None, vocabulary_filter: None, + vocabulary_filters: None, vocabulary_filter_method: DEFAULT_VOCABULARY_FILTER_METHOD, } } @@ -680,11 +692,35 @@ impl ObjectImpl for Transcriber { glib::ParamSpecString::builder("language-code") .nick("Language Code") .blurb("The Language of the Stream, see \ - \ + \ for an up to date list of allowed languages") .default_value(Some(DEFAULT_INPUT_LANG_CODE)) .mutable_ready() .build(), + glib::ParamSpecBoolean::builder("identify-language") + .nick("Identify Language") + .blurb("Enables automatic language identification, see \ + ") + .mutable_ready() + .build(), + glib::ParamSpecString::builder("language-options") + .nick("Language Options") + .blurb("Two or more comma-separated language codes that represent the languages which may be present in the media, see \ + ") + .mutable_ready() + .build(), + glib::ParamSpecString::builder("preferred-language") + .nick("Preferred Language") + .blurb("Preferred language from the subset of languages codes specified in `language-options`, see \ + ") + .mutable_ready() + .build(), + glib::ParamSpecBoolean::builder("identify-multiple-languages") 
+ .nick("Identify Multiple Languages") + .blurb("Enables automatic multi-language identification, see \ + ") + .mutable_ready() + .build(), glib::ParamSpecUInt::builder(DEPRECATED_LATENCY_PROPERTY) .nick("Latency") .blurb("Amount of milliseconds to allow AWS transcribe (Deprecated. Use transcribe-latency)") @@ -729,6 +765,12 @@ impl ObjectImpl for Transcriber { for more information") .mutable_ready() .build(), + glib::ParamSpecString::builder("vocabulary-names") + .nick("Vocabulary Names") + .blurb("The names of comma-separated custom vocabularies to be used with identify-language or identify-multiple-languages, see \ + ") + .mutable_ready() + .build(), glib::ParamSpecString::builder("session-id") .nick("Session ID") .blurb("The ID of the transcription session, must be length 36") @@ -761,6 +803,12 @@ impl ObjectImpl for Transcriber { for more information") .mutable_ready() .build(), + glib::ParamSpecString::builder("vocabulary-filter-names") + .nick("Vocabulary Filter Names") + .blurb("The names of comma-separated custom filter vocabularies to be used with identify-language or identify-multiple-languages, see \ + ") + .mutable_ready() + .build(), glib::ParamSpecEnum::builder_with_default("vocabulary-filter-method", DEFAULT_VOCABULARY_FILTER_METHOD) .nick("Vocabulary Filter Method") .blurb("Defines how filtered words will be edited, has no effect when vocabulary-filter-name isn't set") @@ -787,6 +835,22 @@ impl ObjectImpl for Transcriber { let mut settings = self.settings.lock().unwrap(); settings.language_code = value.get().expect("type checked upstream"); } + "identify-language" => { + let mut settings = self.settings.lock().unwrap(); + settings.identify_language = value.get().unwrap(); + } + "language-options" => { + let mut settings = self.settings.lock().unwrap(); + settings.language_options = value.get().unwrap(); + } + "preferred-language" => { + let mut settings = self.settings.lock().unwrap(); + settings.preferred_language = value.get().unwrap(); + } + 
"identify-multiple-languages" => { + let mut settings = self.settings.lock().unwrap(); + settings.identify_multiple_languages = value.get().unwrap(); + } DEPRECATED_LATENCY_PROPERTY => { let mut settings = self.settings.lock().unwrap(); settings.transcribe_latency = gst::ClockTime::from_mseconds( @@ -817,6 +881,10 @@ impl ObjectImpl for Transcriber { let mut settings = self.settings.lock().unwrap(); settings.vocabulary = value.get().expect("type checked upstream"); } + "vocabulary-names" => { + let mut settings = self.settings.lock().unwrap(); + settings.vocabularies = value.get().expect("type checked upstream"); + } "session-id" => { let mut settings = self.settings.lock().unwrap(); settings.session_id = value.get().expect("type checked upstream"); @@ -843,6 +911,10 @@ impl ObjectImpl for Transcriber { let mut settings = self.settings.lock().unwrap(); settings.vocabulary_filter = value.get().expect("type checked upstream"); } + "vocabulary-filter-names" => { + let mut settings = self.settings.lock().unwrap(); + settings.vocabulary_filters = value.get().expect("type checked upstream"); + } "vocabulary-filter-method" => { let mut settings = self.settings.lock().unwrap(); settings.vocabulary_filter_method = value @@ -859,6 +931,22 @@ impl ObjectImpl for Transcriber { let settings = self.settings.lock().unwrap(); settings.language_code.to_value() } + "identify-language" => { + let settings = self.settings.lock().unwrap(); + settings.identify_language.to_value() + } + "language-options" => { + let settings = self.settings.lock().unwrap(); + settings.language_options.to_value() + } + "preferred-language" => { + let settings = self.settings.lock().unwrap(); + settings.preferred_language.to_value() + } + "identify-multiple-languages" => { + let settings = self.settings.lock().unwrap(); + settings.identify_multiple_languages.to_value() + } DEPRECATED_LATENCY_PROPERTY => { let settings = self.settings.lock().unwrap(); (settings.transcribe_latency.mseconds() as 
u32).to_value() @@ -881,6 +969,10 @@ impl ObjectImpl for Transcriber { let settings = self.settings.lock().unwrap(); settings.vocabulary.to_value() } + "vocabulary-names" => { + let settings = self.settings.lock().unwrap(); + settings.vocabularies.to_value() + } "session-id" => { let settings = self.settings.lock().unwrap(); settings.session_id.to_value() @@ -905,6 +997,10 @@ impl ObjectImpl for Transcriber { let settings = self.settings.lock().unwrap(); settings.vocabulary_filter.to_value() } + "vocabulary-filter-names" => { + let settings = self.settings.lock().unwrap(); + settings.vocabulary_filters.to_value() + } "vocabulary-filter-method" => { let settings = self.settings.lock().unwrap(); settings.vocabulary_filter_method.to_value() @@ -1091,6 +1187,7 @@ impl ChildProxyImpl for Transcriber { .map(|p| p.upcast()) } } + struct TranslationPadTask { pad: glib::subclass::ObjectImplRef, elem: super::Transcriber, @@ -1136,7 +1233,7 @@ impl TranslationPadTask { } needs_translate = TranslateSrcPad::needs_translation( - &elem_settings.language_code, + &elem_settings, pad_settings.language_code.as_deref(), ); @@ -1147,7 +1244,6 @@ impl TranslationPadTask { translation_loop = Some(TranslateLoop::new( elem_imp, pad, - &elem_settings.language_code, pad_settings.language_code.as_deref().unwrap(), pad_settings.tokenization_method, to_loop_rx, @@ -1634,10 +1730,24 @@ impl TranslateSrcPad { } #[inline] - fn needs_translation(input_lang: &str, output_lang: Option<&str>) -> bool { - output_lang.map_or(false, |other| { - !input_lang.eq_ignore_ascii_case(other.as_ref()) - }) + fn needs_translation(elem_settings: &Settings, output_lang: Option<&str>) -> bool { + let Some(output_lang) = output_lang else { + return false; + }; + + if elem_settings.identify_language || elem_settings.identify_multiple_languages { + // TranslateLoop will determine on a case by case basis whether + // the Translate service must be called depending on the language + // detected by Transcribe. 
+ return true; + } + + // Transcript language is a 5 character localized language code: e.g. en-US + // Translate output language can be 2 (en) or 5 characters (en-US). + !elem_settings + .language_code + .to_ascii_lowercase() + .starts_with(&output_lang.to_ascii_lowercase()) } #[inline] @@ -1645,10 +1755,7 @@ impl TranslateSrcPad { elem_settings: &Settings, pad_settings: &TranslatePadSettings, ) -> gst::ClockTime { - if Self::needs_translation( - &elem_settings.language_code, - pad_settings.language_code.as_deref(), - ) { + if Self::needs_translation(elem_settings, pad_settings.language_code.as_deref()) { elem_settings.transcribe_latency + elem_settings.translate_latency } else { elem_settings.transcribe_latency diff --git a/net/aws/src/transcriber/transcribe.rs b/net/aws/src/transcriber/transcribe.rs index cff88f15..03be19cb 100644 --- a/net/aws/src/transcriber/transcribe.rs +++ b/net/aws/src/transcriber/transcribe.rs @@ -25,9 +25,15 @@ use super::CAT; #[derive(Debug)] pub struct TranscriberSettings { lang_code: types::LanguageCode, + identify_lang: bool, + lang_options: Option, + preferred_lang: Option, + identify_multi_lang: bool, sample_rate: i32, vocabulary: Option, + vocabularies: Option, vocabulary_filter: Option, + vocabulary_filters: Option, vocabulary_filter_method: types::VocabularyFilterMethod, session_id: Option, results_stability: types::PartialResultsStability, @@ -35,11 +41,22 @@ pub struct TranscriberSettings { impl TranscriberSettings { pub(super) fn from(settings: &Settings, sample_rate: i32) -> Self { + let preferred_lang = settings + .preferred_language + .as_ref() + .map(|pref_lang| pref_lang.as_str().into()); + TranscriberSettings { lang_code: settings.language_code.as_str().into(), + identify_lang: settings.identify_language, + lang_options: settings.language_options.clone(), + preferred_lang, + identify_multi_lang: settings.identify_multiple_languages, sample_rate, vocabulary: settings.vocabulary.clone(), + vocabularies: 
settings.vocabularies.clone(), vocabulary_filter: settings.vocabulary_filter.clone(), + vocabulary_filters: settings.vocabulary_filters.clone(), vocabulary_filter_method: settings.vocabulary_filter_method.into(), session_id: settings.session_id.clone(), results_stability: settings.results_stability.into(), @@ -47,10 +64,11 @@ impl TranscriberSettings { } } -#[derive(Clone, Debug, Default)] +#[derive(Clone, Debug)] pub struct TranscriptItem { pub pts: gst::ClockTime, pub duration: gst::ClockTime, + pub lang_code: Option, pub content: String, pub is_punctuation: bool, } @@ -58,6 +76,7 @@ pub struct TranscriptItem { impl TranscriptItem { pub fn from( item: types::Item, + lang_code: Option, lateness: gst::ClockTime, discont_offset: gst::ClockTime, ) -> Option { @@ -73,6 +92,7 @@ impl TranscriptItem { Some(TranscriptItem { pts: start_time, duration: end_time - start_time, + lang_code, content, is_punctuation: matches!(item.r#type, Some(types::ItemType::Punctuation)), }) @@ -100,7 +120,8 @@ pub struct TranscriberStream { imp: glib::subclass::ObjectImplRef, output: aws_transcribe::operation::start_stream_transcription::StartStreamTranscriptionOutput, lateness: gst::ClockTime, - partial_index: usize, + last_stable_end_time: f64, + last_stable_is_punctuation: bool, discont_offset_tracker: Arc>, } @@ -147,18 +168,42 @@ impl TranscriberStream { let mut transcribe_builder = client .start_stream_transcription() - .language_code(settings.lang_code) .media_sample_rate_hertz(settings.sample_rate) .media_encoding(types::MediaEncoding::Pcm) .enable_partial_results_stabilization(true) .partial_results_stability(settings.results_stability) - .set_vocabulary_name(settings.vocabulary) .set_session_id(settings.session_id); - if let Some(vocabulary_filter) = settings.vocabulary_filter { + // From the doc: + // + // > Note that you must include either LanguageCode or IdentifyLanguage or + // > IdentifyMultipleLanguages in your request. 
If you include more than one + // > of these parameters, your transcription job fails. + // + // https://docs.rs/aws-sdk-transcribestreaming/1.17.0/aws_sdk_transcribestreaming/operation/start_stream_transcription/builders/struct.StartStreamTranscriptionFluentBuilder.html#method.identify_language + if settings.identify_lang || settings.identify_multi_lang { transcribe_builder = transcribe_builder - .vocabulary_filter_name(vocabulary_filter) - .vocabulary_filter_method(settings.vocabulary_filter_method); + .set_language_options(settings.lang_options) + .set_preferred_language(settings.preferred_lang) + .identify_language(!settings.identify_multi_lang) + .identify_multiple_languages(settings.identify_multi_lang) + .set_vocabulary_names(settings.vocabularies); + + if let Some(vocabulary_filters) = settings.vocabulary_filters { + transcribe_builder = transcribe_builder + .vocabulary_filter_names(vocabulary_filters) + .vocabulary_filter_method(settings.vocabulary_filter_method); + } + } else { + transcribe_builder = transcribe_builder + .language_code(settings.lang_code) + .set_vocabulary_name(settings.vocabulary); + + if let Some(vocabulary_filter) = settings.vocabulary_filter { + transcribe_builder = transcribe_builder + .vocabulary_filter_name(vocabulary_filter) + .vocabulary_filter_method(settings.vocabulary_filter_method); + } } let output = transcribe_builder @@ -175,12 +220,15 @@ impl TranscriberStream { imp: imp.ref_counted(), output, lateness, - partial_index: 0, + last_stable_end_time: 0.0f64, + last_stable_is_punctuation: false, discont_offset_tracker, }) } pub async fn next(&mut self) -> Result { + let lang_code = self.output.language_code().cloned(); + loop { let event = self .output @@ -199,8 +247,6 @@ impl TranscriberStream { }; if let types::TranscriptResultStream::TranscriptEvent(transcript_evt) = event { - let mut ready_items = None; - if let Some(result) = transcript_evt .transcript .and_then(|transcript| transcript.results) @@ -212,14 +258,17 @@ impl 
TranscriberStream { .alternatives .and_then(|mut alternatives| alternatives.drain(..).next()) { - ready_items = alternative.items.and_then(|items| { - self.get_ready_transcript_items(items, result.is_partial) - }); - } - } + if let Some(items) = alternative.items { + let ready_items = self.get_ready_transcript_items( + items, + result.language_code.or_else(|| lang_code.clone()), + ); - if let Some(ready_items) = ready_items { - return Ok(ready_items.into()); + if !ready_items.is_empty() { + return Ok(ready_items.into()); + } + } + } } } else { gst::warning!( @@ -234,57 +283,71 @@ impl TranscriberStream { /// Builds a list from the provided stable items. fn get_ready_transcript_items( &mut self, - mut items: Vec, - partial: bool, - ) -> Option> { - if items.len() <= self.partial_index { - gst::error!( - CAT, - imp: self.imp, - "sanity check failed, alternative length {} < partial_index {}", - items.len(), - self.partial_index - ); - - if !partial { - self.partial_index = 0; - } - - return None; - } - + items: Vec, + lang_code: Option, + ) -> Vec { let mut output = vec![]; - for item in items.drain(self.partial_index..) { + // With language identification, we can receive several non-partial sub-segment + // results for individual sentences. E.g. starting from a segment with 3 sentences: + // + // - ... several partial results with some stabilized items. + // - partial result with all stable items for the segment (3 sentences). + // - non-partial sub-segment result with items & timestamps from 1st sentence. + // - non-partial sub-segment result with items & timestamps from 2nd sentence. + // - non-partial sub-segment result with items & timestamps from 3rd sentence. + // end_time matches the end_time of the last stable item. + // - partial result for next segment... + // + // Also had the case of a non-partial segment followed by the same + // segment flagged as partial. 
+ // + // We can't expect the items sequence to be stable anymore and skip already + // processed items based on the partial_index. The approach here consists in + // using the item timestamp to determine which item should be skipped. + + for item in items { if !item.stable().unwrap_or(false) { break; } - let discont_offset = self.discont_offset_tracker.lock().unwrap().discont_offset; + if item.start_time < self.last_stable_end_time { + gst::trace!(CAT, imp: self.imp, "Skipping earlier item starting @ {}", item.start_time); + continue; + } - let Some(item) = TranscriptItem::from(item, self.lateness, discont_offset) else { + let is_punctuation = item + .r#type() + .map_or(false, |typ| *typ == types::ItemType::Punctuation); + if is_punctuation && self.last_stable_is_punctuation { + gst::trace!(CAT, imp: self.imp, "Skipping punctuation {:?} because last item is a punctuation too", item.content); + continue; + } + + let discont_offset = self.discont_offset_tracker.lock().unwrap().discont_offset; + let end_time = item.end_time; + + let Some(item) = + TranscriptItem::from(item, lang_code.clone(), self.lateness, discont_offset) + else { continue; }; + gst::debug!( CAT, imp: self.imp, - "Item is ready for queuing: {}, PTS {}", + "Item is ready for queuing: {}, PTS {}, lang {:?}", item.content, item.pts, + item.lang_code, ); - self.partial_index += 1; + self.last_stable_end_time = end_time; + self.last_stable_is_punctuation = is_punctuation; + output.push(item); } - if !partial { - self.partial_index = 0; - } - - if output.is_empty() { - return None; - } - - Some(output) + output } } diff --git a/net/aws/src/transcriber/translate.rs b/net/aws/src/transcriber/translate.rs index 0b37422b..d4b6afa9 100644 --- a/net/aws/src/transcriber/translate.rs +++ b/net/aws/src/transcriber/translate.rs @@ -15,6 +15,7 @@ use aws_sdk_translate::error::ProvideErrorMetadata; use futures::channel::mpsc; use futures::prelude::*; +use std::ops::ControlFlow; use std::sync::Arc; use 
super::imp::TranslateSrcPad; @@ -44,7 +45,6 @@ impl From<&TranscriptItem> for TranslatedItem { pub struct TranslateLoop { pad: glib::subclass::ObjectImplRef, client: aws_translate::Client, - input_lang: String, output_lang: String, tokenization_method: TranslationTokenizationMethod, transcript_rx: mpsc::Receiver>>, @@ -55,7 +55,6 @@ impl TranslateLoop { pub fn new( imp: &super::imp::Transcriber, pad: &TranslateSrcPad, - input_lang: &str, output_lang: &str, tokenization_method: TranslationTokenizationMethod, transcript_rx: mpsc::Receiver>>, @@ -69,7 +68,6 @@ impl TranslateLoop { TranslateLoop { pad: pad.ref_counted(), client: aws_sdk_translate::Client::new(aws_config), - input_lang: input_lang.to_string(), output_lang: output_lang.to_string(), tokenization_method, transcript_rx, @@ -110,11 +108,54 @@ impl TranslateLoop { } let mut ts_duration_list: Vec<(gst::ClockTime, gst::ClockTime)> = vec![]; - let mut content: Vec = vec![]; + let mut content = String::new(); + let mut content_lang = Option::::None; + let mut needs_translation = false; let mut it = transcript_items.iter().peekable(); while let Some(item) = it.next() { + let lang_changed = if !content.is_empty() { + // Some items already buffered + match (content_lang.as_ref(), item.lang_code.as_ref()) { + (Some(clang), Some(ilang)) => { + // Content and new item langs are defined + !clang.eq_ignore_ascii_case(ilang.as_str()) + } + (None, Some(_)) | (Some(_), None) => { + // Content uses an undefined lang + // but new item's lang is defined + // or Content uses a defined lang + // but incoming item's lang is undefined + true + } + (None, None) => false, + } + } else { + false + }; + + if lang_changed + && self + .handle_transcript_items( + &mut ts_duration_list, + &mut content, + needs_translation, + content_lang.take(), + ) + .await? 
+ .is_break() + { + gst::info!(CAT, imp: self.pad, "exiting translation loop"); + break; + } + + if content.is_empty() { + // Either first item or content drained above + content_lang = item.lang_code.as_ref().map(|lang| lang.to_string()); + needs_translation = self.needs_translation(content_lang.as_deref()); + } + let suffix = match it.peek() { Some(next_item) => { if next_item.is_punctuation { @@ -125,25 +166,65 @@ impl TranslateLoop { } None => "", }; + ts_duration_list.push((item.pts, item.duration)); - content.push(match self.tokenization_method { - Tokenization::None => format!("{}{}", item.content, suffix), - Tokenization::SpanBased => { + + let item_content = + if needs_translation && self.tokenization_method == Tokenization::SpanBased { format!("{SPAN_START}{}{SPAN_END}{}", item.content, suffix) - } - }); + } else { + format!("{}{}", item.content, suffix) + }; + content.push_str(&item_content); } - let content: String = content.join(""); + if !content.is_empty() + && self + .handle_transcript_items( + &mut ts_duration_list, + &mut content, + needs_translation, + content_lang.take(), + ) + .await? 
+ .is_break() + { + gst::info!(CAT, imp: self.pad, "exiting translation loop"); + break; + } + } - gst::debug!(CAT, imp: self.pad, "Translating {content} with {ts_duration_list:?}"); + Ok(()) + } + + #[inline] + fn needs_translation(&self, lang: Option<&str>) -> bool { + let Some(lang) = lang else { return false }; + !lang.to_ascii_lowercase().starts_with(&self.output_lang) + } + + async fn handle_transcript_items( + &mut self, + ts_duration_list: &mut Vec<(gst::ClockTime, gst::ClockTime)>, + content: &mut String, + needs_translation: bool, + content_lang: Option, + ) -> Result, gst::ErrorMessage> { + use std::mem; + use TranslationTokenizationMethod as Tokenization; + + let output_text = if needs_translation { + gst::debug!(CAT, imp: self.pad, + "Translating: '{content}' from {content_lang:?} to {} with {ts_duration_list:?}", + self.output_lang, + ); let translated_text = self .client .translate_text() - .set_source_language_code(Some(self.input_lang.clone())) + .set_source_language_code(content_lang) .set_target_language_code(Some(self.output_lang.clone())) - .set_text(Some(content)) + .set_text(Some(mem::take(content))) .send() .await .map_err(|err| { @@ -153,39 +234,43 @@ impl TranslateLoop { })? 
.translated_text; - gst::debug!(CAT, imp: self.pad, "Got translation {translated_text}"); + gst::debug!(CAT, imp: self.pad, "Got translation: '{translated_text}'"); - let translated_items = match self.tokenization_method { - Tokenization::None => { - // Push translation as a single item - let mut ts_duration_iter = ts_duration_list.into_iter().peekable(); + translated_text + } else { + gst::debug!(CAT, imp: self.pad, + "Not translating: '{content}' from {content_lang:?} to {} with {ts_duration_list:?}", + self.output_lang, + ); - let &(first_pts, _) = ts_duration_iter.peek().expect("at least one item"); - let (last_pts, last_duration) = - ts_duration_iter.last().expect("at least one item"); + mem::take(content) + }; - vec![TranslatedItem { - pts: first_pts, - duration: last_pts.saturating_sub(first_pts) + last_duration, - content: translated_text, - }] - } - Tokenization::SpanBased => span_tokenize_items(&translated_text, ts_duration_list), + let translated_items = + if needs_translation && self.tokenization_method == Tokenization::SpanBased { + span_tokenize_items(&output_text, ts_duration_list.drain(..)) + } else { + // Push translation as a single item + let mut ts_duration_iter = ts_duration_list.drain(..).peekable(); + + let &(first_pts, _) = ts_duration_iter.peek().expect("at least one item"); + let (last_pts, last_duration) = ts_duration_iter.last().expect("at least one item"); + + vec![TranslatedItem { + pts: first_pts, + duration: last_pts.saturating_sub(first_pts) + last_duration, + content: output_text, + }] }; - gst::trace!(CAT, imp: self.pad, "Sending {translated_items:?}"); + gst::trace!(CAT, imp: self.pad, "Sending {translated_items:?}"); - if self.translate_tx.send(translated_items).await.is_err() { - gst::info!( - CAT, - imp: self.pad, - "translation chan terminated, exiting translation loop" - ); - break; - } + if self.translate_tx.send(translated_items).await.is_err() { + gst::info!(CAT, imp: self.pad, "translation chan terminated"); + return 
Ok(ControlFlow::Break(())); } - Ok(()) + Ok(ControlFlow::Continue(())) } }