mirror of
https://gitlab.freedesktop.org/gstreamer/gst-plugins-rs.git
synced 2025-02-19 14:25:20 +00:00
aws: reimplement two separate wrappers for translate and transcribe
The original awstranscriber element grew too complex when integrating translations, for reasons that in retrospect were wrong: as awstranscriber outputs words one by one, I decided we wanted to perform translations there with larger sentences if available, but an alternative design where a separate translation element is composed downstream is also possible, as long as that element accumulates words and enough latency is set on the transcriber. An important difference is that the new elements do not expose unsynced pads; this use case is instead now served by simple messages on the bus. The elements should otherwise be at feature parity with the original element. A higher-level bin is also provided for convenience (and usage within transcriberbin): translationbin. A transcriber element can be provided to this bin, which exposes an always audio sink pad and an always text source pad (for the transcripts). Additional source pads can be requested for translations. For now the bin always uses `awstranslate` as the translator, but this can be made configurable. This element is usable as a transcriber in `transcriberbin`. Part-of: <https://gitlab.freedesktop.org/gstreamer/gst-plugins-rs/-/merge_requests/2055>
This commit is contained in:
parent
7d26990d4a
commit
71121a2380
12 changed files with 3343 additions and 5 deletions
|
@ -1379,6 +1379,282 @@
|
|||
}
|
||||
},
|
||||
"rank": "none"
|
||||
},
|
||||
"awstranscriber2": {
|
||||
"author": "Mathieu Duponchelle <mathieu@centricular.com>",
|
||||
"description": "Speech to Text filter, using AWS transcribe",
|
||||
"hierarchy": [
|
||||
"GstAwsTranscriber2",
|
||||
"GstElement",
|
||||
"GstObject",
|
||||
"GInitiallyUnowned",
|
||||
"GObject"
|
||||
],
|
||||
"klass": "Audio/Text/Filter",
|
||||
"pad-templates": {
|
||||
"sink": {
|
||||
"caps": "audio/x-raw:\n rate: 48000\n channels: 1\n layout: { (string)interleaved, (string)non-interleaved }\n format: S16LE\n",
|
||||
"direction": "sink",
|
||||
"presence": "always"
|
||||
},
|
||||
"src": {
|
||||
"caps": "text/x-raw:\n format: utf8\n",
|
||||
"direction": "src",
|
||||
"presence": "always"
|
||||
}
|
||||
},
|
||||
"properties": {
|
||||
"access-key": {
|
||||
"blurb": "AWS Access Key",
|
||||
"conditionally-available": false,
|
||||
"construct": false,
|
||||
"construct-only": false,
|
||||
"controllable": false,
|
||||
"default": "NULL",
|
||||
"mutable": "ready",
|
||||
"readable": true,
|
||||
"type": "gchararray",
|
||||
"writable": true
|
||||
},
|
||||
"language-code": {
|
||||
"blurb": "The Language of the Stream, see <https://docs.aws.amazon.com/transcribe/latest/dg/how-streaming-transcription.html> for an up to date list of allowed languages",
|
||||
"conditionally-available": false,
|
||||
"construct": false,
|
||||
"construct-only": false,
|
||||
"controllable": false,
|
||||
"default": "en-US",
|
||||
"mutable": "ready",
|
||||
"readable": true,
|
||||
"type": "gchararray",
|
||||
"writable": true
|
||||
},
|
||||
"latency": {
|
||||
"blurb": "Amount of milliseconds to allow AWS transcribe",
|
||||
"conditionally-available": false,
|
||||
"construct": false,
|
||||
"construct-only": false,
|
||||
"controllable": false,
|
||||
"default": "1000",
|
||||
"max": "-1",
|
||||
"min": "0",
|
||||
"mutable": "ready",
|
||||
"readable": true,
|
||||
"type": "guint",
|
||||
"writable": true
|
||||
},
|
||||
"lateness": {
|
||||
"blurb": "Amount of milliseconds to introduce as lateness",
|
||||
"conditionally-available": false,
|
||||
"construct": false,
|
||||
"construct-only": false,
|
||||
"controllable": false,
|
||||
"default": "0",
|
||||
"max": "-1",
|
||||
"min": "0",
|
||||
"mutable": "ready",
|
||||
"readable": true,
|
||||
"type": "guint",
|
||||
"writable": true
|
||||
},
|
||||
"results-stability": {
|
||||
"blurb": "Defines how fast results should stabilize",
|
||||
"conditionally-available": false,
|
||||
"construct": false,
|
||||
"construct-only": false,
|
||||
"controllable": false,
|
||||
"default": "low (2)",
|
||||
"mutable": "ready",
|
||||
"readable": true,
|
||||
"type": "GstAwsTranscriberResultStability",
|
||||
"writable": true
|
||||
},
|
||||
"secret-access-key": {
|
||||
"blurb": "AWS Secret Access Key",
|
||||
"conditionally-available": false,
|
||||
"construct": false,
|
||||
"construct-only": false,
|
||||
"controllable": false,
|
||||
"default": "NULL",
|
||||
"mutable": "ready",
|
||||
"readable": true,
|
||||
"type": "gchararray",
|
||||
"writable": true
|
||||
},
|
||||
"session-id": {
|
||||
"blurb": "The ID of the transcription session, must be length 36",
|
||||
"conditionally-available": false,
|
||||
"construct": false,
|
||||
"construct-only": false,
|
||||
"controllable": false,
|
||||
"default": "NULL",
|
||||
"mutable": "ready",
|
||||
"readable": true,
|
||||
"type": "gchararray",
|
||||
"writable": true
|
||||
},
|
||||
"session-token": {
|
||||
"blurb": "AWS temporary Session Token from STS",
|
||||
"conditionally-available": false,
|
||||
"construct": false,
|
||||
"construct-only": false,
|
||||
"controllable": false,
|
||||
"default": "NULL",
|
||||
"mutable": "ready",
|
||||
"readable": true,
|
||||
"type": "gchararray",
|
||||
"writable": true
|
||||
},
|
||||
"vocabulary-filter-method": {
|
||||
"blurb": "Defines how filtered words will be edited, has no effect when vocabulary-filter-name isn't set",
|
||||
"conditionally-available": false,
|
||||
"construct": false,
|
||||
"construct-only": false,
|
||||
"controllable": false,
|
||||
"default": "mask (0)",
|
||||
"mutable": "ready",
|
||||
"readable": true,
|
||||
"type": "GstAwsTranscriberVocabularyFilterMethod",
|
||||
"writable": true
|
||||
},
|
||||
"vocabulary-filter-name": {
|
||||
"blurb": "The name of a custom filter vocabulary, see <https://docs.aws.amazon.com/transcribe/latest/help-panel/vocab-filter.html> for more information",
|
||||
"conditionally-available": false,
|
||||
"construct": false,
|
||||
"construct-only": false,
|
||||
"controllable": false,
|
||||
"default": "NULL",
|
||||
"mutable": "ready",
|
||||
"readable": true,
|
||||
"type": "gchararray",
|
||||
"writable": true
|
||||
},
|
||||
"vocabulary-name": {
|
||||
"blurb": "The name of a custom vocabulary, see <https://docs.aws.amazon.com/transcribe/latest/dg/how-vocabulary.html> for more information",
|
||||
"conditionally-available": false,
|
||||
"construct": false,
|
||||
"construct-only": false,
|
||||
"controllable": false,
|
||||
"default": "NULL",
|
||||
"mutable": "ready",
|
||||
"readable": true,
|
||||
"type": "gchararray",
|
||||
"writable": true
|
||||
}
|
||||
},
|
||||
"rank": "none"
|
||||
},
|
||||
"awstranslate": {
|
||||
"author": "Mathieu Duponchelle <mathieu@centricular.com>",
|
||||
"description": "Translates text",
|
||||
"hierarchy": [
|
||||
"GstAwsTranslate",
|
||||
"GstElement",
|
||||
"GstObject",
|
||||
"GInitiallyUnowned",
|
||||
"GObject"
|
||||
],
|
||||
"klass": "Text/Filter",
|
||||
"pad-templates": {
|
||||
"sink": {
|
||||
"caps": "text/x-raw:\n format: utf8\n",
|
||||
"direction": "sink",
|
||||
"presence": "always"
|
||||
},
|
||||
"src": {
|
||||
"caps": "text/x-raw:\n format: utf8\n",
|
||||
"direction": "src",
|
||||
"presence": "always"
|
||||
}
|
||||
},
|
||||
"properties": {
|
||||
"access-key": {
|
||||
"blurb": "AWS Access Key",
|
||||
"conditionally-available": false,
|
||||
"construct": false,
|
||||
"construct-only": false,
|
||||
"controllable": false,
|
||||
"default": "NULL",
|
||||
"mutable": "ready",
|
||||
"readable": true,
|
||||
"type": "gchararray",
|
||||
"writable": true
|
||||
},
|
||||
"input-language-code": {
|
||||
"blurb": "The Language of the input stream",
|
||||
"conditionally-available": false,
|
||||
"construct": false,
|
||||
"construct-only": false,
|
||||
"controllable": false,
|
||||
"default": "en-US",
|
||||
"mutable": "ready",
|
||||
"readable": true,
|
||||
"type": "gchararray",
|
||||
"writable": true
|
||||
},
|
||||
"latency": {
|
||||
"blurb": "Amount of milliseconds to allow AWS Polly",
|
||||
"conditionally-available": false,
|
||||
"construct": false,
|
||||
"construct-only": false,
|
||||
"controllable": false,
|
||||
"default": "2000",
|
||||
"max": "-1",
|
||||
"min": "0",
|
||||
"mutable": "ready",
|
||||
"readable": true,
|
||||
"type": "guint",
|
||||
"writable": true
|
||||
},
|
||||
"output-language-code": {
|
||||
"blurb": "The Language of the output stream",
|
||||
"conditionally-available": false,
|
||||
"construct": false,
|
||||
"construct-only": false,
|
||||
"controllable": false,
|
||||
"default": "fr-FR",
|
||||
"mutable": "ready",
|
||||
"readable": true,
|
||||
"type": "gchararray",
|
||||
"writable": true
|
||||
},
|
||||
"secret-access-key": {
|
||||
"blurb": "AWS Secret Access Key",
|
||||
"conditionally-available": false,
|
||||
"construct": false,
|
||||
"construct-only": false,
|
||||
"controllable": false,
|
||||
"default": "NULL",
|
||||
"mutable": "ready",
|
||||
"readable": true,
|
||||
"type": "gchararray",
|
||||
"writable": true
|
||||
},
|
||||
"session-token": {
|
||||
"blurb": "AWS temporary Session Token from STS",
|
||||
"conditionally-available": false,
|
||||
"construct": false,
|
||||
"construct-only": false,
|
||||
"controllable": false,
|
||||
"default": "NULL",
|
||||
"mutable": "ready",
|
||||
"readable": true,
|
||||
"type": "gchararray",
|
||||
"writable": true
|
||||
},
|
||||
"tokenization-method": {
|
||||
"blurb": "The tokenization method to apply",
|
||||
"conditionally-available": false,
|
||||
"construct": false,
|
||||
"construct-only": false,
|
||||
"controllable": false,
|
||||
"default": "span-based (1)",
|
||||
"mutable": "ready",
|
||||
"readable": true,
|
||||
"type": "GstAwsTranscriberTranslationTokenizationMethod",
|
||||
"writable": true
|
||||
}
|
||||
},
|
||||
"rank": "none"
|
||||
}
|
||||
},
|
||||
"filename": "gstaws",
|
||||
|
@ -8786,6 +9062,108 @@
|
|||
},
|
||||
"rank": "none"
|
||||
},
|
||||
"translationbin": {
|
||||
"author": "Mathieu Duponchelle <mathieu@centricular.com>",
|
||||
"description": "Transcribes audio and translates it",
|
||||
"hierarchy": [
|
||||
"GstTranslationBin",
|
||||
"GstBin",
|
||||
"GstElement",
|
||||
"GstObject",
|
||||
"GInitiallyUnowned",
|
||||
"GObject"
|
||||
],
|
||||
"interfaces": [
|
||||
"GstChildProxy"
|
||||
],
|
||||
"klass": "Audio / Text",
|
||||
"pad-templates": {
|
||||
"sink": {
|
||||
"caps": "audio/x-raw:\n",
|
||||
"direction": "sink",
|
||||
"presence": "always"
|
||||
},
|
||||
"src": {
|
||||
"caps": "text/x-raw:\n format: utf8\n",
|
||||
"direction": "src",
|
||||
"presence": "always"
|
||||
},
|
||||
"translate_src_%%u": {
|
||||
"caps": "text/x-raw:\n format: utf8\n",
|
||||
"direction": "src",
|
||||
"presence": "request",
|
||||
"type": "GstTranslationBinTranslationSrcPad"
|
||||
}
|
||||
},
|
||||
"properties": {
|
||||
"language-code": {
|
||||
"blurb": "The language of the input stream",
|
||||
"conditionally-available": false,
|
||||
"construct": false,
|
||||
"construct-only": false,
|
||||
"controllable": false,
|
||||
"default": "en-US",
|
||||
"mutable": "ready",
|
||||
"readable": true,
|
||||
"type": "gchararray",
|
||||
"writable": true
|
||||
},
|
||||
"lateness": {
|
||||
"blurb": "Amount of milliseconds to offset transcription by",
|
||||
"conditionally-available": false,
|
||||
"construct": false,
|
||||
"construct-only": false,
|
||||
"controllable": false,
|
||||
"default": "0",
|
||||
"max": "-1",
|
||||
"min": "0",
|
||||
"mutable": "ready",
|
||||
"readable": true,
|
||||
"type": "guint",
|
||||
"writable": true
|
||||
},
|
||||
"transcribe-latency": {
|
||||
"blurb": "Amount of milliseconds to allow for transcription",
|
||||
"conditionally-available": false,
|
||||
"construct": false,
|
||||
"construct-only": false,
|
||||
"controllable": false,
|
||||
"default": "1000",
|
||||
"max": "-1",
|
||||
"min": "0",
|
||||
"mutable": "ready",
|
||||
"readable": true,
|
||||
"type": "guint",
|
||||
"writable": true
|
||||
},
|
||||
"transcriber": {
|
||||
"blurb": "The transcriber element to use",
|
||||
"conditionally-available": false,
|
||||
"construct": false,
|
||||
"construct-only": false,
|
||||
"controllable": false,
|
||||
"mutable": "ready",
|
||||
"readable": true,
|
||||
"type": "GstElement",
|
||||
"writable": true
|
||||
},
|
||||
"translate-latency": {
|
||||
"blurb": "Amount of milliseconds to allow for translation",
|
||||
"conditionally-available": false,
|
||||
"construct": false,
|
||||
"construct-only": false,
|
||||
"controllable": false,
|
||||
"default": "500",
|
||||
"max": "-1",
|
||||
"min": "0",
|
||||
"mutable": "ready",
|
||||
"readable": true,
|
||||
"type": "guint",
|
||||
"writable": true
|
||||
}
|
||||
},
|
||||
"rank": "none"
|
||||
},
|
||||
"tttocea608": {
|
||||
"author": "Mathieu Duponchelle <mathieu@centricular.com>",
|
||||
"description": "Converts timed text to CEA-608 Closed Captions",
|
||||
|
@ -9221,6 +9599,32 @@
|
|||
],
|
||||
"kind": "object"
|
||||
},
|
||||
"GstTranslationBinTranslationSrcPad": {
|
||||
"hierarchy": [
|
||||
"GstTranslationBinTranslationSrcPad",
|
||||
"GstGhostPad",
|
||||
"GstProxyPad",
|
||||
"GstPad",
|
||||
"GstObject",
|
||||
"GInitiallyUnowned",
|
||||
"GObject"
|
||||
],
|
||||
"kind": "object",
|
||||
"properties": {
|
||||
"language-code": {
|
||||
"blurb": "The language of the output stream",
|
||||
"conditionally-available": false,
|
||||
"construct": false,
|
||||
"construct-only": false,
|
||||
"controllable": false,
|
||||
"default": "fr-FR",
|
||||
"mutable": "ready",
|
||||
"readable": true,
|
||||
"type": "gchararray",
|
||||
"writable": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"GstTtToCea608Mode": {
|
||||
"kind": "enum",
|
||||
"values": [
|
||||
|
|
|
@ -22,6 +22,8 @@ mod s3url;
|
|||
pub mod s3utils;
|
||||
mod transcribe_parse;
|
||||
mod transcriber;
|
||||
mod transcriber2;
|
||||
mod translate;
|
||||
|
||||
pub use transcriber::AwsTranscriberResultStability;
|
||||
|
||||
|
@ -30,6 +32,8 @@ fn plugin_init(plugin: &gst::Plugin) -> Result<(), glib::BoolError> {
|
|||
s3src::register(plugin)?;
|
||||
transcribe_parse::register(plugin)?;
|
||||
transcriber::register(plugin)?;
|
||||
transcriber2::register(plugin)?;
|
||||
translate::register(plugin)?;
|
||||
s3hlssink::register(plugin)?;
|
||||
polly::register(plugin)?;
|
||||
|
||||
|
|
|
@ -118,8 +118,7 @@ impl Polly {
|
|||
}
|
||||
Ok(segment) => segment,
|
||||
};
|
||||
let mut state = self.state.lock().unwrap();
|
||||
state.out_segment = segment;
|
||||
self.state.lock().unwrap().out_segment = segment;
|
||||
gst::Pad::event_default(pad, Some(&*self.obj()), event)
|
||||
}
|
||||
Caps(c) => {
|
||||
|
|
|
@ -10,9 +10,9 @@ use gst::glib;
|
|||
use gst::prelude::*;
|
||||
|
||||
mod imp;
|
||||
mod remote_types;
|
||||
pub mod remote_types;
|
||||
mod transcribe;
|
||||
mod translate;
|
||||
pub mod translate;
|
||||
|
||||
use std::sync::LazyLock;
|
||||
|
||||
|
|
|
@ -223,7 +223,7 @@ impl TranslateLoop {
|
|||
///
|
||||
/// If fewer parsed items are found, the last item will be assigned the remaining
|
||||
/// duration from the `ts_duration_list`.
|
||||
fn span_tokenize_items(
|
||||
pub fn span_tokenize_items(
|
||||
translation: &str,
|
||||
ts_duration_list: impl IntoIterator<Item = (gst::ClockTime, gst::ClockTime)>,
|
||||
) -> Vec<TranslatedItem> {
|
||||
|
|
1138
net/aws/src/transcriber2/imp.rs
Normal file
1138
net/aws/src/transcriber2/imp.rs
Normal file
File diff suppressed because it is too large
Load diff
35
net/aws/src/transcriber2/mod.rs
Normal file
35
net/aws/src/transcriber2/mod.rs
Normal file
|
@ -0,0 +1,35 @@
|
|||
// Copyright (C) 2025 Mathieu Duponchelle <mathieu@centricular.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla Public License, v2.0.
|
||||
// If a copy of the MPL was not distributed with this file, You can obtain one at
|
||||
// <https://mozilla.org/MPL/2.0/>.
|
||||
//
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
|
||||
use gst::glib;
|
||||
use gst::prelude::*;
|
||||
|
||||
mod imp;
|
||||
|
||||
use std::sync::LazyLock;
|
||||
|
||||
/// Debug category used by the `awstranscriber2` element, created lazily on first use.
static CAT: LazyLock<gst::DebugCategory> = LazyLock::new(|| {
    let name = "awstranscriber2";
    let description = "AWS transcribe element, second try";

    gst::DebugCategory::new(name, gst::DebugColorFlags::empty(), Some(description))
});
|
||||
|
||||
glib::wrapper! {
    /// Public GObject wrapper around the `imp::Transcriber` element implementation.
    pub struct Transcriber(ObjectSubclass<imp::Transcriber>)
        @extends gst::Element, gst::Object;
}
|
||||
|
||||
pub fn register(plugin: &gst::Plugin) -> Result<(), glib::BoolError> {
|
||||
gst::Element::register(
|
||||
Some(plugin),
|
||||
"awstranscriber2",
|
||||
gst::Rank::NONE,
|
||||
Transcriber::static_type(),
|
||||
)
|
||||
}
|
1138
net/aws/src/translate/imp.rs
Normal file
1138
net/aws/src/translate/imp.rs
Normal file
File diff suppressed because it is too large
Load diff
35
net/aws/src/translate/mod.rs
Normal file
35
net/aws/src/translate/mod.rs
Normal file
|
@ -0,0 +1,35 @@
|
|||
// Copyright (C) 2025 Mathieu Duponchelle <mathieu@centricular.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla Public License, v2.0.
|
||||
// If a copy of the MPL was not distributed with this file, You can obtain one at
|
||||
// <https://mozilla.org/MPL/2.0/>.
|
||||
//
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
|
||||
use gst::glib;
|
||||
use gst::prelude::*;
|
||||
|
||||
mod imp;
|
||||
|
||||
use std::sync::LazyLock;
|
||||
|
||||
/// Debug category used by the `awstranslate` element, created lazily on first use.
static CAT: LazyLock<gst::DebugCategory> = LazyLock::new(|| {
    let name = "awstranslate";
    let description = "AWS translate element";

    gst::DebugCategory::new(name, gst::DebugColorFlags::empty(), Some(description))
});
|
||||
|
||||
glib::wrapper! {
    /// Public GObject wrapper around the `imp::Translate` element implementation.
    pub struct Translate(ObjectSubclass<imp::Translate>)
        @extends gst::Element, gst::Object;
}
|
||||
|
||||
pub fn register(plugin: &gst::Plugin) -> Result<(), glib::BoolError> {
|
||||
gst::Element::register(
|
||||
Some(plugin),
|
||||
"awstranslate",
|
||||
gst::Rank::NONE,
|
||||
Translate::static_type(),
|
||||
)
|
||||
}
|
|
@ -40,6 +40,7 @@ mod st2038ancdemux;
|
|||
mod st2038ancmux;
|
||||
mod st2038anctocc;
|
||||
mod transcriberbin;
|
||||
mod translationbin;
|
||||
mod tttocea608;
|
||||
mod tttocea708;
|
||||
mod tttojson;
|
||||
|
@ -63,6 +64,7 @@ fn plugin_init(plugin: &gst::Plugin) -> Result<(), glib::BoolError> {
|
|||
cea608tojson::register(plugin)?;
|
||||
jsontovtt::register(plugin)?;
|
||||
transcriberbin::register(plugin)?;
|
||||
translationbin::register(plugin)?;
|
||||
cea608tocea708::register(plugin)?;
|
||||
cea708mux::register(plugin)?;
|
||||
tttocea708::register(plugin)?;
|
||||
|
|
549
video/closedcaption/src/translationbin/imp.rs
Normal file
549
video/closedcaption/src/translationbin/imp.rs
Normal file
|
@ -0,0 +1,549 @@
|
|||
// Copyright (C) 2025 Mathieu Duponchelle <mathieu@centricular.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla Public License, v2.0.
|
||||
// If a copy of the MPL was not distributed with this file, You can obtain one at
|
||||
// <https://mozilla.org/MPL/2.0/>.
|
||||
//
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
|
||||
use anyhow::anyhow;
|
||||
use gst::glib;
|
||||
use gst::prelude::*;
|
||||
use gst::subclass::prelude::*;
|
||||
|
||||
use anyhow::Error;
|
||||
use std::collections::HashSet;
|
||||
use std::sync::{LazyLock, Mutex};
|
||||
|
||||
/// Debug category used by the `translationbin` element, created lazily on first use.
static CAT: LazyLock<gst::DebugCategory> = LazyLock::new(|| {
    let name = "translationbin";
    let description = "Transcribes and translates text";

    gst::DebugCategory::new(name, gst::DebugColorFlags::empty(), Some(description))
});
|
||||
|
||||
const DEFAULT_TRANSCRIBE_LATENCY: gst::ClockTime = gst::ClockTime::from_seconds(1);
|
||||
const DEFAULT_LATENESS: gst::ClockTime = gst::ClockTime::ZERO;
|
||||
const DEFAULT_TRANSLATE_LATENCY: gst::ClockTime = gst::ClockTime::from_mseconds(500);
|
||||
const DEFAULT_INPUT_LANG_CODE: &str = "en-US";
|
||||
const DEFAULT_OUTPUT_LANG_CODE: &str = "fr-FR";
|
||||
|
||||
struct State {
|
||||
transcriber: Option<gst::Element>,
|
||||
tee: Option<gst::Element>,
|
||||
queue: Option<gst::Element>,
|
||||
srcpads: HashSet<super::TranslationSrcPad>,
|
||||
pad_serial: u32,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct Settings {
|
||||
language_code: String,
|
||||
transcribe_latency: gst::ClockTime,
|
||||
lateness: gst::ClockTime,
|
||||
translate_latency: gst::ClockTime,
|
||||
}
|
||||
|
||||
impl Default for Settings {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
language_code: DEFAULT_INPUT_LANG_CODE.to_string(),
|
||||
transcribe_latency: DEFAULT_TRANSCRIBE_LATENCY,
|
||||
lateness: DEFAULT_LATENESS,
|
||||
translate_latency: DEFAULT_TRANSLATE_LATENCY,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct TranslationBin {
|
||||
state: Mutex<State>,
|
||||
settings: Mutex<Settings>,
|
||||
audio_sinkpad: gst::GhostPad,
|
||||
transcript_srcpad: gst::GhostPad,
|
||||
}
|
||||
|
||||
impl TranslationBin {
|
||||
fn prepare(&self) -> Result<(), Error> {
|
||||
let (transcriber, srcpads) = {
|
||||
let state = self.state.lock().unwrap();
|
||||
|
||||
let transcriber = match state.transcriber {
|
||||
Some(ref transcriber) => transcriber.clone(),
|
||||
None => gst::ElementFactory::make("awstranscriber2").build()?,
|
||||
};
|
||||
|
||||
(transcriber, state.srcpads.clone())
|
||||
};
|
||||
|
||||
let Settings {
|
||||
transcribe_latency,
|
||||
lateness,
|
||||
translate_latency,
|
||||
language_code,
|
||||
} = self.settings.lock().unwrap().clone();
|
||||
|
||||
let transcribe_latency_ms = transcribe_latency.mseconds() as u32;
|
||||
let lateness_ms = lateness.mseconds() as u32;
|
||||
let translate_latency_ms = translate_latency.mseconds() as u32;
|
||||
|
||||
if transcriber.has_property_with_type("transcribe-latency", u32::static_type()) {
|
||||
transcriber.set_property("transcribe-latency", transcribe_latency_ms);
|
||||
} else if transcriber.has_property_with_type("latency", u32::static_type()) {
|
||||
transcriber.set_property("latency", transcribe_latency_ms);
|
||||
}
|
||||
|
||||
if transcriber.has_property_with_type("lateness", u32::static_type()) {
|
||||
transcriber.set_property("lateness", lateness_ms);
|
||||
}
|
||||
|
||||
transcriber.set_property("language-code", &language_code);
|
||||
|
||||
let tee = gst::ElementFactory::make("tee")
|
||||
.property("allow-not-linked", true)
|
||||
.build()?;
|
||||
let queue = gst::ElementFactory::make("queue").build()?;
|
||||
|
||||
let obj = self.obj();
|
||||
|
||||
obj.add_many([&transcriber, &tee, &queue])?;
|
||||
|
||||
transcriber.sync_state_with_parent()?;
|
||||
tee.sync_state_with_parent()?;
|
||||
|
||||
transcriber.link(&tee)?;
|
||||
tee.link(&queue)?;
|
||||
|
||||
self.audio_sinkpad.set_target(Some(
|
||||
&transcriber
|
||||
.static_pad("sink")
|
||||
.ok_or(anyhow!("No pad named sink on transcriber"))?,
|
||||
))?;
|
||||
|
||||
self.transcript_srcpad
|
||||
.set_target(Some(&queue.static_pad("src").unwrap()))?;
|
||||
|
||||
for srcpad in srcpads {
|
||||
let output_language_code = srcpad.imp().settings.lock().unwrap().language_code.clone();
|
||||
|
||||
let queue = gst::ElementFactory::make("queue").build()?;
|
||||
let translator = gst::ElementFactory::make("awstranslate")
|
||||
.property("input-language-code", &language_code)
|
||||
.property("output-language-code", output_language_code)
|
||||
.build()?;
|
||||
|
||||
if translator.has_property_with_type("latency", u32::static_type()) {
|
||||
translator.set_property("latency", translate_latency_ms);
|
||||
}
|
||||
|
||||
obj.add_many([&queue, &translator])?;
|
||||
queue.sync_state_with_parent()?;
|
||||
translator.sync_state_with_parent()?;
|
||||
|
||||
tee.link(&queue)?;
|
||||
queue.link(&translator)?;
|
||||
|
||||
srcpad.set_target(Some(
|
||||
&translator
|
||||
.static_pad("src")
|
||||
.ok_or(anyhow!("No pad named src on translator"))?,
|
||||
))?;
|
||||
|
||||
let mut pad_state = srcpad.imp().state.lock().unwrap();
|
||||
|
||||
pad_state.queue = Some(queue);
|
||||
pad_state.translator = Some(translator);
|
||||
}
|
||||
|
||||
let mut state = self.state.lock().unwrap();
|
||||
|
||||
state.transcriber = Some(transcriber);
|
||||
state.tee = Some(tee);
|
||||
state.queue = Some(queue);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn unprepare(&self) -> Result<(), Error> {
|
||||
let (transcriber, tee, queue) = {
|
||||
let mut state = self.state.lock().unwrap();
|
||||
|
||||
(
|
||||
state.transcriber.as_ref().unwrap().clone(),
|
||||
state.tee.take().unwrap(),
|
||||
state.queue.take().unwrap(),
|
||||
)
|
||||
};
|
||||
let obj = self.obj();
|
||||
|
||||
transcriber.unlink(&tee);
|
||||
|
||||
obj.remove_many([&transcriber, &tee, &queue])?;
|
||||
|
||||
self.audio_sinkpad.set_target(None::<&gst::Pad>)?;
|
||||
self.transcript_srcpad.set_target(None::<&gst::Pad>)?;
|
||||
|
||||
let srcpads = self.state.lock().unwrap().srcpads.clone();
|
||||
|
||||
for srcpad in srcpads {
|
||||
let (queue, translator) = {
|
||||
let mut pad_state = srcpad.imp().state.lock().unwrap();
|
||||
|
||||
(
|
||||
pad_state.queue.take().unwrap(),
|
||||
pad_state.translator.take().unwrap(),
|
||||
)
|
||||
};
|
||||
|
||||
obj.remove_many([&queue, &translator])?;
|
||||
|
||||
srcpad.set_target(None::<&gst::Pad>)?;
|
||||
|
||||
let _ = queue.set_state(gst::State::Null);
|
||||
let _ = translator.set_state(gst::State::Null);
|
||||
}
|
||||
|
||||
let _ = transcriber.set_state(gst::State::Null);
|
||||
let _ = tee.set_state(gst::State::Null);
|
||||
let _ = queue.set_state(gst::State::Null);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[glib::object_subclass]
|
||||
impl ObjectSubclass for TranslationBin {
|
||||
const NAME: &'static str = "GstTranslationBin";
|
||||
type Type = super::TranslationBin;
|
||||
type ParentType = gst::Bin;
|
||||
|
||||
fn with_class(klass: &Self::Class) -> Self {
|
||||
let templ = klass.pad_template("sink").unwrap();
|
||||
let audio_sinkpad = gst::PadBuilder::<gst::GhostPad>::from_template(&templ).build();
|
||||
|
||||
let templ = klass.pad_template("src").unwrap();
|
||||
let transcript_srcpad = gst::PadBuilder::<gst::GhostPad>::from_template(&templ).build();
|
||||
|
||||
Self {
|
||||
state: Mutex::new(State {
|
||||
transcriber: None,
|
||||
tee: None,
|
||||
queue: None,
|
||||
srcpads: HashSet::new(),
|
||||
pad_serial: 0,
|
||||
}),
|
||||
settings: Mutex::new(Settings::default()),
|
||||
audio_sinkpad,
|
||||
transcript_srcpad,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ObjectImpl for TranslationBin {
|
||||
fn properties() -> &'static [glib::ParamSpec] {
|
||||
static PROPERTIES: LazyLock<Vec<glib::ParamSpec>> = LazyLock::new(|| {
|
||||
vec![
|
||||
glib::ParamSpecUInt::builder("transcribe-latency")
|
||||
.nick("Transcription Latency")
|
||||
.blurb("Amount of milliseconds to allow for transcription")
|
||||
.default_value(DEFAULT_TRANSCRIBE_LATENCY.mseconds() as u32)
|
||||
.mutable_ready()
|
||||
.deprecated()
|
||||
.build(),
|
||||
glib::ParamSpecUInt::builder("lateness")
|
||||
.nick("Transcription Lateness")
|
||||
.blurb("Amount of milliseconds to offset transcription by")
|
||||
.default_value(DEFAULT_LATENESS.mseconds() as u32)
|
||||
.mutable_ready()
|
||||
.deprecated()
|
||||
.build(),
|
||||
glib::ParamSpecUInt::builder("translate-latency")
|
||||
.nick("Translation Latency")
|
||||
.blurb("Amount of milliseconds to allow for translation")
|
||||
.default_value(DEFAULT_TRANSLATE_LATENCY.mseconds() as u32)
|
||||
.mutable_ready()
|
||||
.deprecated()
|
||||
.build(),
|
||||
glib::ParamSpecString::builder("language-code")
|
||||
.nick("Language Code")
|
||||
.blurb("The language of the input stream")
|
||||
.default_value(Some(DEFAULT_INPUT_LANG_CODE))
|
||||
.mutable_ready()
|
||||
.build(),
|
||||
glib::ParamSpecObject::builder::<gst::Element>("transcriber")
|
||||
.nick("Transcriber")
|
||||
.blurb("The transcriber element to use")
|
||||
.mutable_ready()
|
||||
.build(),
|
||||
]
|
||||
});
|
||||
|
||||
PROPERTIES.as_ref()
|
||||
}
|
||||
|
||||
fn set_property(&self, _id: usize, value: &glib::Value, pspec: &glib::ParamSpec) {
|
||||
match pspec.name() {
|
||||
"transcribe-latency" => {
|
||||
let mut settings = self.settings.lock().unwrap();
|
||||
settings.transcribe_latency = gst::ClockTime::from_mseconds(
|
||||
value.get::<u32>().expect("type checked upstream").into(),
|
||||
);
|
||||
}
|
||||
"lateness" => {
|
||||
let mut settings = self.settings.lock().unwrap();
|
||||
settings.lateness = gst::ClockTime::from_mseconds(
|
||||
value.get::<u32>().expect("type checked upstream").into(),
|
||||
);
|
||||
}
|
||||
"translate-latency" => {
|
||||
let mut settings = self.settings.lock().unwrap();
|
||||
settings.translate_latency = gst::ClockTime::from_mseconds(
|
||||
value.get::<u32>().expect("type checked upstream").into(),
|
||||
);
|
||||
}
|
||||
"language-code" => {
|
||||
let language_code: String = value.get().expect("type checked upstream");
|
||||
let mut settings = self.settings.lock().unwrap();
|
||||
settings.language_code = language_code;
|
||||
}
|
||||
"transcriber" => {
|
||||
self.state.lock().unwrap().transcriber =
|
||||
value.get().expect("type checked upstream");
|
||||
}
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn property(&self, _id: usize, pspec: &glib::ParamSpec) -> glib::Value {
|
||||
match pspec.name() {
|
||||
"transcribe-latency" => {
|
||||
let settings = self.settings.lock().unwrap();
|
||||
(settings.transcribe_latency.mseconds() as u32).to_value()
|
||||
}
|
||||
"lateness" => {
|
||||
let settings = self.settings.lock().unwrap();
|
||||
(settings.lateness.mseconds() as u32).to_value()
|
||||
}
|
||||
"translate-latency" => {
|
||||
let settings = self.settings.lock().unwrap();
|
||||
(settings.translate_latency.mseconds() as u32).to_value()
|
||||
}
|
||||
"language-code" => {
|
||||
let settings = self.settings.lock().unwrap();
|
||||
settings.language_code.to_value()
|
||||
}
|
||||
"transcriber" => self.state.lock().unwrap().transcriber.to_value(),
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn constructed(&self) {
|
||||
self.parent_constructed();
|
||||
|
||||
let obj = self.obj();
|
||||
obj.add_pad(&self.audio_sinkpad).unwrap();
|
||||
obj.add_pad(&self.transcript_srcpad).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
// No `GstObject` behaviour is overridden.
impl GstObjectImpl for TranslationBin {}
|
||||
|
||||
impl ElementImpl for TranslationBin {
|
||||
fn request_new_pad(
|
||||
&self,
|
||||
templ: &gst::PadTemplate,
|
||||
_name: Option<&str>,
|
||||
_caps: Option<&gst::Caps>,
|
||||
) -> Option<gst::Pad> {
|
||||
let serial = {
|
||||
let mut state = self.state.lock().unwrap();
|
||||
let serial = state.pad_serial;
|
||||
state.pad_serial += 1;
|
||||
serial
|
||||
};
|
||||
|
||||
let pad = gst::PadBuilder::<super::TranslationSrcPad>::from_template(templ)
|
||||
.name(format!("translate_src_{}", serial))
|
||||
.build();
|
||||
|
||||
self.obj().add_pad(pad.upcast_ref::<gst::Pad>()).unwrap();
|
||||
|
||||
self.state.lock().unwrap().srcpads.insert(pad.clone());
|
||||
|
||||
Some(pad.upcast())
|
||||
}
|
||||
|
||||
fn release_pad(&self, pad: &gst::Pad) {
|
||||
let _ = self.state.lock().unwrap().srcpads.remove(pad);
|
||||
|
||||
let _ = self.obj().remove_pad(pad);
|
||||
}
|
||||
|
||||
fn metadata() -> Option<&'static gst::subclass::ElementMetadata> {
|
||||
static ELEMENT_METADATA: LazyLock<gst::subclass::ElementMetadata> = LazyLock::new(|| {
|
||||
gst::subclass::ElementMetadata::new(
|
||||
"TranslationBin",
|
||||
"Audio / Text",
|
||||
"Transcribes audio and translates it",
|
||||
"Mathieu Duponchelle <mathieu@centricular.com>",
|
||||
)
|
||||
});
|
||||
|
||||
Some(&*ELEMENT_METADATA)
|
||||
}
|
||||
|
||||
fn pad_templates() -> &'static [gst::PadTemplate] {
|
||||
static PAD_TEMPLATES: LazyLock<Vec<gst::PadTemplate>> = LazyLock::new(|| {
|
||||
let caps = gst::Caps::builder("audio/x-raw").build();
|
||||
let audio_sink_pad_template = gst::PadTemplate::new(
|
||||
"sink",
|
||||
gst::PadDirection::Sink,
|
||||
gst::PadPresence::Always,
|
||||
&caps,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let caps = gst::Caps::builder("text/x-raw")
|
||||
.field("format", "utf8")
|
||||
.build();
|
||||
let transcript_src_pad_template = gst::PadTemplate::new(
|
||||
"src",
|
||||
gst::PadDirection::Src,
|
||||
gst::PadPresence::Always,
|
||||
&caps,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let translate_src_pad_template = gst::PadTemplate::with_gtype(
|
||||
"translate_src_%u",
|
||||
gst::PadDirection::Src,
|
||||
gst::PadPresence::Request,
|
||||
&caps,
|
||||
super::TranslationSrcPad::static_type(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
vec![
|
||||
audio_sink_pad_template,
|
||||
transcript_src_pad_template,
|
||||
translate_src_pad_template,
|
||||
]
|
||||
});
|
||||
|
||||
PAD_TEMPLATES.as_ref()
|
||||
}
|
||||
|
||||
#[allow(clippy::single_match)]
|
||||
fn change_state(
|
||||
&self,
|
||||
transition: gst::StateChange,
|
||||
) -> Result<gst::StateChangeSuccess, gst::StateChangeError> {
|
||||
gst::trace!(CAT, imp = self, "Changing state {:?}", transition);
|
||||
|
||||
match transition {
|
||||
gst::StateChange::ReadyToPaused => {
|
||||
self.prepare().map_err(|err| {
|
||||
gst::error!(CAT, "Failed to prepare: {:?}", err);
|
||||
gst::StateChangeError
|
||||
})?;
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
||||
let ret = self.parent_change_state(transition);
|
||||
|
||||
match transition {
|
||||
gst::StateChange::PausedToReady => {
|
||||
self.unprepare().map_err(|err| {
|
||||
gst::error!(CAT, "Failed to unprepare: {:?}", err);
|
||||
gst::StateChangeError
|
||||
})?;
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
// No `GstBin` virtual methods are overridden.
impl BinImpl for TranslationBin {}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct TranslationSrcPadState {
|
||||
queue: Option<gst::Element>,
|
||||
translator: Option<gst::Element>,
|
||||
}
|
||||
|
||||
/// User-configurable settings of a `TranslationSrcPad`.
#[derive(Debug)]
struct TranslationSrcPadSettings {
    /// Backing storage for the pad's `language-code` property.
    language_code: String,
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct TranslationSrcPad {
|
||||
state: Mutex<TranslationSrcPadState>,
|
||||
settings: Mutex<TranslationSrcPadSettings>,
|
||||
}
|
||||
|
||||
#[glib::object_subclass]
|
||||
impl ObjectSubclass for TranslationSrcPad {
|
||||
const NAME: &'static str = "GstTranslationBinTranslationSrcPad";
|
||||
type Type = super::TranslationSrcPad;
|
||||
type ParentType = gst::GhostPad;
|
||||
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
state: Mutex::new(TranslationSrcPadState {
|
||||
queue: None,
|
||||
translator: None,
|
||||
}),
|
||||
settings: Mutex::new(TranslationSrcPadSettings {
|
||||
language_code: DEFAULT_OUTPUT_LANG_CODE.to_string(),
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ObjectImpl for TranslationSrcPad {
|
||||
fn properties() -> &'static [glib::ParamSpec] {
|
||||
static PROPERTIES: LazyLock<Vec<glib::ParamSpec>> = LazyLock::new(|| {
|
||||
vec![glib::ParamSpecString::builder("language-code")
|
||||
.nick("Language Code")
|
||||
.blurb("The language of the output stream")
|
||||
.default_value(Some(DEFAULT_OUTPUT_LANG_CODE))
|
||||
.mutable_ready()
|
||||
.build()]
|
||||
});
|
||||
|
||||
PROPERTIES.as_ref()
|
||||
}
|
||||
|
||||
fn set_property(&self, _id: usize, value: &glib::Value, pspec: &glib::ParamSpec) {
|
||||
match pspec.name() {
|
||||
"language-code" => {
|
||||
let language_code: String = value.get().expect("type checked upstream");
|
||||
let mut settings = self.settings.lock().unwrap();
|
||||
settings.language_code = language_code;
|
||||
}
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the current value of one of the pad's properties.
///
/// Only `language-code` is supported; any other property name hits
/// `unimplemented!()`.
fn property(&self, _id: usize, pspec: &glib::ParamSpec) -> glib::Value {
    if pspec.name() == "language-code" {
        let current = self.settings.lock().unwrap();
        return current.language_code.to_value();
    }

    unimplemented!()
}
|
||||
}
|
||||
|
||||
// No `GstObject` virtual methods are overridden for the pad.
impl GstObjectImpl for TranslationSrcPad {}
|
||||
|
||||
// No `GstPad` virtual methods are overridden.
impl PadImpl for TranslationSrcPad {}
|
||||
|
||||
// No `GstProxyPad` virtual methods are overridden.
impl ProxyPadImpl for TranslationSrcPad {}
|
||||
|
||||
// No `GstGhostPad` virtual methods are overridden.
impl GhostPadImpl for TranslationSrcPad {}
|
34
video/closedcaption/src/translationbin/mod.rs
Normal file
34
video/closedcaption/src/translationbin/mod.rs
Normal file
|
@ -0,0 +1,34 @@
|
|||
// Copyright (C) 2025 Mathieu Duponchelle <mathieu@centricular.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla Public License, v2.0.
|
||||
// If a copy of the MPL was not distributed with this file, You can obtain one at
|
||||
// <https://mozilla.org/MPL/2.0/>.
|
||||
//
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
|
||||
use gst::glib;
|
||||
use gst::prelude::*;
|
||||
|
||||
mod imp;
|
||||
|
||||
glib::wrapper! {
    /// Public wrapper type for the bin implemented by `imp::TranslationBin`.
    ///
    /// It is registered as the `translationbin` element by [`register`].
    pub struct TranslationBin(ObjectSubclass<imp::TranslationBin>) @extends gst::Bin, gst::Element, gst::Object;
}
|
||||
|
||||
glib::wrapper! {
    /// Public wrapper type for the ghost pad subclass implemented by
    /// `imp::TranslationSrcPad`.
    pub struct TranslationSrcPad(ObjectSubclass<imp::TranslationSrcPad>) @extends gst::GhostPad, gst::ProxyPad, gst::Pad, gst::Object;
}
|
||||
|
||||
pub fn register(plugin: &gst::Plugin) -> Result<(), glib::BoolError> {
|
||||
#[cfg(feature = "doc")]
|
||||
{
|
||||
TranslationSrcPad::static_type().mark_as_plugin_api(gst::PluginAPIFlags::empty());
|
||||
}
|
||||
|
||||
gst::Element::register(
|
||||
Some(plugin),
|
||||
"translationbin",
|
||||
gst::Rank::NONE,
|
||||
TranslationBin::static_type(),
|
||||
)
|
||||
}
|
Loading…
Reference in a new issue