mirror of
https://gitlab.freedesktop.org/gstreamer/gst-plugins-rs.git
synced 2024-06-03 05:49:31 +00:00
299e25ab3c
This commit adds an optional experimental translation tokenization feature. It can be activated using the `translation_src_%u` pads property `tokenization-method`. For the moment, the feature is deactivated by default. The Translate ws accepts '<span></span>' tags in the input and adds matching tags in the output. When an 'id' is also provided as an attribute of the 'span', the matching output tag also uses this 'id'. In the context of closed captions, the 'id's are of little use. However, we can take advantage of the spans in the output to identify translation chunks, which more or less reflect the rhythm of the input transcript. This commit adds simple spans (no 'id') to the input Transcript Items and parses the resulting spans in the translated output, assigning the timestamps and durations sequentially from the input Transcript Items. Edge cases such as the absence of spans or nested spans were observed and are handled here. Similarly, mismatches between the number of input and output items are taken care of by some sort of reconciliation. Note that this is still experimental and requires further testing. Part-of: <https://gitlab.freedesktop.org/gstreamer/gst-plugins-rs/-/merge_requests/1109>
123 lines
4 KiB
Rust
123 lines
4 KiB
Rust
// Copyright (C) 2020 Mathieu Duponchelle <mathieu@centricular.com>
|
|
//
|
|
// This Source Code Form is subject to the terms of the Mozilla Public License, v2.0.
|
|
// If a copy of the MPL was not distributed with this file, You can obtain one at
|
|
// <https://mozilla.org/MPL/2.0/>.
|
|
//
|
|
// SPDX-License-Identifier: MPL-2.0
|
|
|
|
use gst::glib;
|
|
use gst::prelude::*;
|
|
|
|
mod imp;
|
|
mod transcribe;
|
|
mod translate;
|
|
|
|
use once_cell::sync::Lazy;
|
|
|
|
/// Debug category named `awstranscribe`, created lazily on first access.
static CAT: Lazy<gst::DebugCategory> = Lazy::new(|| {
    // No colour flags are requested for this category.
    let color = gst::DebugColorFlags::empty();
    let description = Some("AWS Transcribe element");
    gst::DebugCategory::new("awstranscribe", color, description)
});
|
|
|
|
use aws_sdk_transcribestreaming::model::{PartialResultsStability, VocabularyFilterMethod};
|
|
|
|
#[derive(Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Clone, Copy, glib::Enum)]
|
|
#[repr(u32)]
|
|
#[enum_type(name = "GstAwsTranscriberResultStability")]
|
|
#[non_exhaustive]
|
|
pub enum AwsTranscriberResultStability {
|
|
#[enum_value(name = "High: stabilize results as fast as possible", nick = "high")]
|
|
High = 0,
|
|
#[enum_value(
|
|
name = "Medium: balance between stability and accuracy",
|
|
nick = "medium"
|
|
)]
|
|
Medium = 1,
|
|
#[enum_value(
|
|
name = "Low: relatively less stable partial transcription results with higher accuracy",
|
|
nick = "low"
|
|
)]
|
|
Low = 2,
|
|
}
|
|
|
|
impl From<AwsTranscriberResultStability> for PartialResultsStability {
|
|
fn from(val: AwsTranscriberResultStability) -> Self {
|
|
use AwsTranscriberResultStability::*;
|
|
match val {
|
|
High => PartialResultsStability::High,
|
|
Medium => PartialResultsStability::Medium,
|
|
Low => PartialResultsStability::Low,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Clone, Copy, glib::Enum)]
|
|
#[repr(u32)]
|
|
#[enum_type(name = "GstAwsTranscriberVocabularyFilterMethod")]
|
|
#[non_exhaustive]
|
|
pub enum AwsTranscriberVocabularyFilterMethod {
|
|
#[enum_value(name = "Mask: replace words with ***", nick = "mask")]
|
|
Mask = 0,
|
|
#[enum_value(name = "Remove: delete words", nick = "remove")]
|
|
Remove = 1,
|
|
#[enum_value(name = "Tag: flag words without changing them", nick = "tag")]
|
|
Tag = 2,
|
|
}
|
|
|
|
impl From<AwsTranscriberVocabularyFilterMethod> for VocabularyFilterMethod {
|
|
fn from(val: AwsTranscriberVocabularyFilterMethod) -> Self {
|
|
use AwsTranscriberVocabularyFilterMethod::*;
|
|
match val {
|
|
Mask => VocabularyFilterMethod::Mask,
|
|
Remove => VocabularyFilterMethod::Remove,
|
|
Tag => VocabularyFilterMethod::Tag,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, Default, Eq, PartialEq, Ord, PartialOrd, Hash, Clone, Copy, glib::Enum)]
|
|
#[repr(u32)]
|
|
#[enum_type(name = "GstAwsTranscriberTranslationTokenizationMethod")]
|
|
#[non_exhaustive]
|
|
pub enum TranslationTokenizationMethod {
|
|
#[default]
|
|
#[enum_value(name = "None: don't tokenize translations", nick = "none")]
|
|
None = 0,
|
|
#[enum_value(
|
|
name = "Span based: insert spans in the transript text and use the resulting spans in the translations to reproduce speech pacing.",
|
|
nick = "span-based"
|
|
)]
|
|
SpanBased = 1,
|
|
}
|
|
|
|
glib::wrapper! {
    /// Public handle for the element implemented by `imp::Transcriber`.
    ///
    /// Extends `gst::Element` and `gst::Object`, and implements
    /// `gst::ChildProxy`.
    pub struct Transcriber(ObjectSubclass<imp::Transcriber>)
        @extends gst::Element, gst::Object,
        @implements gst::ChildProxy;
}
|
|
|
|
glib::wrapper! {
    /// Public handle for the pad type implemented by `imp::TranslationSrcPad`.
    ///
    /// Extends `gst::Pad` and `gst::Object`.
    pub struct TranslationSrcPad(ObjectSubclass<imp::TranslationSrcPad>)
        @extends gst::Pad, gst::Object;
}
|
|
|
|
pub fn register(plugin: &gst::Plugin) -> Result<(), glib::BoolError> {
|
|
#[cfg(feature = "doc")]
|
|
{
|
|
AwsTranscriberResultStability::static_type()
|
|
.mark_as_plugin_api(gst::PluginAPIFlags::empty());
|
|
AwsTranscriberVocabularyFilterMethod::static_type()
|
|
.mark_as_plugin_api(gst::PluginAPIFlags::empty());
|
|
TranslationTokenizationMethod::static_type()
|
|
.mark_as_plugin_api(gst::PluginAPIFlags::empty());
|
|
TranslationSrcPad::static_type().mark_as_plugin_api(gst::PluginAPIFlags::empty());
|
|
}
|
|
gst::Element::register(
|
|
Some(plugin),
|
|
"awstranscriber",
|
|
gst::Rank::None,
|
|
Transcriber::static_type(),
|
|
)
|
|
}
|