mirror of
https://gitlab.freedesktop.org/gstreamer/gst-plugins-rs.git
synced 2025-01-10 19:25:26 +00:00
awstranscriber: fix what we send over for translations
Prior to this commit, we were sending the words over concatenated together with no separators, for instance "Idon'twanttobeanemperor". The translation service seems clever enough to translate such content anyway, but there is no reason to make its task harder than necessary. Moreover, it did not re-add separators when the target language was the same as the source language, which resulted in less-than-ideal output. Part-of: <https://gitlab.freedesktop.org/gstreamer/gst-plugins-rs/-/merge_requests/1171>
This commit is contained in:
parent
408fd2030c
commit
f366c20869
1 changed file with 28 additions and 17 deletions
|
@ -109,23 +109,34 @@ impl TranslateLoop {
|
|||
continue;
|
||||
}
|
||||
|
||||
let (ts_duration_list, content): (Vec<(gst::ClockTime, gst::ClockTime)>, String) =
|
||||
transcript_items
|
||||
.iter()
|
||||
.map(|item| {
|
||||
(
|
||||
(item.pts, item.duration),
|
||||
match self.tokenization_method {
|
||||
Tokenization::None => item.content.clone(),
|
||||
Tokenization::SpanBased => {
|
||||
format!("{SPAN_START}{}{SPAN_END}", item.content)
|
||||
}
|
||||
},
|
||||
)
|
||||
})
|
||||
.unzip();
|
||||
let mut ts_duration_list: Vec<(gst::ClockTime, gst::ClockTime)> = vec![];
|
||||
let mut content: Vec<String> = vec![];
|
||||
|
||||
gst::trace!(CAT, imp: self.pad, "Translating {content} with {ts_duration_list:?}");
|
||||
let mut it = transcript_items.iter().peekable();
|
||||
|
||||
while let Some(item) = it.next() {
|
||||
let suffix = match it.peek() {
|
||||
Some(next_item) => {
|
||||
if next_item.is_punctuation {
|
||||
""
|
||||
} else {
|
||||
" "
|
||||
}
|
||||
}
|
||||
None => "",
|
||||
};
|
||||
ts_duration_list.push((item.pts, item.duration));
|
||||
content.push(match self.tokenization_method {
|
||||
Tokenization::None => format!("{}{}", item.content, suffix),
|
||||
Tokenization::SpanBased => {
|
||||
format!("{SPAN_START}{}{SPAN_END}{}", item.content, suffix)
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
let content: String = content.join("");
|
||||
|
||||
gst::debug!(CAT, imp: self.pad, "Translating {content} with {ts_duration_list:?}");
|
||||
|
||||
let translated_text = self
|
||||
.client
|
||||
|
@ -143,7 +154,7 @@ impl TranslateLoop {
|
|||
.translated_text
|
||||
.unwrap_or_default();
|
||||
|
||||
gst::trace!(CAT, imp: self.pad, "Got translation {translated_text}");
|
||||
gst::debug!(CAT, imp: self.pad, "Got translation {translated_text}");
|
||||
|
||||
let translated_items = match self.tokenization_method {
|
||||
Tokenization::None => {
|
||||
|
|
Loading…
Reference in a new issue