mirror of
https://gitlab.freedesktop.org/gstreamer/gst-plugins-rs.git
synced 2025-09-03 02:03:48 +00:00
net/aws: consolidate apostrophes with next word in span_tokenize_items
In span-based mode, the text to translate looks like this:
    <span>the</span> <span>year</span>
When translating to French, we may receive the following back from the service:
    <span>l'</span> <span>année</span>
This in turn means we would push out two items. It makes more sense to push those as a single item, as this will prevent downstream elements from inserting a space between them.
Part-of: <https://gitlab.freedesktop.org/gstreamer/gst-plugins-rs/-/merge_requests/2126>
This commit is contained in:
parent
644c10f62c
commit
10ed6582a9
1 changed file with 19 additions and 1 deletion
|
@ -15,6 +15,7 @@ use aws_sdk_translate::error::ProvideErrorMetadata;
|
|||
use futures::channel::mpsc;
|
||||
use futures::prelude::*;
|
||||
|
||||
use std::collections::VecDeque;
|
||||
use std::sync::Arc;
|
||||
|
||||
use super::imp::TranslateSrcPad;
|
||||
|
@ -343,7 +344,24 @@ pub fn span_tokenize_items(
|
|||
}
|
||||
}
|
||||
|
||||
translated_items
|
||||
let mut consolidated_items: VecDeque<TranslatedItem> = VecDeque::new();
|
||||
let mut consolidate = false;
|
||||
|
||||
for item in translated_items.drain(..) {
|
||||
if consolidate {
|
||||
let last_item = consolidated_items.back_mut().unwrap();
|
||||
last_item.duration = item.pts + item.duration - last_item.pts;
|
||||
last_item.content += &item.content;
|
||||
consolidate = false;
|
||||
continue;
|
||||
}
|
||||
if item.content.ends_with("'") || item.content.ends_with("’") {
|
||||
consolidate = true;
|
||||
}
|
||||
consolidated_items.push_back(item);
|
||||
}
|
||||
|
||||
consolidated_items.into()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
Loading…
Reference in a new issue