mirror of
https://gitlab.freedesktop.org/gstreamer/gst-plugins-rs.git
synced 2025-09-05 03:04:03 +00:00
net/aws: don't insert space when joining leftover punctuation
Part-of: <https://gitlab.freedesktop.org/gstreamer/gst-plugins-rs/-/merge_requests/2126>
This commit is contained in:
parent
5fc0523c90
commit
644c10f62c
1 changed file with 43 additions and 5 deletions
|
@ -300,10 +300,15 @@ pub fn span_tokenize_items(
|
||||||
} else if let Some(last_item) = translated_items.last_mut() {
|
} else if let Some(last_item) = translated_items.last_mut() {
|
||||||
// exhausted available pts and duration
|
// exhausted available pts and duration
|
||||||
// add content to last item
|
// add content to last item
|
||||||
if !last_item.content.ends_with(' ') {
|
let starts_with_punctuation = content.starts_with(|c: char| c.is_ascii_punctuation());
|
||||||
|
|
||||||
|
if !starts_with_punctuation {
|
||||||
last_item.content.push(' ');
|
last_item.content.push(' ');
|
||||||
}
|
}
|
||||||
last_item.content.extend(content.drain(..));
|
|
||||||
|
last_item.content.push_str(content.trim());
|
||||||
|
|
||||||
|
content = String::new();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -320,15 +325,16 @@ pub fn span_tokenize_items(
|
||||||
translated_items.push(TranslatedItem {
|
translated_items.push(TranslatedItem {
|
||||||
pts,
|
pts,
|
||||||
duration,
|
duration,
|
||||||
content,
|
content: content.trim().to_string(),
|
||||||
});
|
});
|
||||||
} else if let Some(last_item) = translated_items.last_mut() {
|
} else if let Some(last_item) = translated_items.last_mut() {
|
||||||
// No more pts and duration in the index
|
// No more pts and duration in the index
|
||||||
// Add remaining content to the last item pushed
|
// Add remaining content to the last item pushed
|
||||||
if !last_item.content.ends_with(' ') {
|
let starts_with_punctuation = content.starts_with(|c: char| c.is_ascii_punctuation());
|
||||||
|
if !starts_with_punctuation {
|
||||||
last_item.content.push(' ');
|
last_item.content.push(' ');
|
||||||
}
|
}
|
||||||
last_item.content.push_str(&content);
|
last_item.content.push_str(content.trim());
|
||||||
}
|
}
|
||||||
} else if let Some((last_pts, last_duration)) = ts_duration_iter.last() {
|
} else if let Some((last_pts, last_duration)) = ts_duration_iter.last() {
|
||||||
if let Some(last_item) = translated_items.last_mut() {
|
if let Some(last_item) = translated_items.last_mut() {
|
||||||
|
@ -576,4 +582,36 @@ mod tests {
|
||||||
|
|
||||||
assert!(items.next().is_none());
|
assert!(items.next().is_none());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn exhausted_spans_join_punctuation() {
|
||||||
|
let input = "<span>et</span> <span><span>les</span> <span>Clippers</span> <span>sont</span> <span><span>au</span></span> <span>tableau</span><span>,</span> <span>et</span> <span>c'est <span>Norman</span> qui</span> <span>attaque</span> en <span>lisant</span> <span>Max <span>Christie</span>.</span></span>";
|
||||||
|
|
||||||
|
let ts_duration_list = vec![
|
||||||
|
(0.seconds(), 1.seconds()),
|
||||||
|
(1.seconds(), 1.seconds()),
|
||||||
|
(2.seconds(), 1.seconds()),
|
||||||
|
(3.seconds(), 1.seconds()),
|
||||||
|
(4.seconds(), 1.seconds()),
|
||||||
|
(5.seconds(), 1.seconds()),
|
||||||
|
(6.seconds(), 1.seconds()),
|
||||||
|
(7.seconds(), 1.seconds()),
|
||||||
|
(8.seconds(), 1.seconds()),
|
||||||
|
(9.seconds(), 1.seconds()),
|
||||||
|
(10.seconds(), 1.seconds()),
|
||||||
|
(11.seconds(), 1.seconds()),
|
||||||
|
(12.seconds(), 1.seconds()),
|
||||||
|
(13.seconds(), 1.seconds()),
|
||||||
|
(14.seconds(), 1.seconds()),
|
||||||
|
(15.seconds(), 1.seconds()),
|
||||||
|
];
|
||||||
|
|
||||||
|
let items = span_tokenize_items(input, ts_duration_list).into_iter();
|
||||||
|
|
||||||
|
let final_ = items.last().unwrap();
|
||||||
|
|
||||||
|
// when all spans are consumed and punctuation remains as the content,
|
||||||
|
// don't join it with a space with the last item content (Christie .)
|
||||||
|
assert!(final_.content == "Christie.");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue