aws/polly: add GstScaletempoTargetDurationMeta to output buffers

When a scaletempo element is placed downstream of the Polly element, it
can use this meta to rescale the generated audio to the target duration,
i.e. the duration of the original input text buffer, thus preserving
synchronization.

Part-of: <https://gitlab.freedesktop.org/gstreamer/gst-plugins-rs/-/merge_requests/2012>
This commit is contained in:
Mathieu Duponchelle 2024-12-23 17:40:51 +01:00 committed by GStreamer Marge Bot
parent 3a4d01d1fb
commit 0e6413b827

View file

@ -164,7 +164,7 @@ impl Polly {
.pts()
.ok_or_else(|| anyhow!("Stream with timestamped buffers required"))?;
let duration = inbuf
let input_duration = inbuf
.duration()
.ok_or_else(|| anyhow!("Buffers of stream need to have a duration"))?;
@ -198,7 +198,7 @@ impl Polly {
.text(if settings.ssml_set_max_duration {
format!(
"<speak><prosody amazon:max-duration=\"{}ms\">{data}</prosody></speak>",
duration.mseconds()
input_duration.mseconds()
)
} else {
data.to_owned()
@ -234,7 +234,7 @@ impl Polly {
let overflow = self.settings.lock().unwrap().overflow;
if matches!(overflow, AwsOverflow::Clip) {
let max_expected_bytes = duration
let max_expected_bytes = input_duration
.nseconds()
.mul_div_floor(32_000, 1_000_000_000)
.unwrap()
@ -281,6 +281,13 @@ impl Polly {
buf_mut.set_pts(pts);
buf_mut.set_duration(duration);
if let Ok(mut meta) =
gst::meta::CustomMeta::add(buf_mut, "GstScaletempoTargetDurationMeta")
{
meta.mut_structure()
.set("duration", input_duration.nseconds());
}
if discont {
gst::debug!(CAT, imp = self, "Marking buffer discont");
buf_mut.set_flags(gst::BufferFlags::DISCONT);