audio/elevenlabs: new speech synthesis element around ElevenLabs API

Part-of: <https://gitlab.freedesktop.org/gstreamer/gst-plugins-rs/-/merge_requests/2313>
This commit is contained in:
Mathieu Duponchelle 2025-06-17 20:59:39 +02:00 committed by GStreamer Marge Bot
parent c269ef50dd
commit 6061ddbb5b
10 changed files with 1364 additions and 0 deletions

16
Cargo.lock generated
View file

@ -2807,6 +2807,22 @@ dependencies = [
"num_cpus",
]
[[package]]
name = "gst-plugin-elevenlabs"
version = "0.14.0-alpha.1"
dependencies = [
"anyhow",
"futures",
"gst-plugin-version-helper",
"gstreamer",
"gstreamer-audio",
"gstreamer-base",
"reqwest 0.12.20",
"serde",
"serde_json",
"tokio",
]
[[package]]
name = "gst-plugin-fallbackswitch"
version = "0.14.0-alpha.1"

View file

@ -10,6 +10,7 @@ members = [
"audio/audiofx",
"audio/claxon",
"audio/csound",
"audio/elevenlabs",
"audio/lewton",
"audio/speechmatics",
"audio/spotify",

View file

@ -0,0 +1,47 @@
[package]
name = "gst-plugin-elevenlabs"
version.workspace = true
authors = ["Mathieu Duponchelle <mathieu@centricular.com>"]
repository.workspace = true
license = "MPL-2.0"
description = "GStreamer ElevenLabs plugin"
edition.workspace = true
rust-version.workspace = true
[dependencies]
futures = "0.3"
gst = { workspace = true, features = ["v1_20"] }
gst-base.workspace = true
gst-audio = { workspace = true, features = ["v1_16"] }
tokio = { version = "1", features = [ "full" ] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
anyhow = "1"
reqwest = { version = "0.12" }
[lib]
name = "gstelevenlabs"
crate-type = ["cdylib", "rlib"]
path = "src/lib.rs"
[build-dependencies]
gst-plugin-version-helper.workspace = true
[features]
static = []
capi = []
doc = ["gst/v1_18"]
[package.metadata.capi]
min_version = "0.9.21"
[package.metadata.capi.header]
enabled = false
[package.metadata.capi.library]
install_subdir = "gstreamer-1.0"
versioning = false
import_library = false
[package.metadata.capi.pkg_config]
requires_private = "gstreamer-1.0, gstreamer-base-1.0, gstreamer-audio-1.0, gobject-2.0, glib-2.0, gmodule-2.0"

View file

@ -0,0 +1,3 @@
/// Build-script entry point: delegates to `gst-plugin-version-helper` to emit
/// the build-time plugin metadata.
// NOTE(review): presumably this is what provides the `COMMIT_ID` and
// `BUILD_REL_DATE` environment variables read via `env!` in `src/lib.rs` — confirm.
fn main() {
    gst_plugin_version_helper::info();
}

View file

@ -0,0 +1,36 @@
// Copyright (C) 2025 Mathieu Duponchelle <mathieu@centricular.com>
//
// This Source Code Form is subject to the terms of the Mozilla Public License, v2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at
// <https://mozilla.org/MPL/2.0/>.
//
// SPDX-License-Identifier: MPL-2.0
#![allow(clippy::non_send_fields_in_send_ty, unused_doc_comments)]
#![recursion_limit = "128"]
/**
* plugin-elevenlabs:
*
* Since: plugins-rs-0.14.0
*/
use gst::glib;
mod synthesizer;
/// Plugin initialisation callback handed to `gst::plugin_define!`.
///
/// Registers the synthesizer element with `plugin` and returns the result of
/// that registration unchanged.
fn plugin_init(plugin: &gst::Plugin) -> Result<(), glib::BoolError> {
    synthesizer::register(plugin)
}
// Declares the GStreamer plugin entry point for this crate; `plugin_init` is
// invoked when the plugin is loaded.
gst::plugin_define!(
    elevenlabs,
    env!("CARGO_PKG_DESCRIPTION"),
    plugin_init,
    // Version string: crate version suffixed with the commit id provided at build time.
    concat!(env!("CARGO_PKG_VERSION"), "-", env!("COMMIT_ID")),
    // The crate is published under MPL-2.0 (see the SPDX header above and
    // `license` in Cargo.toml), so the plugin must not advertise itself as
    // "Proprietary". "MPL" is used because it is in GStreamer's list of known
    // license identifiers for every supported GStreamer version.
    "MPL",
    env!("CARGO_PKG_NAME"),
    env!("CARGO_PKG_NAME"),
    env!("CARGO_PKG_REPOSITORY"),
    env!("BUILD_REL_DATE")
);

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,42 @@
// Copyright (C) 2025 Mathieu Duponchelle <mathieu@centricular.com>
//
// This Source Code Form is subject to the terms of the Mozilla Public License, v2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at
// <https://mozilla.org/MPL/2.0/>.
//
// SPDX-License-Identifier: MPL-2.0
use gst::glib;
use gst::prelude::*;
mod imp;
// Public GObject wrapper type for the element whose implementation lives in
// `imp::Synthesizer`. It is declared as extending `gst::Element` and
// `gst::Object`, and is the type registered by `register()` below.
glib::wrapper! {
pub struct Synthesizer(ObjectSubclass<imp::Synthesizer>) @extends gst::Element, gst::Object;
}
/// Enumeration registered with GLib under the type name `GstElevenLabsOverflow`.
///
/// The variants are exposed with the nicks `clip`, `overlap` and `shift`.
// NOTE(review): presumably this backs the element's `overflow` property
// (handling of synthesized audio that lasts longer than the input text);
// the per-variant behaviour is implemented in `imp` — confirm there.
#[derive(Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Clone, Copy, glib::Enum)]
#[repr(u32)]
#[enum_type(name = "GstElevenLabsOverflow")]
#[non_exhaustive]
pub enum Overflow {
#[enum_value(name = "Clip", nick = "clip")]
Clip = 0,
#[enum_value(name = "Overlap", nick = "overlap")]
Overlap = 1,
#[enum_value(name = "Shift", nick = "shift")]
Shift = 2,
}
/// Registers the `elevenlabssynthesizer` element with the given plugin.
///
/// The element is registered with `gst::Rank::NONE`. When the crate is built
/// with the `doc` feature, the `Overflow` enum type is additionally marked as
/// plugin API before the element is registered.
///
/// Returns the `glib::BoolError` produced by `gst::Element::register` on failure.
pub fn register(plugin: &gst::Plugin) -> Result<(), glib::BoolError> {
    #[cfg(feature = "doc")]
    Overflow::static_type().mark_as_plugin_api(gst::PluginAPIFlags::empty());

    let element_type = Synthesizer::static_type();
    gst::Element::register(
        Some(plugin),
        "elevenlabssynthesizer",
        gst::Rank::NONE,
        element_type,
    )
}

View file

@ -2721,6 +2721,152 @@
"tracers": {},
"url": "https://gitlab.freedesktop.org/gstreamer/gst-plugins-rs"
},
"elevenlabs": {
"description": "GStreamer ElevenLabs plugin",
"elements": {
"elevenlabssynthesizer": {
"author": "Mathieu Duponchelle <mathieu@centricular.com>",
"description": "Text to Speech filter, using ElevenLabs",
"hierarchy": [
"GstElevenLabsSynthesizer",
"GstElement",
"GstObject",
"GInitiallyUnowned",
"GObject"
],
"klass": "Audio/Text/Filter",
"pad-templates": {
"sink": {
"caps": "text/x-raw:\n format: utf8\n",
"direction": "sink",
"presence": "always"
},
"src": {
"caps": "audio/x-raw:\n rate: { (int)22050, (int)48000, (int)44100, (int)24000, (int)16000, (int)8000 }\n channels: 1\n layout: interleaved\n format: S16LE\n",
"direction": "src",
"presence": "always"
}
},
"properties": {
"api-key": {
"blurb": "ElevenLabs API Key",
"conditionally-available": false,
"construct": false,
"construct-only": false,
"controllable": false,
"default": "NULL",
"mutable": "ready",
"readable": true,
"type": "gchararray",
"writable": true
},
"language-code": {
"blurb": "An optional language code (ISO 639-1), useful with certain models",
"conditionally-available": false,
"construct": false,
"construct-only": false,
"controllable": false,
"default": "NULL",
"mutable": "ready",
"readable": true,
"type": "gchararray",
"writable": true
},
"latency": {
"blurb": "Amount of milliseconds to allow ElevenLabs",
"conditionally-available": false,
"construct": false,
"construct-only": false,
"controllable": false,
"default": "2000",
"max": "-1",
"min": "0",
"mutable": "ready",
"readable": true,
"type": "guint",
"writable": true
},
"model-id": {
"blurb": "ElevenLabs Model ID, see https://help.elevenlabs.io/hc/en-us/articles/21811236079505-How-do-I-find-the-model-ID",
"conditionally-available": false,
"construct": false,
"construct-only": false,
"controllable": false,
"default": "eleven_flash_v2_5",
"mutable": "ready",
"readable": true,
"type": "gchararray",
"writable": true
},
"overflow": {
"blurb": "Defines how output audio with a longer duration than input text should be handled",
"conditionally-available": false,
"construct": false,
"construct-only": false,
"controllable": false,
"default": "clip (0)",
"mutable": "ready",
"readable": true,
"type": "GstElevenLabsOverflow",
"writable": true
},
"retry-with-speed": {
"blurb": "When synthesis results in larger duration, retry with higher speed",
"conditionally-available": false,
"construct": false,
"construct-only": false,
"controllable": false,
"default": "true",
"mutable": "ready",
"readable": true,
"type": "gboolean",
"writable": true
},
"voice-id": {
"blurb": "ElevenLabs Voice ID, see https://elevenlabs.io/app/voice-library",
"conditionally-available": false,
"construct": false,
"construct-only": false,
"controllable": false,
"default": "9BWtsMINqrJLrRacOk9x",
"mutable": "ready",
"readable": true,
"type": "gchararray",
"writable": true
}
},
"rank": "none"
}
},
"filename": "gstelevenlabs",
"license": "Proprietary",
"other-types": {
"GstElevenLabsOverflow": {
"kind": "enum",
"values": [
{
"desc": "Clip",
"name": "clip",
"value": "0"
},
{
"desc": "Overlap",
"name": "overlap",
"value": "1"
},
{
"desc": "Shift",
"name": "shift",
"value": "2"
}
]
}
},
"package": "gst-plugin-elevenlabs",
"source": "gst-plugin-elevenlabs",
"tracers": {},
"url": "https://gitlab.freedesktop.org/gstreamer/gst-plugins-rs"
},
"fallbackswitch": {
"description": "GStreamer Fallback Switcher and Source Plugin",
"elements": {

View file

@ -197,6 +197,7 @@ plugins = {
'extra-deps': {'dav1d': ['>=1.3']},
'features': ['gst/v1_18', 'gst-base/v1_18', 'gst-video/v1_18'],
},
'elevenlabs': {'library': 'libgstelevenlabs'},
'ffv1': {'library': 'libgstffv1'},
'flavors': {'library': 'libgstrsflv'},
'gif': {

View file

@ -7,6 +7,7 @@ option('analytics', type: 'feature', value: 'auto', description: 'Build analytic
option('audiofx', type: 'feature', value: 'auto', description: 'Build audiofx plugin')
option('claxon', type: 'feature', value: 'auto', description: 'Build claxon plugin')
option('csound', type: 'feature', value: 'auto', description: 'Build csound plugin')
option('elevenlabs', type: 'feature', value: 'auto', description: 'Build elevenlabs plugin')
option('lewton', type: 'feature', value: 'auto', description: 'Build lewton plugin')
option('spotify', type: 'feature', value: 'auto', description: 'Build spotify plugin')
option('speechmatics', type: 'feature', value: 'auto', description: 'Build speechmatics plugin')