transcriberbin: add support for consuming secondary audio streams

In some situations, a translated alternate audio stream might be
available for a piece of content.

Instead of going through transcription and translation of the original
audio stream, it may be preferable for accuracy purposes to simply
transcribe the secondary audio stream.

This MR adds support for doing just that:

* Secondary audio sink pads can be requested as "sink_audio_%u"

* Sometimes audio source pads are added at that point to pass through
  the audio, as "src_audio_%u"

* The main transcription bin now contains per-input stream transcription
  bins. Those can be individually controlled through properties on the
  sink pads, for instance translation-languages can be dynamically set
  per audio stream

* Some properties that originally existed on the main element still
  remain, but are now simply mapped to the always audio sink pad

* Releasing of secondary sink pads is nominally implemented, but not
  tested in states other than NULL

An example launch line for this would be:

```
$ gst-launch-1.0 transcriberbin name=transcriberbin latency=8000 accumulate-time=0 \
      cc-caps="closedcaption/x-cea-708, format=cc_data" sink_audio_0::language-code="es-US" \
      sink_audio_0::translation-languages="languages, transcript=cc3" \
    uridecodebin uri=file:///home/meh/Music/chaplin.mkv name=d \
      d. ! videoconvert ! transcriberbin.sink_video \
      d. ! clocksync ! audioconvert ! transcriberbin.sink_audio \
      transcriberbin.src_video ! cea608overlay field=1 ! videoconvert ! autovideosink \
      transcriberbin.src_audio ! audioconvert ! fakesink \
    uridecodebin uri=file:///home/meh/Music/chaplin-spanish.webm name=d2 \
      d2. ! audioconvert ! transcriberbin.sink_audio_0 \
      transcriberbin.src_audio_0 ! fakesink
```

Part-of: <https://gitlab.freedesktop.org/gstreamer/gst-plugins-rs/-/merge_requests/1546>
This commit is contained in:
Mathieu Duponchelle 2024-04-19 20:12:46 +02:00
parent 66030f36ad
commit 17d7997137
3 changed files with 1031 additions and 368 deletions

View file

@ -5585,7 +5585,14 @@
"sink_audio": { "sink_audio": {
"caps": "audio/x-raw:\n", "caps": "audio/x-raw:\n",
"direction": "sink", "direction": "sink",
"presence": "always" "presence": "always",
"type": "GstTranscriberSinkPad"
},
"sink_audio_%%u": {
"caps": "audio/x-raw:\n",
"direction": "sink",
"presence": "request",
"type": "GstTranscriberSinkPad"
}, },
"sink_video": { "sink_video": {
"caps": "video/x-raw(ANY):\n", "caps": "video/x-raw(ANY):\n",
@ -5597,6 +5604,12 @@
"direction": "src", "direction": "src",
"presence": "always" "presence": "always"
}, },
"src_audio_%%u": {
"caps": "audio/x-raw:\n",
"direction": "src",
"presence": "sometimes",
"type": "GstTranscriberSrcPad"
},
"src_video": { "src_video": {
"caps": "video/x-raw(ANY):\n", "caps": "video/x-raw(ANY):\n",
"direction": "src", "direction": "src",
@ -5735,6 +5748,7 @@
"construct": false, "construct": false,
"construct-only": false, "construct-only": false,
"controllable": false, "controllable": false,
"default": "languages, transcript=(string)cc1;",
"mutable": "playing", "mutable": "playing",
"readable": true, "readable": true,
"type": "GstStructure", "type": "GstStructure",
@ -6038,6 +6052,79 @@
} }
] ]
}, },
"GstTranscriberSinkPad": {
"hierarchy": [
"GstTranscriberSinkPad",
"GstGhostPad",
"GstProxyPad",
"GstPad",
"GstObject",
"GInitiallyUnowned",
"GObject"
],
"kind": "object",
"properties": {
"language-code": {
"blurb": "The language of the input stream",
"conditionally-available": false,
"construct": false,
"construct-only": false,
"controllable": false,
"default": "en-US",
"mutable": "playing",
"readable": true,
"type": "gchararray",
"writable": true
},
"mode": {
"blurb": "Which closed caption mode to operate in",
"conditionally-available": false,
"construct": false,
"construct-only": false,
"controllable": false,
"default": "roll-up2 (2)",
"mutable": "playing",
"readable": true,
"type": "GstTtToCea608Mode",
"writable": true
},
"transcriber": {
"blurb": "The transcriber element to use",
"conditionally-available": false,
"construct": false,
"construct-only": false,
"controllable": false,
"mutable": "ready",
"readable": true,
"type": "GstElement",
"writable": true
},
"translation-languages": {
"blurb": "A map of language codes to caption channels, e.g. translation-languages=\"languages, transcript={CC1, 708_1}, fr={708_2, CC3}\" will map the French translation to CC3/service 2 and the original transcript to CC1/service 1",
"conditionally-available": false,
"construct": false,
"construct-only": false,
"controllable": false,
"default": "languages, transcript=(string)cc1;",
"mutable": "playing",
"readable": true,
"type": "GstStructure",
"writable": true
}
}
},
"GstTranscriberSrcPad": {
"hierarchy": [
"GstTranscriberSrcPad",
"GstGhostPad",
"GstProxyPad",
"GstPad",
"GstObject",
"GInitiallyUnowned",
"GObject"
],
"kind": "object"
},
"GstTtToCea608Mode": { "GstTtToCea608Mode": {
"kind": "enum", "kind": "enum",
"values": [ "values": [

File diff suppressed because it is too large Load diff

View file

@ -30,7 +30,15 @@ enum MuxMethod {
} }
glib::wrapper! { glib::wrapper! {
pub struct TranscriberBin(ObjectSubclass<imp::TranscriberBin>) @extends gst::Bin, gst::Element, gst::Object; pub struct TranscriberBin(ObjectSubclass<imp::TranscriberBin>) @extends gst::Bin, gst::Element, gst::Object, @implements gst::ChildProxy;
}
glib::wrapper! {
pub struct TranscriberSinkPad(ObjectSubclass<imp::TranscriberSinkPad>) @extends gst::GhostPad, gst::ProxyPad, gst::Pad, gst::Object;
}
glib::wrapper! {
pub struct TranscriberSrcPad(ObjectSubclass<imp::TranscriberSrcPad>) @extends gst::GhostPad, gst::ProxyPad, gst::Pad, gst::Object;
} }
pub fn register(plugin: &gst::Plugin) -> Result<(), glib::BoolError> { pub fn register(plugin: &gst::Plugin) -> Result<(), glib::BoolError> {
@ -38,6 +46,8 @@ pub fn register(plugin: &gst::Plugin) -> Result<(), glib::BoolError> {
{ {
CaptionSource::static_type().mark_as_plugin_api(gst::PluginAPIFlags::empty()); CaptionSource::static_type().mark_as_plugin_api(gst::PluginAPIFlags::empty());
MuxMethod::static_type().mark_as_plugin_api(gst::PluginAPIFlags::empty()); MuxMethod::static_type().mark_as_plugin_api(gst::PluginAPIFlags::empty());
TranscriberSinkPad::static_type().mark_as_plugin_api(gst::PluginAPIFlags::empty());
TranscriberSrcPad::static_type().mark_as_plugin_api(gst::PluginAPIFlags::empty());
} }
gst::Element::register( gst::Element::register(