mirror of
https://gitlab.freedesktop.org/gstreamer/gst-plugins-rs.git
synced 2025-01-18 07:05:45 +00:00
net/aws: use aws-sdk-transcribestreaming
Switch from manual webservice client impl to `aws-sdk-transcribestreaming`. Part-of: <https://gitlab.freedesktop.org/gstreamer/gst-plugins-rs/-/merge_requests/1104>
This commit is contained in:
parent
57f365979c
commit
00153754bb
5 changed files with 473 additions and 807 deletions
|
@ -619,7 +619,7 @@
|
||||||
"rank": "none"
|
"rank": "none"
|
||||||
},
|
},
|
||||||
"awstranscriber": {
|
"awstranscriber": {
|
||||||
"author": "Jordan Petridis <jordan@centricular.com>, Mathieu Duponchelle <mathieu@centricular.com>",
|
"author": "Jordan Petridis <jordan@centricular.com>, Mathieu Duponchelle <mathieu@centricular.com>, François Laignel <francois@centricular.com>",
|
||||||
"description": "Speech to Text filter, using AWS transcribe",
|
"description": "Speech to Text filter, using AWS transcribe",
|
||||||
"hierarchy": [
|
"hierarchy": [
|
||||||
"GstAwsTranscriber",
|
"GstAwsTranscriber",
|
||||||
|
|
|
@ -11,36 +11,30 @@ edition = "2021"
|
||||||
rust-version = "1.66"
|
rust-version = "1.66"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
bytes = "1.0"
|
async-stream = "0.3.4"
|
||||||
futures = "0.3"
|
base32 = "0.4"
|
||||||
gst = { package = "gstreamer", git = "https://gitlab.freedesktop.org/gstreamer/gstreamer-rs" }
|
|
||||||
gst-base = { package = "gstreamer-base", git = "https://gitlab.freedesktop.org/gstreamer/gstreamer-rs" }
|
|
||||||
gst-audio = { package = "gstreamer-audio", git = "https://gitlab.freedesktop.org/gstreamer/gstreamer-rs", features = ["v1_16"] }
|
|
||||||
aws-config = "0.54.0"
|
aws-config = "0.54.0"
|
||||||
aws-sdk-s3 = "0.24.0"
|
aws-sdk-s3 = "0.24.0"
|
||||||
aws-sdk-transcribe = "0.24.0"
|
aws-sdk-transcribestreaming = "0.24.0"
|
||||||
aws-types = "0.54.0"
|
aws-types = "0.54.0"
|
||||||
aws-credential-types = "0.54.0"
|
aws-credential-types = "0.54.0"
|
||||||
aws-sig-auth = "0.54.0"
|
aws-sig-auth = "0.54.0"
|
||||||
aws-smithy-http = { version = "0.54.0", features = [ "rt-tokio" ] }
|
aws-smithy-http = { version = "0.54.0", features = [ "rt-tokio" ] }
|
||||||
aws-smithy-types = "0.54.0"
|
aws-smithy-types = "0.54.0"
|
||||||
|
bytes = "1.0"
|
||||||
|
futures = "0.3"
|
||||||
|
gio = { git = "https://github.com/gtk-rs/gtk-rs-core.git", package = "gio" }
|
||||||
|
gst = { package = "gstreamer", git = "https://gitlab.freedesktop.org/gstreamer/gstreamer-rs" }
|
||||||
|
gst-base = { package = "gstreamer-base", git = "https://gitlab.freedesktop.org/gstreamer/gstreamer-rs" }
|
||||||
|
gst-audio = { package = "gstreamer-audio", git = "https://gitlab.freedesktop.org/gstreamer/gstreamer-rs", features = ["v1_16"] }
|
||||||
http = "0.2.7"
|
http = "0.2.7"
|
||||||
chrono = "0.4"
|
once_cell = "1.0"
|
||||||
url = "2"
|
|
||||||
percent-encoding = "2"
|
percent-encoding = "2"
|
||||||
tokio = { version = "1.0", features = [ "full" ] }
|
tokio = { version = "1.0", features = [ "full" ] }
|
||||||
async-tungstenite = { version = "0.20", features = ["tokio", "tokio-runtime", "tokio-native-tls"] }
|
|
||||||
nom = "7"
|
|
||||||
crc = "3"
|
|
||||||
byteorder = "1.3.4"
|
|
||||||
once_cell = "1.0"
|
|
||||||
serde = "1"
|
serde = "1"
|
||||||
serde_derive = "1"
|
serde_derive = "1"
|
||||||
serde_json = "1"
|
serde_json = "1"
|
||||||
atomic_refcell = "0.1"
|
url = "2"
|
||||||
base32 = "0.4"
|
|
||||||
backoff = { version = "0.4", features = [ "futures", "tokio" ] }
|
|
||||||
gio = { git = "https://github.com/gtk-rs/gtk-rs-core.git", package = "gio" }
|
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
chrono = { version = "0.4", features = [ "alloc" ] }
|
chrono = { version = "0.4", features = [ "alloc" ] }
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -10,7 +10,8 @@ use gst::glib;
|
||||||
use gst::prelude::*;
|
use gst::prelude::*;
|
||||||
|
|
||||||
mod imp;
|
mod imp;
|
||||||
mod packet;
|
|
||||||
|
use aws_sdk_transcribestreaming::model::{PartialResultsStability, VocabularyFilterMethod};
|
||||||
|
|
||||||
#[derive(Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Clone, Copy, glib::Enum)]
|
#[derive(Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Clone, Copy, glib::Enum)]
|
||||||
#[repr(u32)]
|
#[repr(u32)]
|
||||||
|
@ -31,6 +32,17 @@ pub enum AwsTranscriberResultStability {
|
||||||
Low = 2,
|
Low = 2,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl From<AwsTranscriberResultStability> for PartialResultsStability {
|
||||||
|
fn from(val: AwsTranscriberResultStability) -> Self {
|
||||||
|
use AwsTranscriberResultStability::*;
|
||||||
|
match val {
|
||||||
|
High => PartialResultsStability::High,
|
||||||
|
Medium => PartialResultsStability::Medium,
|
||||||
|
Low => PartialResultsStability::Low,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Clone, Copy, glib::Enum)]
|
#[derive(Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Clone, Copy, glib::Enum)]
|
||||||
#[repr(u32)]
|
#[repr(u32)]
|
||||||
#[enum_type(name = "GstAwsTranscriberVocabularyFilterMethod")]
|
#[enum_type(name = "GstAwsTranscriberVocabularyFilterMethod")]
|
||||||
|
@ -44,6 +56,17 @@ pub enum AwsTranscriberVocabularyFilterMethod {
|
||||||
Tag = 2,
|
Tag = 2,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl From<AwsTranscriberVocabularyFilterMethod> for VocabularyFilterMethod {
|
||||||
|
fn from(val: AwsTranscriberVocabularyFilterMethod) -> Self {
|
||||||
|
use AwsTranscriberVocabularyFilterMethod::*;
|
||||||
|
match val {
|
||||||
|
Mask => VocabularyFilterMethod::Mask,
|
||||||
|
Remove => VocabularyFilterMethod::Remove,
|
||||||
|
Tag => VocabularyFilterMethod::Tag,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
glib::wrapper! {
|
glib::wrapper! {
|
||||||
pub struct Transcriber(ObjectSubclass<imp::Transcriber>) @extends gst::Element, gst::Object;
|
pub struct Transcriber(ObjectSubclass<imp::Transcriber>) @extends gst::Element, gst::Object;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,174 +0,0 @@
|
||||||
// Copyright (C) 2020 Jordan Petridis <jordan@centricular.com>
|
|
||||||
//
|
|
||||||
// This Source Code Form is subject to the terms of the Mozilla Public License, v2.0.
|
|
||||||
// If a copy of the MPL was not distributed with this file, You can obtain one at
|
|
||||||
// <https://mozilla.org/MPL/2.0/>.
|
|
||||||
//
|
|
||||||
// SPDX-License-Identifier: MPL-2.0
|
|
||||||
|
|
||||||
use byteorder::{BigEndian, WriteBytesExt};
|
|
||||||
use nom::{
|
|
||||||
self, bytes::complete::take, combinator::map_res, multi::many0, number::complete::be_u16,
|
|
||||||
number::complete::be_u32, number::complete::be_u8, sequence::tuple, IResult,
|
|
||||||
};
|
|
||||||
use std::borrow::Cow;
|
|
||||||
use std::io::{self, Write};
|
|
||||||
|
|
||||||
// CRC-32 (ISO-HDLC parameters) calculator shared by the encoder and the parser
// for both the prelude checksum and the whole-message checksum.
const CRC: crc::Crc<u32> = crc::Crc::<u32>::new(&crc::CRC_32_ISO_HDLC);
|
|
||||||
|
|
||||||
/// Fixed-size prelude found at the start of every message.
///
/// Holds the three big-endian `u32` values read by `parse_prelude`, in wire
/// order.
#[derive(Debug)]
struct Prelude {
    /// Length of the whole message in bytes, including the 12-byte prelude and
    /// the trailing 4-byte message CRC.
    total_bytes: u32,
    /// Length in bytes of the header section that follows the prelude.
    header_bytes: u32,
    /// Checksum of the two length fields above; verified while parsing and
    /// only kept around for debugging.
    #[allow(dead_code)]
    prelude_crc: u32,
}
|
|
||||||
|
|
||||||
/// A single message header: a name, a one-byte type tag and a string value.
#[derive(Debug)]
pub struct Header {
    /// Header name; its length is written as a single byte on the wire.
    pub name: Cow<'static, str>,
    /// Type tag written verbatim between the name and the value.
    pub value_type: u8,
    /// Header value; its length is written as a big-endian `u16` on the wire.
    pub value: Cow<'static, str>,
}
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub struct Packet<'a> {
|
|
||||||
#[allow(dead_code)]
|
|
||||||
prelude: Prelude,
|
|
||||||
headers: Vec<Header>,
|
|
||||||
pub payload: &'a [u8],
|
|
||||||
#[allow(dead_code)]
|
|
||||||
msg_crc: u32,
|
|
||||||
}
|
|
||||||
|
|
||||||
fn write_header<W: Write>(w: &mut W, header: &Header) -> Result<(), io::Error> {
|
|
||||||
w.write_u8(header.name.len() as u8)?;
|
|
||||||
w.write_all(header.name.as_bytes())?;
|
|
||||||
w.write_u8(header.value_type)?;
|
|
||||||
w.write_u16::<BigEndian>(header.value.len() as u16)?;
|
|
||||||
w.write_all(header.value.as_bytes())?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn write_headers<W: Write>(w: &mut W, headers: &[Header]) -> Result<(), io::Error> {
|
|
||||||
for header in headers {
|
|
||||||
write_header(w, header)?;
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn encode_packet(payload: &[u8], headers: &[Header]) -> Result<Vec<u8>, io::Error> {
|
|
||||||
let mut res = Vec::with_capacity(1024);
|
|
||||||
|
|
||||||
// Total length
|
|
||||||
res.write_u32::<BigEndian>(0)?;
|
|
||||||
// Header length
|
|
||||||
res.write_u32::<BigEndian>(0)?;
|
|
||||||
// Prelude CRC32 placeholder
|
|
||||||
res.write_u32::<BigEndian>(0)?;
|
|
||||||
|
|
||||||
// Write all headers
|
|
||||||
write_headers(&mut res, headers)?;
|
|
||||||
|
|
||||||
// Rewrite header length
|
|
||||||
let header_length = res.len() - 12;
|
|
||||||
(&mut res[4..8]).write_u32::<BigEndian>(header_length as u32)?;
|
|
||||||
|
|
||||||
// Write payload
|
|
||||||
res.write_all(payload)?;
|
|
||||||
|
|
||||||
// Rewrite total length
|
|
||||||
let total_length = res.len() + 4;
|
|
||||||
(&mut res[0..4]).write_u32::<BigEndian>(total_length as u32)?;
|
|
||||||
|
|
||||||
// Rewrite the prelude crc since we replaced the lengths
|
|
||||||
let prelude_crc = CRC.checksum(&res[0..8]);
|
|
||||||
(&mut res[8..12]).write_u32::<BigEndian>(prelude_crc)?;
|
|
||||||
|
|
||||||
// Message CRC
|
|
||||||
let message_crc = CRC.checksum(&res);
|
|
||||||
res.write_u32::<BigEndian>(message_crc)?;
|
|
||||||
|
|
||||||
Ok(res)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn parse_prelude(input: &[u8]) -> IResult<&[u8], Prelude> {
|
|
||||||
map_res(
|
|
||||||
tuple((be_u32, be_u32, be_u32)),
|
|
||||||
|(total_bytes, header_bytes, prelude_crc)| {
|
|
||||||
let sum = CRC.checksum(&input[0..8]);
|
|
||||||
if prelude_crc != sum {
|
|
||||||
return Err(nom::Err::Error((
|
|
||||||
"Prelude CRC doesn't match",
|
|
||||||
nom::error::ErrorKind::MapRes,
|
|
||||||
)));
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(Prelude {
|
|
||||||
total_bytes,
|
|
||||||
header_bytes,
|
|
||||||
prelude_crc,
|
|
||||||
})
|
|
||||||
},
|
|
||||||
)(input)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn parse_header(input: &[u8]) -> IResult<&[u8], Header> {
|
|
||||||
let (input, header_length) = be_u8(input)?;
|
|
||||||
let (input, name) = map_res(take(header_length), std::str::from_utf8)(input)?;
|
|
||||||
let (input, value_type) = be_u8(input)?;
|
|
||||||
let (input, value_length) = be_u16(input)?;
|
|
||||||
let (input, value) = map_res(take(value_length), std::str::from_utf8)(input)?;
|
|
||||||
|
|
||||||
let header = Header {
|
|
||||||
name: name.to_string().into(),
|
|
||||||
value_type,
|
|
||||||
value: value.to_string().into(),
|
|
||||||
};
|
|
||||||
|
|
||||||
Ok((input, header))
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn packet_is_exception(packet: &Packet) -> bool {
|
|
||||||
for header in &packet.headers {
|
|
||||||
if header.name == ":message-type" && header.value == "exception" {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
false
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn parse_packet(input: &[u8]) -> IResult<&[u8], Packet> {
|
|
||||||
let (remainder, prelude) = parse_prelude(input)?;
|
|
||||||
|
|
||||||
// Check the crc of the whole input
|
|
||||||
let sum = CRC.checksum(&input[..input.len() - 4]);
|
|
||||||
let (_, msg_crc) = be_u32(&input[input.len() - 4..])?;
|
|
||||||
|
|
||||||
if msg_crc != sum {
|
|
||||||
return Err(nom::Err::Error(nom::error::Error::new(
|
|
||||||
b"Prelude CRC doesn't match",
|
|
||||||
nom::error::ErrorKind::MapRes,
|
|
||||||
)));
|
|
||||||
}
|
|
||||||
|
|
||||||
let (remainder, header_input) = take(prelude.header_bytes)(remainder)?;
|
|
||||||
let (_, headers) = many0(parse_header)(header_input)?;
|
|
||||||
|
|
||||||
let payload_length = prelude.total_bytes - prelude.header_bytes - 4 - 12;
|
|
||||||
let (remainder, payload) = take(payload_length)(remainder)?;
|
|
||||||
|
|
||||||
// only the message_crc we check before should be remaining now
|
|
||||||
assert_eq!(remainder.len(), 4);
|
|
||||||
|
|
||||||
Ok((
|
|
||||||
input,
|
|
||||||
Packet {
|
|
||||||
prelude,
|
|
||||||
headers,
|
|
||||||
payload,
|
|
||||||
msg_crc,
|
|
||||||
},
|
|
||||||
))
|
|
||||||
}
|
|
Loading…
Reference in a new issue