diff --git a/Cargo.toml b/Cargo.toml
index 311cc579..8dbf9250 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,6 +7,7 @@ members = [
     "audio/claxon",
     "audio/csound",
     "audio/lewton",
+    "audio/transcribe",
     "generic/file",
     "generic/sodium",
     "generic/threadshare",
diff --git a/audio/transcribe/Cargo.toml b/audio/transcribe/Cargo.toml
new file mode 100644
index 00000000..ef14a588
--- /dev/null
+++ b/audio/transcribe/Cargo.toml
@@ -0,0 +1,32 @@
+[package]
+name = "gst-plugin-transcribe"
+version = "0.1.0"
+authors = ["Jordan Petridis "]
+edition = "2018"
+# FIXME: licence
+
+[dependencies]
+glib = { git = "https://github.com/gtk-rs/glib" }
+gst = { git = "https://gitlab.freedesktop.org/gstreamer/gstreamer-rs", features = ["v1_16"], package = "gstreamer" }
+gst_base = { git = "https://gitlab.freedesktop.org/gstreamer/gstreamer-rs", features = ["v1_16"], package ="gstreamer-base" }
+rusoto_core = "0.43.0-beta.1"
+rusoto_credential = "0.43.0-beta.1"
+rusoto_transcribe = "0.43.0-beta.1"
+rusoto_signature = "0.43.0-beta.1"
+reqwest = { version = "0.10", features = ["cookies", "gzip"] }
+futures = "0.3"
+tokio = { version = "0.2", features = ["time", "rt-threaded"] }
+async-tungstenite = { version = "0.4", features = ["tokio", "tokio-runtime", "tokio-tls"] }
+nom = "5.1.1"
+crc = "1.8.1"
+byteorder = "1.3.4"
+# Needed by `#[macro_use] extern crate lazy_static;` in src/lib.rs
+lazy_static = "1.0"
+
+[lib]
+name = "gsttranscribe"
+crate-type = ["cdylib", "rlib"]
+path = "src/lib.rs"
+
+[build-dependencies]
+gst-plugin-version-helper = { path="../../version-helper" }
diff --git a/audio/transcribe/build.rs b/audio/transcribe/build.rs
new file mode 100644
index 00000000..fe307a43
--- /dev/null
+++ b/audio/transcribe/build.rs
@@ -0,0 +1,7 @@
+// Build script. `get_info()` is expected to export the `COMMIT_ID` and
+// `BUILD_REL_DATE` variables that src/lib.rs reads through `env!`.
+use gst_plugin_version_helper;
+
+fn main() {
+    gst_plugin_version_helper::get_info()
+}
diff --git a/audio/transcribe/src/lib.rs b/audio/transcribe/src/lib.rs
new file mode 100644
index 00000000..00ffacc3
--- /dev/null
+++ b/audio/transcribe/src/lib.rs
@@ -0,0 +1,31 @@
+// FIXME: add lgpl 2.1 license
+
+#![crate_type = "cdylib"]
+
+#[macro_use]
+extern crate lazy_static;
+
+#[macro_use]
+extern crate glib;
+#[macro_use]
+extern crate gst; // Cargo.toml renames the `gstreamer` package to `gst`
+
+pub mod packet;
+
+pub mod aws_transcribe_parse;
+
+fn plugin_init(plugin: &gst::Plugin) -> Result<(), glib::BoolError> {
+    aws_transcribe_parse::register(plugin)
+}
+
+gst::gst_plugin_define!(
+    transcribe,
+    env!("CARGO_PKG_DESCRIPTION"),
+    plugin_init,
+    concat!(env!("CARGO_PKG_VERSION"), "-", env!("COMMIT_ID")),
+    "LGPL",
+    env!("CARGO_PKG_NAME"),
+    env!("CARGO_PKG_NAME"),
+    env!("CARGO_PKG_REPOSITORY"),
+    env!("BUILD_REL_DATE")
+);
diff --git a/audio/transcribe/src/packet.rs b/audio/transcribe/src/packet.rs
new file mode 100644
index 00000000..22406e80
--- /dev/null
+++ b/audio/transcribe/src/packet.rs
@@ -0,0 +1,207 @@
+//! Encoder and parser for the length-prefixed, CRC32-checked binary messages
+//! used by this plugin.
+//!
+//! Layout as implemented below (all integers big-endian): a 12 byte prelude
+//! (total length, header length, prelude CRC), the headers, the payload and a
+//! trailing CRC over everything before it.
+
+use byteorder::{BigEndian, WriteBytesExt};
+use crc::crc32;
+use nom::{
+    self, bytes::complete::take, combinator::map_res, multi::many0, number::complete::be_u16,
+    number::complete::be_u32, number::complete::be_u8, sequence::tuple, IResult,
+};
+use std::convert::TryFrom;
+use std::io::{self, Write};
+
+/// Fixed 12 byte prelude that starts every message.
+#[derive(Debug)]
+pub struct Prelude {
+    /// Length of the whole message, prelude and trailing CRC included.
+    total_bytes: u32,
+    /// Length of the encoded headers section.
+    header_bytes: u32,
+    /// CRC32 (IEEE) of the first 8 bytes of the message.
+    prelude_crc: u32,
+}
+
+/// A single message header: a name and a length-prefixed string value.
+#[derive(Debug)]
+// FIXME: make private
+pub struct Header {
+    pub name: String,
+    /// Value type tag, written and read verbatim.
+    pub value_type: u8,
+    pub value: String,
+}
+
+/// A parsed message. `payload` borrows from the buffer that was parsed.
+#[derive(Debug)]
+pub struct Packet<'a> {
+    prelude: Prelude,
+    pub headers: Vec<Header>,
+    pub payload: &'a [u8],
+    msg_crc: u32,
+}
+
+/// Builds the error returned when a length does not fit its wire field.
+fn too_long(what: &'static str) -> io::Error {
+    io::Error::new(io::ErrorKind::InvalidInput, what)
+}
+
+/// Serializes one header: name length (u8), name, value type (u8), value
+/// length (big-endian u16), value.
+///
+/// Fails with `InvalidInput` rather than truncating a length that does not fit
+/// its field.
+fn write_header<W: Write>(w: &mut W, header: &Header) -> Result<(), io::Error> {
+    let name_len =
+        u8::try_from(header.name.len()).map_err(|_| too_long("header name is too long"))?;
+    let value_len =
+        u16::try_from(header.value.len()).map_err(|_| too_long("header value is too long"))?;
+
+    w.write_u8(name_len)?;
+    w.write_all(header.name.as_bytes())?;
+    w.write_u8(header.value_type)?;
+    w.write_u16::<BigEndian>(value_len)?;
+    w.write_all(header.value.as_bytes())?;
+    Ok(())
+}
+
+/// Serializes `headers` back to back, in order.
+fn write_headers<W: Write>(w: &mut W, headers: &[Header]) -> Result<(), io::Error> {
+    for header in headers {
+        write_header(w, header)?;
+    }
+    Ok(())
+}
+
+/// Encodes `payload` and `headers` into one complete message.
+///
+/// Returns the message bytes, or an `InvalidInput` error when a header or the
+/// message as a whole is too large for its length field.
+pub fn encode_packet(payload: &[u8], headers: &[Header]) -> Result<Vec<u8>, io::Error> {
+    let mut res = Vec::with_capacity(1024);
+
+    // Placeholders for total length, header length and prelude CRC; all three
+    // are patched below once the real sizes are known.
+    res.write_u32::<BigEndian>(0)?;
+    res.write_u32::<BigEndian>(0)?;
+    res.write_u32::<BigEndian>(0)?;
+
+    // Write all headers
+    write_headers(&mut res, headers)?;
+
+    // Rewrite header length: everything written after the 12 byte prelude
+    let header_length =
+        u32::try_from(res.len() - 12).map_err(|_| too_long("headers are too long"))?;
+    (&mut res[4..8]).write_u32::<BigEndian>(header_length)?;
+
+    // Write payload
+    res.write_all(payload)?;
+
+    // Rewrite total length; the extra 4 bytes are the message CRC added last
+    let total_length =
+        u32::try_from(res.len() + 4).map_err(|_| too_long("message is too long"))?;
+    (&mut res[0..4]).write_u32::<BigEndian>(total_length)?;
+
+    // The prelude CRC covers both length fields, so it is computed only now
+    let prelude_crc = crc32::checksum_ieee(&res[0..8]);
+    (&mut res[8..12]).write_u32::<BigEndian>(prelude_crc)?;
+
+    // Message CRC over everything written so far
+    let message_crc = crc32::checksum_ieee(&res);
+    res.write_u32::<BigEndian>(message_crc)?;
+
+    Ok(res)
+}
+
+/// Parses the 12 byte prelude and verifies its CRC against the first 8 bytes.
+fn parse_prelude(input: &[u8]) -> IResult<&[u8], Prelude> {
+    map_res(
+        tuple((be_u32, be_u32, be_u32)),
+        |(total_bytes, header_bytes, prelude_crc)| {
+            // The tuple parser above succeeded, so `input` holds at least 12 bytes.
+            let sum = crc32::checksum_ieee(&input[0..8]);
+            if prelude_crc != sum {
+                return Err(nom::Err::Error((
+                    "Prelude CRC doesn't match",
+                    nom::error::ErrorKind::MapRes,
+                )));
+            }
+
+            Ok(Prelude {
+                total_bytes,
+                header_bytes,
+                prelude_crc,
+            })
+        },
+    )(input)
+}
+
+/// Parses one header; both name and value must be valid UTF-8.
+fn parse_header(input: &[u8]) -> IResult<&[u8], Header> {
+    let (input, header_length) = be_u8(input)?;
+    let (input, name) = map_res(take(header_length), std::str::from_utf8)(input)?;
+    let (input, value_type) = be_u8(input)?;
+    let (input, value_length) = be_u16(input)?;
+    let (input, value) = map_res(take(value_length), std::str::from_utf8)(input)?;
+
+    let header = Header {
+        name: name.to_string(),
+        value_type,
+        value: value.to_string(),
+    };
+
+    Ok((input, header))
+}
+
+/// Parses `input` as exactly one complete message.
+///
+/// Both CRCs are verified and the lengths announced in the prelude must match
+/// the size of `input`; any inconsistency is reported as a parse error instead
+/// of a panic. On success the first element of the returned tuple is `input`
+/// itself, unchanged.
+pub fn parse_packet(input: &[u8]) -> IResult<&[u8], Packet> {
+    let (remainder, prelude) = parse_prelude(input)?;
+
+    // Check the crc of the whole input. The prelude parsed, so `input` is at
+    // least 12 bytes long and the subtraction cannot underflow.
+    let crc_offset = input.len() - 4;
+    let sum = crc32::checksum_ieee(&input[..crc_offset]);
+    let (_, msg_crc) = be_u32(&input[crc_offset..])?;
+
+    if msg_crc != sum {
+        // FIXME: a better errortype than mapres
+        return Err(nom::Err::Error((
+            &b"Message CRC doesn't match"[..],
+            nom::error::ErrorKind::MapRes,
+        )));
+    }
+
+    let (remainder, header_input) = take(prelude.header_bytes)(remainder)?;
+    let (_, headers) = many0(parse_header)(header_input)?;
+
+    // total = prelude (12) + headers + payload + message CRC (4)
+    let payload_length = prelude
+        .total_bytes
+        .checked_sub(prelude.header_bytes)
+        .and_then(|len| len.checked_sub(12 + 4))
+        .ok_or_else(|| nom::Err::Error((input, nom::error::ErrorKind::Verify)))?;
+    let (remainder, payload) = take(payload_length)(remainder)?;
+
+    // Only the message CRC checked above may be left at this point
+    if remainder.len() != 4 {
+        return Err(nom::Err::Error((remainder, nom::error::ErrorKind::Verify)));
+    }
+
+    Ok((
+        input,
+        Packet {
+            prelude,
+            headers,
+            payload,
+            msg_crc,
+        },
+    ))
+}