// google-cloud-playground/src/main.rs

use bytes::BytesMut;
use google_api_proto::google::cloud::speech::v1::streaming_recognize_request::StreamingRequest;
use google_api_proto::google::cloud::speech::v1::{
    recognition_config::AudioEncoding, speech_client::SpeechClient, RecognitionConfig,
    StreamingRecognitionConfig, StreamingRecognizeRequest,
};
use google_authz::{Credentials, GoogleAuthz};
use log::debug;
use tokio::io::AsyncReadExt;
use tonic::transport::Channel;

#[tokio::main]
async fn main() -> eyre::Result<()> {
    tracing_subscriber::fmt::init();
    //console_subscriber::init();

    debug!("starting...");

    let channel = Channel::from_static("https://speech.googleapis.com")
        .connect()
        .await?;
    // let channel_translate = Channel::from_static("https://translate.googleapis.com")
    //     .connect()
    //     .await?;

    let credentials = Credentials::builder()
        .json_file("i-centralvideo-dictate-dev-c184dd68967a.json".as_ref())
        .build()
        .await?;
    let channel = GoogleAuthz::builder(channel)
        .credentials(credentials)
        .build()
        .await;

    debug!("authenticated channel created!");

    let mut client = SpeechClient::new(channel);

    let outbound = async_stream::stream! {
        let request = StreamingRecognizeRequest {
            streaming_request: Some(StreamingRequest::StreamingConfig(
                StreamingRecognitionConfig {
                    config: Some(RecognitionConfig {
                        encoding: AudioEncoding::Flac.into(), // matching current example file
                        sample_rate_hertz: 48000,             // matching current example file
                        language_code: "en-US".to_string(),   // we only support en-US to start with
                        model: "video".to_string(),           // dictate does not set this option
                        use_enhanced: true,                   // dictate does not set this option
                        profanity_filter: true,               // used by Dictate, so we also use it here
                        enable_word_time_offsets: true, // important so we can get the spoken word time ranges
                        max_alternatives: 1,            // make sure the default is used
                        ..Default::default()
                    }),
                    single_utterance: false,
                    interim_results: false,
                },
            )),
        };
        yield request;
        let file = tokio::fs::File::open("some-audio.flac").await.unwrap();
        let mut audio_file = tokio::io::BufReader::new(file);
            // read file chunk
            let mut buffer = [0; 1024 * 5];
            while audio_file.read(&mut buffer).await.is_ok() {
                // send to server
                let request = StreamingRecognizeRequest {
                    streaming_request: Some(StreamingRequest::AudioContent(
                        BytesMut::from(buffer.as_slice()).freeze(),
                    )),
                };
                yield request;
                debug!("added a buffer to the sender queue");
            }
    };

    let response = client
        .streaming_recognize(tonic::Request::new(outbound))
        .await?;
    let mut inbound = response.into_inner();

    while let Some(msg) = inbound.message().await? {
        debug!("Got a message: {:?}", msg);
    }

    Ok(())
}