whispering/whispering/cli.py

#!/usr/bin/env python3

import argparse
import asyncio
import queue
import sys
from logging import DEBUG, INFO, basicConfig, getLogger
from typing import Optional, Union

import sounddevice as sd
import torch
from whisper import available_models
from whisper.audio import N_FRAMES, SAMPLE_RATE
from whisper.tokenizer import LANGUAGES, TO_LANGUAGE_CODE

from whispering.pbar import ProgressBar
from whispering.schema import Context, WhisperConfig
from whispering.serve import serve_with_websocket
from whispering.transcriber import WhisperStreamingTranscriber
from whispering.websocket_client import run_websocket_client

# Module-level logger named after this module; level and format are
# configured by the basicConfig() call in main().
logger = getLogger(__name__)


def transcribe_from_mic(
    *,
    wsp: WhisperStreamingTranscriber,
    sd_device: Optional[Union[int, str]],
    num_block: int,
    ctx: Context,
    no_progress: bool,
) -> None:
    """Record from an input device and print transcriptions as they arrive.

    An input stream is opened with ``SAMPLE_RATE``, one ``float32`` channel
    and a block size of ``N_FRAMES * num_block`` frames.  Every captured
    block is flattened and queued by the stream callback; the main loop
    takes blocks off the queue one at a time, feeds them to
    ``wsp.transcribe`` and prints each resulting chunk to stdout as
    ``start->end<TAB>text``.

    Args:
        wsp: Transcriber whose ``transcribe(audio=..., ctx=...)`` yields
            chunks exposing ``start``, ``end`` and ``text``.
        sd_device: Device passed through to ``sd.InputStream``.
        num_block: Multiplier for the stream block size; also handed to
            the progress bar.
        ctx: Context forwarded to every ``wsp.transcribe`` call.
        no_progress: When true, no progress bar is created and no status
            text is written to stderr.

    The loop never terminates on its own.  It returns only when a
    ``KeyboardInterrupt`` is raised while waiting for audio with the
    progress bar enabled; with ``no_progress`` set the interrupt
    propagates to the caller.
    """
    pending_audio = queue.Queue()
    show_progress = not no_progress

    def sd_callback(indata, frames, time, status):
        # Passed to the input stream as its callback: report any status
        # flag, then queue the block flattened to one dimension.
        if status:
            logger.warning(status)
        pending_audio.put(indata.ravel())

    def write_status(text):
        # Status text goes to stderr (and is flushed immediately) so that
        # stdout carries only the transcription lines.
        if show_progress:
            sys.stderr.write(text)
            sys.stderr.flush()

    logger.info("Ready to transcribe")
    mic_stream = sd.InputStream(
        samplerate=SAMPLE_RATE,
        blocksize=N_FRAMES * num_block,
        device=sd_device,
        dtype="float32",
        channels=1,
        callback=sd_callback,
    )
    with mic_stream:
        block_no: int = 0
        while True:
            logger.debug(
                f"Audio #: {block_no}, The rest of queue: {pending_audio.qsize()}"
            )

            if show_progress:
                pbar_thread = ProgressBar(
                    num_block=num_block,  # TODO: set more accurate value
                )
                try:
                    audio = pending_audio.get()
                except KeyboardInterrupt:
                    # Ctrl-C while waiting: dismiss the bar and leave quietly.
                    pbar_thread.kill()
                    return
                pbar_thread.kill()
            else:
                audio = pending_audio.get()

            logger.debug(f"Got. The rest of queue: {pending_audio.qsize()}")
            write_status("Analyzing")

            for chunk in wsp.transcribe(audio=audio, ctx=ctx):
                # Return the cursor to column 0 so the result line is
                # printed over the "Analyzing" marker, then show it again.
                write_status("\r")
                print(f"{chunk.start:.2f}->{chunk.end:.2f}\t{chunk.text}")
                write_status("Analyzing")
            block_no += 1
            write_status("\r")


def get_opts() -> argparse.Namespace:
    """Parse the command-line arguments and normalise derived values.

    After parsing, the following adjustments are applied:

    * a non-positive ``--beam_size`` is replaced by ``None``;
    * if no ``--temperature`` was given, the schedule
      ``0.0, 0.2, 0.4, 0.6, 0.8, 1.0`` is used; in every case the
      temperatures are de-duplicated and sorted in ascending order;
    * ``--mic`` is converted to ``int`` when it is a numeric string and is
      otherwise left as given (a string, or ``None`` when omitted).

    Returns:
        The populated :class:`argparse.Namespace`.
    """
    parser = argparse.ArgumentParser()

    group_model = parser.add_argument_group("Whisper model options")
    group_model.add_argument(
        "--model",
        type=str,
        choices=available_models(),
    )
    group_model.add_argument(
        "--language",
        type=str,
        default=None,
        # Accept both the language codes and the title-cased language names.
        choices=sorted(LANGUAGES.keys())
        + sorted([k.title() for k in TO_LANGUAGE_CODE.keys()]),
    )
    group_model.add_argument(
        "--device",
        default="cuda" if torch.cuda.is_available() else "cpu",
        help="device to use for PyTorch inference",
    )

    group_ws = parser.add_argument_group("WebSocket options")
    group_ws.add_argument(
        "--host",
        default="0.0.0.0",
        help="host of websocket server",
    )
    group_ws.add_argument(
        "--port",
        type=int,
        help="Port number of websocket server",
    )

    group_ctx = parser.add_argument_group("Parsing options")
    group_ctx.add_argument(
        "--beam_size",
        "-b",
        type=int,
        default=5,
    )
    group_ctx.add_argument(
        "--num_block",
        "-n",
        type=int,
        default=160,
        help="Number of operation unit",
    )
    group_ctx.add_argument(
        "--temperature",
        "-t",
        type=float,
        action="append",
        default=[],
    )
    group_ctx.add_argument(
        "--allow-padding",
        action="store_true",
    )
    group_ctx.add_argument(
        "--no-progress",
        action="store_true",
    )
    group_ctx.add_argument(
        "--no-vad",
        action="store_true",
    )
    group_ctx.add_argument(
        "--mode",
        choices=["client"],
    )

    group_misc = parser.add_argument_group("Other options")
    group_misc.add_argument(
        "--mic",
    )
    group_misc.add_argument(
        "--show-devices",
        action="store_true",
    )
    group_misc.add_argument(
        "--debug",
        action="store_true",
    )

    opts = parser.parse_args()

    if opts.beam_size <= 0:
        opts.beam_size = None
    if not opts.temperature:
        opts.temperature = [0.0, 0.2, 0.4, 0.6, 0.8, 1.0]
    opts.temperature = sorted(set(opts.temperature))

    # --mic may be a device index or a device name.  Only a value that
    # parses as an integer is converted; anything else stays a string.
    if opts.mic is not None:
        try:
            opts.mic = int(opts.mic)
        except ValueError:
            pass
    return opts


def get_wshiper(*, opts) -> WhisperStreamingTranscriber:
    """Create the streaming transcriber described by the parsed options.

    Reads ``opts.model``, ``opts.language`` and ``opts.device`` into a
    ``WhisperConfig``, logs that config at debug level and returns a
    ``WhisperStreamingTranscriber`` built from it.
    """
    whisper_config = WhisperConfig(
        model_name=opts.model,
        language=opts.language,
        device=opts.device,
    )
    logger.debug(f"WhisperConfig: {whisper_config}")
    return WhisperStreamingTranscriber(config=whisper_config)


def get_context(*, opts) -> Context:
    """Create the transcription ``Context`` from the parsed options.

    ``opts.beam_size``, ``opts.temperature`` and ``opts.allow_padding`` are
    passed through unchanged; ``vad`` is the negation of ``opts.no_vad``.
    The resulting context is logged at debug level before being returned.
    """
    use_vad = not opts.no_vad
    context = Context(
        beam_size=opts.beam_size,
        temperatures=opts.temperature,
        allow_padding=opts.allow_padding,
        vad=use_vad,
    )
    logger.debug(f"Context: {context}")
    return context


def show_devices():
    """Print ``index: name`` for every device with at least one input channel."""
    for index, info in enumerate(sd.query_devices()):
        # Skip devices that cannot record.
        if not info["max_input_channels"] > 0:
            continue
        print(f"{index}: {info['name']}")


def _require_model_options(opts: argparse.Namespace) -> None:
    """Exit with an error message unless --language and --model are both set."""
    missing = [
        flag
        for flag, value in (("--language", opts.language), ("--model", opts.model))
        if value is None
    ]
    if missing:
        sys.exit(f"error: {' and '.join(missing)} must be specified in this mode")


def main() -> None:
    """Command-line entry point.

    Parses the options, configures logging, and then runs exactly one of:

    * ``--show-devices``: list the input devices and return;
    * WebSocket client (``--port`` given together with ``--mode client``):
      run ``run_websocket_client``; ``--language`` and ``--model`` must not
      be given, and Ctrl-C ends the client silently;
    * WebSocket server (``--port`` given, no ``--mode client``): build the
      transcriber and context and serve them over WebSocket;
    * microphone mode (no ``--port``): transcribe from the local input
      device selected by ``--mic``.

    Invalid option combinations terminate the process via ``sys.exit`` with
    an explanatory message.  Explicit checks are used rather than ``assert``
    so that validation still runs under ``python -O``.
    """
    opts = get_opts()

    basicConfig(
        level=DEBUG if opts.debug else INFO,
        format="[%(asctime)s] %(module)s.%(funcName)s:%(lineno)d %(levelname)s -> %(message)s",
    )

    if opts.show_devices:
        show_devices()
        return

    # --host has a default value, so in practice it is --port that decides
    # whether a WebSocket mode is selected.
    use_websocket = opts.host is not None and opts.port is not None

    if use_websocket and opts.mode == "client":
        if opts.language is not None or opts.model is not None:
            sys.exit(
                "error: --language and --model must not be specified"
                " with --mode client"
            )
        try:
            asyncio.run(
                run_websocket_client(
                    opts=opts,
                )
            )
        except KeyboardInterrupt:
            pass
        return

    # Both remaining modes run the model locally and need the same setup.
    _require_model_options(opts)
    wsp = get_wshiper(opts=opts)
    ctx: Context = get_context(opts=opts)

    if use_websocket:
        asyncio.run(
            serve_with_websocket(
                wsp=wsp,
                host=opts.host,
                port=opts.port,
                ctx=ctx,
            )
        )
    else:
        transcribe_from_mic(
            wsp=wsp,
            sd_device=opts.mic,
            num_block=opts.num_block,
            no_progress=opts.no_progress,
            ctx=ctx,
        )


# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Initial commit 2022-09-23 10:20:11 +00:00			`#!/usr/bin/env python3`

			`import argparse`
Add websocket server 2022-09-24 11:45:20 +00:00			`import asyncio`
Initial commit 2022-09-23 10:20:11 +00:00			`import queue`
Add progress bar and --no-progress option 2022-09-29 12:59:12 +00:00			`import sys`
Fix lint 2022-09-23 13:13:25 +00:00			`from logging import DEBUG, INFO, basicConfig, getLogger`
Add --mic option 2022-09-23 10:28:11 +00:00			`from typing import Optional, Union`
Initial commit 2022-09-23 10:20:11 +00:00
			`import sounddevice as sd`
			`import torch`
			`from whisper import available_models`
			`from whisper.audio import N_FRAMES, SAMPLE_RATE`
			`from whisper.tokenizer import LANGUAGES, TO_LANGUAGE_CODE`

Add progress bar and --no-progress option 2022-09-29 12:59:12 +00:00			`from whispering.pbar import ProgressBar`
Add Context to manage context 2022-09-29 11:14:56 +00:00			`from whispering.schema import Context, WhisperConfig`
Renamed whisper_streaming to whispering 2022-09-25 15:29:20 +00:00			`from whispering.serve import serve_with_websocket`
			`from whispering.transcriber import WhisperStreamingTranscriber`
			`from whispering.websocket_client import run_websocket_client`
Initial commit 2022-09-23 10:20:11 +00:00
			`logger = getLogger(__name__)`


Add --mic option 2022-09-23 10:28:11 +00:00			`def transcribe_from_mic(`
			`*,`
Add websocket server 2022-09-24 11:45:20 +00:00			`wsp: WhisperStreamingTranscriber,`
Add --mic option 2022-09-23 10:28:11 +00:00			`sd_device: Optional[Union[int, str]],`
Add --num_block option 2022-09-23 11:45:18 +00:00			`num_block: int,`
Updated Context 2022-09-29 11:43:49 +00:00			`ctx: Context,`
Add progress bar and --no-progress option 2022-09-29 12:59:12 +00:00			`no_progress: bool,`
Add --mic option 2022-09-23 10:28:11 +00:00			`) -> None:`
Initial commit 2022-09-23 10:20:11 +00:00			`q = queue.Queue()`

			`def sd_callback(indata, frames, time, status):`
			`if status:`
			`logger.warning(status)`
			`q.put(indata.ravel())`

			`logger.info("Ready to transcribe")`
			`with sd.InputStream(`
			`samplerate=SAMPLE_RATE,`
Add --num_block option 2022-09-23 11:45:18 +00:00			`blocksize=N_FRAMES * num_block,`
Initial commit 2022-09-23 10:20:11 +00:00			`device=sd_device,`
			`dtype="float32",`
			`channels=1,`
			`callback=sd_callback,`
			`):`
Add segment debug log 2022-09-23 13:44:16 +00:00			`idx: int = 0`
Initial commit 2022-09-23 10:20:11 +00:00			`while True:`
Fix 2022-10-02 10:47:17 +00:00			`logger.debug(f"Audio #: {idx}, The rest of queue: {q.qsize()}")`
Add progress bar and --no-progress option 2022-09-29 12:59:12 +00:00
			`if no_progress:`
Fix 2022-10-02 10:47:17 +00:00			`audio = q.get()`
Add progress bar and --no-progress option 2022-09-29 12:59:12 +00:00			`else:`
Add messages 2022-09-29 13:07:44 +00:00			`pbar_thread = ProgressBar(`
			`num_block=num_block, # TODO: set more accurate value`
			`)`
Add progress bar and --no-progress option 2022-09-29 12:59:12 +00:00			`try:`
Fix 2022-10-02 10:47:17 +00:00			`audio = q.get()`
Add progress bar and --no-progress option 2022-09-29 12:59:12 +00:00			`except KeyboardInterrupt:`
			`pbar_thread.kill()`
			`return`
			`pbar_thread.kill()`

			`logger.debug(f"Got. The rest of queue: {q.qsize()}")`
			`if not no_progress:`
			`sys.stderr.write("Analyzing")`
			`sys.stderr.flush()`

Fix 2022-10-02 10:47:17 +00:00			`for chunk in wsp.transcribe(audio=audio, ctx=ctx):`
Add progress bar and --no-progress option 2022-09-29 12:59:12 +00:00			`if not no_progress:`
			`sys.stderr.write("\r")`
			`sys.stderr.flush()`
Improved time output 2022-09-23 11:26:32 +00:00			`print(f"{chunk.start:.2f}->{chunk.end:.2f}\t{chunk.text}")`
Add progress bar and --no-progress option 2022-09-29 12:59:12 +00:00			`if not no_progress:`
			`sys.stderr.write("Analyzing")`
			`sys.stderr.flush()`
Add segment debug log 2022-09-23 13:44:16 +00:00			`idx += 1`
Add progress bar and --no-progress option 2022-09-29 12:59:12 +00:00			`if not no_progress:`
			`sys.stderr.write("\r")`
			`sys.stderr.flush()`
Initial commit 2022-09-23 10:20:11 +00:00

			`def get_opts() -> argparse.Namespace:`
			`parser = argparse.ArgumentParser()`
Add argparse group 2022-10-02 12:13:36 +00:00
			`group_model = parser.add_argument_group("Whisper model options")`
			`group_model.add_argument(`
			`"--model",`
			`type=str,`
			`choices=available_models(),`
			`)`
			`group_model.add_argument(`
Initial commit 2022-09-23 10:20:11 +00:00			`"--language",`
			`type=str,`
			`default=None,`
			`choices=sorted(LANGUAGES.keys())`
			`+ sorted([k.title() for k in TO_LANGUAGE_CODE.keys()]),`
			`)`
Add argparse group 2022-10-02 12:13:36 +00:00			`group_model.add_argument(`
Initial commit 2022-09-23 10:20:11 +00:00			`"--device",`
			`default="cuda" if torch.cuda.is_available() else "cpu",`
			`help="device to use for PyTorch inference",`
			`)`
Add argparse group 2022-10-02 12:13:36 +00:00
			`group_ws = parser.add_argument_group("WebSocket options")`
			`group_ws.add_argument(`
			`"--host",`
			`default="0.0.0.0",`
			`help="host of websocker server",`
			`)`
			`group_ws.add_argument(`
			`"--port",`
			`type=int,`
			`help="Port number of websocker server",`
			`)`

			`group_ctx = parser.add_argument_group("Parsing options")`
			`group_ctx.add_argument(`
Initial commit 2022-09-23 10:20:11 +00:00			`"--beam_size",`
			`"-b",`
			`type=int,`
			`default=5,`
			`)`
Add argparse group 2022-10-02 12:13:36 +00:00			`group_ctx.add_argument(`
Add --num_block option 2022-09-23 11:45:18 +00:00			`"--num_block",`
Fix option 2022-09-23 11:45:56 +00:00			`"-n",`
Add --num_block option 2022-09-23 11:45:18 +00:00			`type=int,`
Fix -n (Resolve #3) 2022-09-24 06:39:41 +00:00			`default=160,`
			`help="Number of operation unit",`
Add --num_block option 2022-09-23 11:45:18 +00:00			`)`
Add argparse group 2022-10-02 12:13:36 +00:00			`group_ctx.add_argument(`
Added --temperature argument 2022-09-24 04:12:24 +00:00			`"--temperature",`
			`"-t",`
			`type=float,`
			`action="append",`
			`default=[],`
			`)`
Add argparse group 2022-10-02 12:13:36 +00:00			`group_ctx.add_argument(`
			`"--allow-padding",`
Fix logger 2022-09-23 13:01:40 +00:00			`action="store_true",`
			`)`
Add argparse group 2022-10-02 12:13:36 +00:00			`group_ctx.add_argument(`
			`"--no-progress",`
			`action="store_true",`
Add websocket server 2022-09-24 11:45:20 +00:00			`)`
Add argparse group 2022-10-02 12:13:36 +00:00			`group_ctx.add_argument(`
			`"--no-vad",`
Add --allow-padding option 2022-09-24 15:43:03 +00:00			`action="store_true",`
			`)`
Add argparse group 2022-10-02 12:13:36 +00:00			`group_ctx.add_argument(`
Merge websocket client to CLI 2022-09-24 19:13:28 +00:00			`"--mode",`
			`choices=["client"],`
			`)`
Add argparse group 2022-10-02 12:13:36 +00:00
			`group_misc = parser.add_argument_group("Other options")`
			`group_misc.add_argument(`
			`"--mic",`
add --show-device option to list available microphones 2022-09-26 16:47:03 +00:00			`)`
Add argparse group 2022-10-02 12:13:36 +00:00			`group_misc.add_argument(`
			`"--show-devices",`
Add progress bar and --no-progress option 2022-09-29 12:59:12 +00:00			`action="store_true",`
			`)`
Add argparse group 2022-10-02 12:13:36 +00:00			`group_misc.add_argument(`
			`"--debug",`
Add --no-vad option 2022-10-02 11:38:21 +00:00			`action="store_true",`
			`)`
Add argparse group 2022-10-02 12:13:36 +00:00
Refactoring 2022-09-27 03:27:22 +00:00			`opts = parser.parse_args()`
Initial commit 2022-09-23 10:20:11 +00:00
Refactoring 2022-09-27 03:27:22 +00:00			`if opts.beam_size <= 0:`
			`opts.beam_size = None`
			`if len(opts.temperature) == 0:`
			`opts.temperature = [0.0, 0.2, 0.4, 0.6, 0.8, 1.0]`
			`opts.temperature = sorted(set(opts.temperature))`

			`try:`
			`opts.mic = int(opts.mic)`
			`except Exception:`
			`pass`
			`return opts`
Initial commit 2022-09-23 10:20:11 +00:00

Updated Context 2022-09-29 11:43:49 +00:00			`def get_wshiper(*, opts) -> WhisperStreamingTranscriber:`
Merge websocket client to CLI 2022-09-24 19:13:28 +00:00			`config = WhisperConfig(`
			`model_name=opts.model,`
			`language=opts.language,`
			`device=opts.device,`
			`)`

			`logger.debug(f"WhisperConfig: {config}")`
			`wsp = WhisperStreamingTranscriber(config=config)`
			`return wsp`


Updated Context 2022-09-29 11:43:49 +00:00			`def get_context(*, opts) -> Context:`
			`ctx = Context(`
			`beam_size=opts.beam_size,`
			`temperatures=opts.temperature,`
			`allow_padding=opts.allow_padding,`
Add --no-vad option 2022-10-02 11:38:21 +00:00			`vad=not opts.no_vad,`
Updated Context 2022-09-29 11:43:49 +00:00			`)`
			`logger.debug(f"Context: {ctx}")`
			`return ctx`


Refactoring 2022-09-27 03:27:22 +00:00			`def show_devices():`
			`devices = sd.query_devices()`
			`for i, device in enumerate(devices):`
			`if device["max_input_channels"] > 0:`
			`print(f"{i}: {device['name']}")`
add --show-device option to list available microphones 2022-09-26 16:47:03 +00:00

Refactoring 2022-09-27 03:27:22 +00:00			`def main() -> None:`
			`opts = get_opts()`
add --show-device option to list available microphones 2022-09-26 16:47:03 +00:00
Improved logger 2022-09-23 13:11:36 +00:00			`basicConfig(`
			`level=DEBUG if opts.debug else INFO,`
Update logger 2022-09-24 00:52:33 +00:00			`format="[%(asctime)s] %(module)s.%(funcName)s:%(lineno)d %(levelname)s -> %(message)s",`
Improved logger 2022-09-23 13:11:36 +00:00			`)`
Fix logger 2022-09-23 13:01:40 +00:00
Refactoring 2022-09-27 03:27:22 +00:00			`if opts.show_devices:`
			`return show_devices()`
Add --mic option 2022-09-23 10:28:11 +00:00
Add websocket server 2022-09-24 11:45:20 +00:00			`if opts.host is not None and opts.port is not None:`
Merge websocket client to CLI 2022-09-24 19:13:28 +00:00			`if opts.mode == "client":`
			`assert opts.language is None`
			`assert opts.model is None`
Catch KeyboardInterrupt 2022-10-01 14:43:50 +00:00			`try:`
			`asyncio.run(`
			`run_websocket_client(`
			`opts=opts,`
			`)`
Merge websocket client to CLI 2022-09-24 19:13:28 +00:00			`)`
Catch KeyboardInterrupt 2022-10-01 14:43:50 +00:00			`except KeyboardInterrupt:`
			`pass`
Merge websocket client to CLI 2022-09-24 19:13:28 +00:00			`else:`
			`assert opts.language is not None`
			`assert opts.model is not None`
			`wsp = get_wshiper(opts=opts)`
Updated Context 2022-09-29 11:43:49 +00:00			`ctx: Context = get_context(opts=opts)`
Merge websocket client to CLI 2022-09-24 19:13:28 +00:00			`asyncio.run(`
			`serve_with_websocket(`
			`wsp=wsp,`
			`host=opts.host,`
			`port=opts.port,`
Updated Context 2022-09-29 11:43:49 +00:00			`ctx=ctx,`
Merge websocket client to CLI 2022-09-24 19:13:28 +00:00			`)`
Add websocket server 2022-09-24 11:45:20 +00:00			`)`
			`else:`
Merge websocket client to CLI 2022-09-24 19:13:28 +00:00			`assert opts.language is not None`
			`assert opts.model is not None`
			`wsp = get_wshiper(opts=opts)`
Updated Context 2022-09-29 11:43:49 +00:00			`ctx: Context = get_context(opts=opts)`
Add websocket server 2022-09-24 11:45:20 +00:00			`transcribe_from_mic(`
			`wsp=wsp,`
			`sd_device=opts.mic,`
			`num_block=opts.num_block,`
Add progress bar and --no-progress option 2022-09-29 12:59:12 +00:00			`no_progress=opts.no_progress,`
Updated Context 2022-09-29 11:43:49 +00:00			`ctx=ctx,`
Add websocket server 2022-09-24 11:45:20 +00:00			`)`
Initial commit 2022-09-23 10:20:11 +00:00

			`if __name__ == "__main__":`
			`main()`