From 6e0d62d9e26cf587556b96e97047315fdc16e088 Mon Sep 17 00:00:00 2001 From: Yuta Hayashibe Date: Thu, 29 Sep 2022 21:59:12 +0900 Subject: [PATCH] Add progress bar and --no-progress option --- README.md | 1 + poetry.lock | 2 +- pyproject.toml | 1 + whispering/cli.py | 36 +++++++++++++++++++++++++++++++++++- whispering/pbar.py | 40 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 78 insertions(+), 2 deletions(-) create mode 100644 whispering/pbar.py diff --git a/README.md b/README.md index 528a8d9..bba104e 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,7 @@ whispering --language en --model tiny - ``--help`` shows full options - ``--model`` set the [model name](https://github.com/openai/whisper#available-models-and-languages) to use. Larger models will be more accurate, but may not be able to transcribe in real time. - ``--language`` sets the language to transcribe. The list of languages are shown with ``whispering -h`` +- ``--no-progress`` disables the progress message - ``-t`` sets temperatures to decode. You can set several like (``-t 0.0 -t 0.1 -t 0.5``), but too many temperatures exhaust decoding time - ``--debug`` outputs logs for debug diff --git a/poetry.lock b/poetry.lock index 63c8326..4f43d3a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -514,7 +514,7 @@ resolved_reference = "62fe7f1009a534986ac1d32a4aef8c244d029c28" [metadata] lock-version = "1.1" python-versions = ">=3.8,<3.11" -content-hash = "2022c2468c9ad210bff4327f3f0672fa471eda310aaae2c62304fa5c297593dc" +content-hash = "d041d21a202339f405cc37076403f92135ee1f113cdfece5a78c9ee12374be7b" [metadata.files] black = [ diff --git a/pyproject.toml b/pyproject.toml index b5dfc28..8dbf250 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,6 +12,7 @@ whisper = {git = "https://github.com/openai/whisper.git", rev = '62fe7f1009a5349 sounddevice = "^0.4.5" pydantic = "^1.10.2" websockets = "^10.3" +tqdm = "*" [tool.poetry.group.dev.dependencies] diff --git a/whispering/cli.py b/whispering/cli.py index 9a7bf42..aa4a3fa 100644 --- a/whispering/cli.py +++ b/whispering/cli.py @@ -3,15 +3,18 @@ import argparse import asyncio import queue +import sys from logging import DEBUG, INFO, basicConfig, getLogger from typing import Optional, Union import sounddevice as sd import torch +from transformers.pipelines.text2text_generation import enum from whisper import available_models from whisper.audio import N_FRAMES, SAMPLE_RATE from whisper.tokenizer import LANGUAGES, TO_LANGUAGE_CODE +from whispering.pbar import ProgressBar from whispering.schema import Context, WhisperConfig from whispering.serve import serve_with_websocket from whispering.transcriber import WhisperStreamingTranscriber @@ -26,6 +29,7 @@ def transcribe_from_mic( sd_device: Optional[Union[int, str]], num_block: int, ctx: Context, + no_progress: bool, ) -> None: q = queue.Queue() @@ -46,10 +50,35 @@ def transcribe_from_mic( idx: int = 0 while True: logger.debug(f"Segment #: {idx}, The rest of queue: {q.qsize()}") - segment = q.get() + + if no_progress: + segment = q.get() + else: + pbar_thread = ProgressBar(num_block=num_block) + try: + segment = q.get() + except KeyboardInterrupt: + pbar_thread.kill() + return + pbar_thread.kill() + + logger.debug(f"Got. The rest of queue: {q.qsize()}") + if not no_progress: + sys.stderr.write("Analyzing") + sys.stderr.flush() + for chunk in wsp.transcribe(segment=segment, ctx=ctx): + if not no_progress: + sys.stderr.write("\r") + sys.stderr.flush() print(f"{chunk.start:.2f}->{chunk.end:.2f}\t{chunk.text}") + if not no_progress: + sys.stderr.write("Analyzing") + sys.stderr.flush() idx += 1 + if not no_progress: + sys.stderr.write("\r") + sys.stderr.flush() def get_opts() -> argparse.Namespace: @@ -121,6 +150,10 @@ def get_opts() -> argparse.Namespace: "--show-devices", action="store_true", ) + parser.add_argument( + "--no-progress", + action="store_true", + ) opts = parser.parse_args() if opts.beam_size <= 0: @@ -207,6 +240,7 @@ def main() -> None: wsp=wsp, sd_device=opts.mic, num_block=opts.num_block, + no_progress=opts.no_progress, ctx=ctx, ) diff --git a/whispering/pbar.py b/whispering/pbar.py new file mode 100644 index 0000000..9324cf1 --- /dev/null +++ b/whispering/pbar.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python3 +import threading +import time + +from tqdm import tqdm +from whisper.audio import CHUNK_LENGTH, HOP_LENGTH + + +class ProgressBar(threading.Thread): + def __init__(self, *, num_block: int): + super().__init__() + self.started = threading.Event() + self.alive = True + self.start() + self.started.set() # start + self.num_block = num_block + + def __del__(self): + self.kill() + + def kill(self): + self.started.set() + self.alive = False + self.join() + + def end(self): + self.started.clear() + + def run(self): + self.started.wait() + with tqdm( + total=self.num_block, + leave=False, + bar_format="Listening (Elapsed: {elapsed}, Estimated remaining: {remaining})", + ) as t: + for _ in range(self.num_block): + time.sleep(CHUNK_LENGTH / HOP_LENGTH) + t.update(1) + if not self.alive: + break