Add progress bar and --no-progress option

2024-06-02 13:29:34 +00:00 · 2022-09-29 21:59:12 +09:00 · 2022-09-29 21:59:12 +09:00 · 6e0d62d9e2
parent 88cda28b83
commit 6e0d62d9e2
5 changed files with 78 additions and 2 deletions
--- a/README.md
+++ b/README.md
@ -31,6 +31,7 @@ whispering --language en --model tiny
 - ``--help`` shows full options
 - ``--model`` set the [model name](https://github.com/openai/whisper#available-models-and-languages) to use. Larger models will be more accurate, but may not be able to transcribe in real time.
 - ``--language`` sets the language to transcribe. The list of languages is shown with ``whispering -h``
 - ``--no-progress`` disables the progress message
 - ``-t`` sets the temperatures used for decoding. You can set several (e.g. ``-t 0.0 -t 0.1 -t 0.5``), but too many temperatures increase decoding time
 - ``--debug`` outputs logs for debug
--- a/poetry.lock
+++ b/poetry.lock
@ -514,7 +514,7 @@ resolved_reference = "62fe7f1009a534986ac1d32a4aef8c244d029c28"
 [metadata]
 lock-version = "1.1"
 python-versions = ">=3.8,<3.11"
-content-hash = "2022c2468c9ad210bff4327f3f0672fa471eda310aaae2c62304fa5c297593dc"
+content-hash = "d041d21a202339f405cc37076403f92135ee1f113cdfece5a78c9ee12374be7b"
 [metadata.files]
 black = [
--- a/pyproject.toml
+++ b/pyproject.toml
@ -12,6 +12,7 @@ whisper = {git = "https://github.com/openai/whisper.git", rev = '62fe7f1009a5349
 sounddevice = "^0.4.5"
 pydantic = "^1.10.2"
 websockets = "^10.3"
 tqdm = "*"
 [tool.poetry.group.dev.dependencies]
--- a/whispering/cli.py
+++ b/whispering/cli.py
@ -3,15 +3,18 @@
 import argparse
 import asyncio
 import queue
 import sys
 from logging import DEBUG, INFO, basicConfig, getLogger
 from typing import Optional, Union
 import sounddevice as sd
 import torch
 from transformers.pipelines.text2text_generation import enum
 from whisper import available_models
 from whisper.audio import N_FRAMES, SAMPLE_RATE
 from whisper.tokenizer import LANGUAGES, TO_LANGUAGE_CODE
 from whispering.pbar import ProgressBar
 from whispering.schema import Context, WhisperConfig
 from whispering.serve import serve_with_websocket
 from whispering.transcriber import WhisperStreamingTranscriber
@ -26,6 +29,7 @@ def transcribe_from_mic(
    sd_device: Optional[Union[int, str]],
    num_block: int,
    ctx: Context,
    no_progress: bool,
 ) -> None:
    q = queue.Queue()
@ -46,10 +50,35 @@ def transcribe_from_mic(
        idx: int = 0
        while True:
            logger.debug(f"Segment #: {idx}, The rest of queue: {q.qsize()}")
-            segment = q.get()
+
            if no_progress:
                segment = q.get()
            else:
                pbar_thread = ProgressBar(num_block=num_block)
                try:
                    segment = q.get()
                except KeyboardInterrupt:
                    pbar_thread.kill()
                    return
                pbar_thread.kill()
            logger.debug(f"Got. The rest of queue: {q.qsize()}")
            if not no_progress:
                sys.stderr.write("Analyzing")
                sys.stderr.flush()
            for chunk in wsp.transcribe(segment=segment, ctx=ctx):
                if not no_progress:
                    sys.stderr.write("\r")
                    sys.stderr.flush()
                print(f"{chunk.start:.2f}->{chunk.end:.2f}\t{chunk.text}")
                if not no_progress:
                    sys.stderr.write("Analyzing")
                    sys.stderr.flush()
            idx += 1
            if not no_progress:
                sys.stderr.write("\r")
                sys.stderr.flush()
 def get_opts() -> argparse.Namespace:
@ -121,6 +150,10 @@ def get_opts() -> argparse.Namespace:
        "--show-devices",
        action="store_true",
    )
    parser.add_argument(
        "--no-progress",
        action="store_true",
    )
    opts = parser.parse_args()
    if opts.beam_size <= 0:
@ -207,6 +240,7 @@ def main() -> None:
            wsp=wsp,
            sd_device=opts.mic,
            num_block=opts.num_block,
            no_progress=opts.no_progress,
            ctx=ctx,
        )
--- a/whispering/pbar.py
+++ b/whispering/pbar.py
@ -0,0 +1,40 @@
 #!/usr/bin/env python3
 import threading
 import time
 from tqdm import tqdm
 from whisper.audio import CHUNK_LENGTH, HOP_LENGTH
 class ProgressBar(threading.Thread):
     """Background thread that renders a "Listening" status line with tqdm.

     The thread starts itself on construction. It then advances a tqdm bar
     up to ``num_block`` times, sleeping ``CHUNK_LENGTH / HOP_LENGTH``
     seconds before each tick, and stops early once :meth:`kill` has been
     called.

     Args:
         num_block: Number of ticks (used as the tqdm ``total``) after which
             the bar finishes on its own.
     """

     def __init__(self, *, num_block: int):
         super().__init__()
         # Publish every attribute that run() reads *before* starting the
         # thread; otherwise run() can observe a half-initialised object
         # (e.g. a missing ``num_block``) and die with AttributeError.
         self.num_block = num_block
         self.started = threading.Event()
         self.alive = True
         self.started.set()  # start
         self.start()

     def __del__(self):
         # If __init__ failed before the state used by kill() existed there
         # is nothing to clean up.
         if hasattr(self, "started"):
             self.kill()

     def kill(self):
         """Request the thread to stop and wait until it has exited.

         Safe to call more than once, on a thread that never started, and
         from the progress thread itself: ``join()`` is only attempted when
         it cannot raise ``RuntimeError``.
         """
         # Release run() in case it is still blocked on the event.
         self.started.set()
         self.alive = False
         if self.is_alive() and threading.current_thread() is not self:
             self.join()

     def end(self):
         """Clear the ``started`` event."""
         self.started.clear()

     def run(self):
         """Tick the bar until ``num_block`` ticks elapsed or kill() was called."""
         self.started.wait()
         # Loop-invariant interval between two ticks of the bar.
         tick_seconds = CHUNK_LENGTH / HOP_LENGTH
         with tqdm(
             total=self.num_block,
             leave=False,
             bar_format="Listening (Elapsed: {elapsed}, Estimated remaining: {remaining})",
         ) as t:
             for _ in range(self.num_block):
                 time.sleep(tick_seconds)
                 t.update(1)
                 # The flag is polled once per tick, so kill() may wait up
                 # to one interval for the thread to finish.
                 if not self.alive:
                     break