Add progress bar and --no-progress option

2025-06-06 05:38:48 +00:00 · 2022-09-29 21:59:12 +09:00 · 2022-09-29 21:59:12 +09:00 · 6e0d62d9e2
commit 6e0d62d9e2
parent 88cda28b83
5 changed files with 78 additions and 2 deletions
--- a/README.md
+++ b/README.md
@ -31,6 +31,7 @@ whispering --language en --model tiny
 - ``--help`` shows full options
 - ``--model`` set the [model name](https://github.com/openai/whisper#available-models-and-languages) to use. Larger models will be more accurate, but may not be able to transcribe in real time.
 - ``--language`` sets the language to transcribe. The list of languages are shown with ``whispering -h``
+- ``--no-progress`` disables the progress message
 - ``-t`` sets temperatures to decode. You can set several like (``-t 0.0 -t 0.1 -t 0.5``), but too many temperatures exhaust decoding time
 - ``--debug`` outputs logs for debug

--- a/poetry.lock
+++ b/poetry.lock
@ -514,7 +514,7 @@ resolved_reference = "62fe7f1009a534986ac1d32a4aef8c244d029c28"
 [metadata]
 lock-version = "1.1"
 python-versions = ">=3.8,<3.11"
-content-hash = "2022c2468c9ad210bff4327f3f0672fa471eda310aaae2c62304fa5c297593dc"
+content-hash = "d041d21a202339f405cc37076403f92135ee1f113cdfece5a78c9ee12374be7b"

 [metadata.files]
 black = [
--- a/pyproject.toml
+++ b/pyproject.toml
@ -12,6 +12,7 @@ whisper = {git = "https://github.com/openai/whisper.git", rev = '62fe7f1009a5349
 sounddevice = "^0.4.5"
 pydantic = "^1.10.2"
 websockets = "^10.3"
+tqdm = "*"


 [tool.poetry.group.dev.dependencies]
--- a/whispering/cli.py
+++ b/whispering/cli.py
@ -3,15 +3,18 @@
 import argparse
 import asyncio
 import queue
+import sys
 from logging import DEBUG, INFO, basicConfig, getLogger
 from typing import Optional, Union

 import sounddevice as sd
 import torch
+from transformers.pipelines.text2text_generation import enum
 from whisper import available_models
 from whisper.audio import N_FRAMES, SAMPLE_RATE
 from whisper.tokenizer import LANGUAGES, TO_LANGUAGE_CODE

+from whispering.pbar import ProgressBar
 from whispering.schema import Context, WhisperConfig
 from whispering.serve import serve_with_websocket
 from whispering.transcriber import WhisperStreamingTranscriber
@ -26,6 +29,7 @@ def transcribe_from_mic(
    sd_device: Optional[Union[int, str]],
    num_block: int,
    ctx: Context,
+    no_progress: bool,
 ) -> None:
    q = queue.Queue()

@ -46,10 +50,35 @@ def transcribe_from_mic(
        idx: int = 0
        while True:
            logger.debug(f"Segment #: {idx}, The rest of queue: {q.qsize()}")
-            segment = q.get()
+
+            if no_progress:
+                segment = q.get()
+            else:
+                pbar_thread = ProgressBar(num_block=num_block)
+                try:
+                    segment = q.get()
+                except KeyboardInterrupt:
+                    pbar_thread.kill()
+                    return
+                pbar_thread.kill()
+
+            logger.debug(f"Got. The rest of queue: {q.qsize()}")
+            if not no_progress:
+                sys.stderr.write("Analyzing")
+                sys.stderr.flush()
+
            for chunk in wsp.transcribe(segment=segment, ctx=ctx):
+                if not no_progress:
+                    sys.stderr.write("\r")
+                    sys.stderr.flush()
                print(f"{chunk.start:.2f}->{chunk.end:.2f}\t{chunk.text}")
+                if not no_progress:
+                    sys.stderr.write("Analyzing")
+                    sys.stderr.flush()
            idx += 1
+            if not no_progress:
+                sys.stderr.write("\r")
+                sys.stderr.flush()


 def get_opts() -> argparse.Namespace:
@ -121,6 +150,10 @@ def get_opts() -> argparse.Namespace:
        "--show-devices",
        action="store_true",
    )
+    parser.add_argument(
+        "--no-progress",
+        action="store_true",
+    )
    opts = parser.parse_args()

    if opts.beam_size <= 0:
@ -207,6 +240,7 @@ def main() -> None:
            wsp=wsp,
            sd_device=opts.mic,
            num_block=opts.num_block,
+            no_progress=opts.no_progress,
            ctx=ctx,
        )

--- a/whispering/pbar.py
+++ b/whispering/pbar.py
@ -0,0 +1,40 @@
+#!/usr/bin/env python3
+import threading
+import time
+
+from tqdm import tqdm
+from whisper.audio import CHUNK_LENGTH, HOP_LENGTH
+
+
+class ProgressBar(threading.Thread):
+    """Background thread that shows a "Listening" progress message via tqdm.
+
+    The thread starts itself on construction and advances the bar once per
+    tick until ``num_block`` ticks have elapsed or :meth:`kill` is called.
+    """
+
+    def __init__(self, *, num_block: int):
+        """Create and immediately start the progress thread.
+
+        Args:
+            num_block: Number of ticks the bar covers (passed to tqdm as
+                ``total``).
+        """
+        super().__init__()
+        # Everything run() reads must exist before start(); otherwise the
+        # worker can race ahead of __init__ and hit an AttributeError.
+        self.num_block = num_block
+        self.started = threading.Event()
+        self.alive = True
+        # Lets kill() interrupt the per-tick wait so the caller is not stuck
+        # in join() until the current tick finishes.
+        self._stop_requested = threading.Event()
+        self.started.set()  # start
+        self.start()
+
+    def __del__(self):
+        self.kill()
+
+    def kill(self):
+        """Ask the thread to stop and wait for it to finish.
+
+        Safe to call more than once, before the thread has started, and from
+        the progress thread itself (in which case no join is attempted).
+        """
+        self.started.set()
+        self.alive = False
+        self._stop_requested.set()
+        # join() raises RuntimeError on an unstarted thread or on the current
+        # thread, so only join a live thread other than ourselves.
+        if self.is_alive() and threading.current_thread() is not self:
+            self.join()
+
+    def end(self):
+        """Clear the ``started`` event."""
+        self.started.clear()
+
+    def run(self):
+        """Advance the bar once per tick until finished or killed."""
+        self.started.wait()
+        with tqdm(
+            total=self.num_block,
+            leave=False,
+            bar_format="Listening (Elapsed: {elapsed}, Estimated remaining: {remaining})",
+        ) as t:
+            for _ in range(self.num_block):
+                # Event.wait returns True as soon as kill() sets the event,
+                # and False after the tick interval has elapsed.
+                if self._stop_requested.wait(CHUNK_LENGTH / HOP_LENGTH):
+                    break
+                t.update(1)
+                if not self.alive:
+                    break