From 6e0d62d9e26cf587556b96e97047315fdc16e088 Mon Sep 17 00:00:00 2001
From: Yuta Hayashibe <yuta@hayashibe.jp>
Date: Thu, 29 Sep 2022 21:59:12 +0900
Subject: [PATCH] Add progress bar and --no-progress option

---
 README.md          |  1 +
 poetry.lock        |  2 +-
 pyproject.toml     |  1 +
 whispering/cli.py  | 36 +++++++++++++++++++++++++++++++++++-
 whispering/pbar.py | 40 ++++++++++++++++++++++++++++++++++++++++
 5 files changed, 78 insertions(+), 2 deletions(-)
 create mode 100644 whispering/pbar.py

diff --git a/README.md b/README.md
index 528a8d9..bba104e 100644
--- a/README.md
+++ b/README.md
@@ -31,6 +31,7 @@ whispering --language en --model tiny
 - ``--help`` shows full options
 - ``--model`` set the [model name](https://github.com/openai/whisper#available-models-and-languages) to use. Larger models will be more accurate, but may not be able to transcribe in real time.
 - ``--language`` sets the language to transcribe. The list of languages are shown with ``whispering -h``
+- ``--no-progress`` disables the progress message
 - ``-t`` sets temperatures to decode. You can set several like (``-t 0.0 -t 0.1 -t 0.5``), but too many temperatures exhaust decoding time
 - ``--debug`` outputs logs for debug
 
diff --git a/poetry.lock b/poetry.lock
index 63c8326..4f43d3a 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -514,7 +514,7 @@ resolved_reference = "62fe7f1009a534986ac1d32a4aef8c244d029c28"
 [metadata]
 lock-version = "1.1"
 python-versions = ">=3.8,<3.11"
-content-hash = "2022c2468c9ad210bff4327f3f0672fa471eda310aaae2c62304fa5c297593dc"
+content-hash = "d041d21a202339f405cc37076403f92135ee1f113cdfece5a78c9ee12374be7b"
 
 [metadata.files]
 black = [
diff --git a/pyproject.toml b/pyproject.toml
index b5dfc28..8dbf250 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,6 +12,7 @@ whisper = {git = "https://github.com/openai/whisper.git", rev = '62fe7f1009a5349
 sounddevice = "^0.4.5"
 pydantic = "^1.10.2"
 websockets = "^10.3"
+tqdm = "*"
 
 
 [tool.poetry.group.dev.dependencies]
diff --git a/whispering/cli.py b/whispering/cli.py
index 9a7bf42..aa4a3fa 100644
--- a/whispering/cli.py
+++ b/whispering/cli.py
@@ -3,15 +3,18 @@
 import argparse
 import asyncio
+import enum
 import queue
+import sys
 from logging import DEBUG, INFO, basicConfig, getLogger
 from typing import Optional, Union
 
 import sounddevice as sd
 import torch
 from whisper import available_models
 from whisper.audio import N_FRAMES, SAMPLE_RATE
 from whisper.tokenizer import LANGUAGES, TO_LANGUAGE_CODE
 
+from whispering.pbar import ProgressBar
 from whispering.schema import Context, WhisperConfig
 from whispering.serve import serve_with_websocket
 from whispering.transcriber import WhisperStreamingTranscriber
@@ -26,6 +29,7 @@ def transcribe_from_mic(
     sd_device: Optional[Union[int, str]],
     num_block: int,
     ctx: Context,
+    no_progress: bool,
 ) -> None:
     q = queue.Queue()
 
@@ -46,10 +50,35 @@ def transcribe_from_mic(
         idx: int = 0
         while True:
             logger.debug(f"Segment #: {idx}, The rest of queue: {q.qsize()}")
-            segment = q.get()
+
+            if no_progress:
+                segment = q.get()
+            else:
+                pbar_thread = ProgressBar(num_block=num_block)
+                try:
+                    segment = q.get()
+                except KeyboardInterrupt:
+                    pbar_thread.kill()
+                    return
+                pbar_thread.kill()
+
+            logger.debug(f"Got. The rest of queue: {q.qsize()}")
+            if not no_progress:
+                sys.stderr.write("Analyzing")
+                sys.stderr.flush()
+
             for chunk in wsp.transcribe(segment=segment, ctx=ctx):
+                if not no_progress:
+                    sys.stderr.write("\r")
+                    sys.stderr.flush()
                 print(f"{chunk.start:.2f}->{chunk.end:.2f}\t{chunk.text}")
+                if not no_progress:
+                    sys.stderr.write("Analyzing")
+                    sys.stderr.flush()
             idx += 1
+            if not no_progress:
+                sys.stderr.write("\r")
+                sys.stderr.flush()
 
 
 def get_opts() -> argparse.Namespace:
@@ -121,6 +150,10 @@ def get_opts() -> argparse.Namespace:
         "--show-devices",
         action="store_true",
     )
+    parser.add_argument(
+        "--no-progress",
+        action="store_true",
+    )
     opts = parser.parse_args()
 
     if opts.beam_size <= 0:
@@ -207,6 +240,7 @@ def main() -> None:
             wsp=wsp,
             sd_device=opts.mic,
             num_block=opts.num_block,
+            no_progress=opts.no_progress,
             ctx=ctx,
         )
 
diff --git a/whispering/pbar.py b/whispering/pbar.py
new file mode 100644
index 0000000..9324cf1
--- /dev/null
+++ b/whispering/pbar.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python3
+import threading
+import time
+
+from tqdm import tqdm
+from whisper.audio import CHUNK_LENGTH, HOP_LENGTH
+
+
+class ProgressBar(threading.Thread):
+    """Thread that renders a "Listening" status line on a tqdm bar.
+
+    The thread starts itself on construction and ticks once every
+    ``CHUNK_LENGTH / HOP_LENGTH`` seconds, for at most ``num_block``
+    ticks or until :meth:`kill` is called.
+    """
+
+    def __init__(self, *, num_block: int):
+        super().__init__()
+        self.started = threading.Event()
+        self.alive = True
+        # Assign every attribute that run() reads *before* start():
+        # the worker may be scheduled immediately, and reading
+        # self.num_block before it exists raises AttributeError.
+        self.num_block = num_block
+        self.start()
+        self.started.set()  # start
+
+    def __del__(self):
+        self.kill()
+
+    def kill(self):
+        """Ask the worker to stop and wait until it has exited."""
+        self.started.set()  # release run() if it is still waiting
+        self.alive = False
+        # join() raises RuntimeError on a thread that was never started
+        # or when called from the thread itself (possible via __del__).
+        if self.is_alive() and threading.current_thread() is not self:
+            self.join()
+
+    def end(self):
+        """Clear the start flag; a running bar is not interrupted."""
+        self.started.clear()
+
+    def run(self):
+        """Tick the bar until ``num_block`` ticks pass or it is killed."""
+        self.started.wait()
+        with tqdm(
+            total=self.num_block,
+            leave=False,
+            bar_format="Listening (Elapsed: {elapsed}, Estimated remaining: {remaining})",
+        ) as t:
+            for _ in range(self.num_block):
+                time.sleep(CHUNK_LENGTH / HOP_LENGTH)
+                t.update(1)
+                if not self.alive:
+                    break