Add progress bar and --no-progress option

This commit is contained in:
Yuta Hayashibe 2022-09-29 21:59:12 +09:00
parent 88cda28b83
commit 6e0d62d9e2
5 changed files with 78 additions and 2 deletions

View file

@ -31,6 +31,7 @@ whispering --language en --model tiny
- ``--help`` shows full options - ``--help`` shows full options
- ``--model`` set the [model name](https://github.com/openai/whisper#available-models-and-languages) to use. Larger models will be more accurate, but may not be able to transcribe in real time. - ``--model`` set the [model name](https://github.com/openai/whisper#available-models-and-languages) to use. Larger models will be more accurate, but may not be able to transcribe in real time.
- ``--language`` sets the language to transcribe. The list of languages is shown with ``whispering -h`` - ``--language`` sets the language to transcribe. The list of languages is shown with ``whispering -h``
- ``--no-progress`` disables the progress message
- ``-t`` sets temperatures to decode. You can set several like (``-t 0.0 -t 0.1 -t 0.5``), but too many temperatures exhaust decoding time - ``-t`` sets temperatures to decode. You can set several like (``-t 0.0 -t 0.1 -t 0.5``), but too many temperatures exhaust decoding time
- ``--debug`` outputs logs for debug - ``--debug`` outputs logs for debug

2
poetry.lock generated
View file

@ -514,7 +514,7 @@ resolved_reference = "62fe7f1009a534986ac1d32a4aef8c244d029c28"
[metadata] [metadata]
lock-version = "1.1" lock-version = "1.1"
python-versions = ">=3.8,<3.11" python-versions = ">=3.8,<3.11"
content-hash = "2022c2468c9ad210bff4327f3f0672fa471eda310aaae2c62304fa5c297593dc" content-hash = "d041d21a202339f405cc37076403f92135ee1f113cdfece5a78c9ee12374be7b"
[metadata.files] [metadata.files]
black = [ black = [

View file

@ -12,6 +12,7 @@ whisper = {git = "https://github.com/openai/whisper.git", rev = '62fe7f1009a5349
sounddevice = "^0.4.5" sounddevice = "^0.4.5"
pydantic = "^1.10.2" pydantic = "^1.10.2"
websockets = "^10.3" websockets = "^10.3"
tqdm = "*"
[tool.poetry.group.dev.dependencies] [tool.poetry.group.dev.dependencies]

View file

@ -3,15 +3,18 @@
import argparse import argparse
import asyncio import asyncio
import queue import queue
import sys
from logging import DEBUG, INFO, basicConfig, getLogger from logging import DEBUG, INFO, basicConfig, getLogger
from typing import Optional, Union from typing import Optional, Union
import sounddevice as sd import sounddevice as sd
import torch import torch
from transformers.pipelines.text2text_generation import enum
from whisper import available_models from whisper import available_models
from whisper.audio import N_FRAMES, SAMPLE_RATE from whisper.audio import N_FRAMES, SAMPLE_RATE
from whisper.tokenizer import LANGUAGES, TO_LANGUAGE_CODE from whisper.tokenizer import LANGUAGES, TO_LANGUAGE_CODE
from whispering.pbar import ProgressBar
from whispering.schema import Context, WhisperConfig from whispering.schema import Context, WhisperConfig
from whispering.serve import serve_with_websocket from whispering.serve import serve_with_websocket
from whispering.transcriber import WhisperStreamingTranscriber from whispering.transcriber import WhisperStreamingTranscriber
@ -26,6 +29,7 @@ def transcribe_from_mic(
sd_device: Optional[Union[int, str]], sd_device: Optional[Union[int, str]],
num_block: int, num_block: int,
ctx: Context, ctx: Context,
no_progress: bool,
) -> None: ) -> None:
q = queue.Queue() q = queue.Queue()
@ -46,10 +50,35 @@ def transcribe_from_mic(
idx: int = 0 idx: int = 0
while True: while True:
logger.debug(f"Segment #: {idx}, The rest of queue: {q.qsize()}") logger.debug(f"Segment #: {idx}, The rest of queue: {q.qsize()}")
segment = q.get()
if no_progress:
segment = q.get()
else:
pbar_thread = ProgressBar(num_block=num_block)
try:
segment = q.get()
except KeyboardInterrupt:
pbar_thread.kill()
return
pbar_thread.kill()
logger.debug(f"Got. The rest of queue: {q.qsize()}")
if not no_progress:
sys.stderr.write("Analyzing")
sys.stderr.flush()
for chunk in wsp.transcribe(segment=segment, ctx=ctx): for chunk in wsp.transcribe(segment=segment, ctx=ctx):
if not no_progress:
sys.stderr.write("\r")
sys.stderr.flush()
print(f"{chunk.start:.2f}->{chunk.end:.2f}\t{chunk.text}") print(f"{chunk.start:.2f}->{chunk.end:.2f}\t{chunk.text}")
if not no_progress:
sys.stderr.write("Analyzing")
sys.stderr.flush()
idx += 1 idx += 1
if not no_progress:
sys.stderr.write("\r")
sys.stderr.flush()
def get_opts() -> argparse.Namespace: def get_opts() -> argparse.Namespace:
@ -121,6 +150,10 @@ def get_opts() -> argparse.Namespace:
"--show-devices", "--show-devices",
action="store_true", action="store_true",
) )
parser.add_argument(
"--no-progress",
action="store_true",
)
opts = parser.parse_args() opts = parser.parse_args()
if opts.beam_size <= 0: if opts.beam_size <= 0:
@ -207,6 +240,7 @@ def main() -> None:
wsp=wsp, wsp=wsp,
sd_device=opts.mic, sd_device=opts.mic,
num_block=opts.num_block, num_block=opts.num_block,
no_progress=opts.no_progress,
ctx=ctx, ctx=ctx,
) )

40
whispering/pbar.py Normal file
View file

@ -0,0 +1,40 @@
#!/usr/bin/env python3
import threading
import time
from tqdm import tqdm
from whisper.audio import CHUNK_LENGTH, HOP_LENGTH
class ProgressBar(threading.Thread):
    """Background thread that renders a "Listening" progress message.

    The thread starts itself on construction and advances a ``tqdm`` bar one
    step per tick, up to ``num_block`` steps. The tick length is
    ``CHUNK_LENGTH / HOP_LENGTH`` seconds (both constants are imported from
    ``whisper.audio``). Call :meth:`kill` to stop the bar and wait for the
    thread to finish.
    """

    def __init__(self, *, num_block: int) -> None:
        """Create the progress bar thread and start it immediately.

        Args:
            num_block: Total number of steps the bar counts up to.
        """
        super().__init__()
        # Every attribute read by run() must exist *before* start() is
        # called; otherwise the new thread can race ahead of this
        # constructor and fail with AttributeError.
        self.num_block = num_block
        self.alive = True
        self.started = threading.Event()
        # Set by kill(); lets run() wake up at once instead of finishing
        # an uninterruptible sleep.
        self._kill_event = threading.Event()
        self.started.set()  # start
        self.start()

    def __del__(self) -> None:
        self.kill()

    def kill(self) -> None:
        """Ask the thread to stop and wait until it has finished.

        Safe to call more than once, and safe to call on an instance whose
        thread was never started.
        """
        self.alive = False
        self._kill_event.set()
        # Also release a thread that is still blocked on `started`.
        self.started.set()
        # join() raises RuntimeError for a thread that was never started or
        # when a thread tries to join itself, so guard against both.
        if self.ident is not None and self is not threading.current_thread():
            self.join()

    def end(self) -> None:
        """Clear the ``started`` event."""
        self.started.clear()

    def run(self) -> None:
        """Advance the bar once per tick until finished or killed."""
        self.started.wait()
        tick_seconds = CHUNK_LENGTH / HOP_LENGTH
        with tqdm(
            total=self.num_block,
            leave=False,
            bar_format="Listening (Elapsed: {elapsed}, Estimated remaining: {remaining})",
        ) as t:
            for _ in range(self.num_block):
                # wait() returns True as soon as kill() is called, and False
                # when the tick elapses normally.
                if self._kill_event.wait(tick_seconds):
                    break
                t.update(1)