mirror of
https://github.com/shirayu/whispering.git
synced 2025-02-16 10:35:16 +00:00
Add progress bar and --no-progress option
This commit is contained in:
parent
88cda28b83
commit
6e0d62d9e2
5 changed files with 78 additions and 2 deletions
|
@ -31,6 +31,7 @@ whispering --language en --model tiny
|
|||
- ``--help`` shows full options
|
||||
- ``--model`` sets the [model name](https://github.com/openai/whisper#available-models-and-languages) to use. Larger models will be more accurate, but may not be able to transcribe in real time.
|
||||
- ``--language`` sets the language to transcribe. The list of languages is shown with ``whispering -h``
|
||||
- ``--no-progress`` disables the progress message
|
||||
- ``-t`` sets the temperatures used for decoding. You can set several (e.g. ``-t 0.0 -t 0.1 -t 0.5``), but too many temperatures increase the decoding time
|
||||
- ``--debug`` outputs logs for debug
|
||||
|
||||
|
|
2
poetry.lock
generated
2
poetry.lock
generated
|
@ -514,7 +514,7 @@ resolved_reference = "62fe7f1009a534986ac1d32a4aef8c244d029c28"
|
|||
[metadata]
|
||||
lock-version = "1.1"
|
||||
python-versions = ">=3.8,<3.11"
|
||||
content-hash = "2022c2468c9ad210bff4327f3f0672fa471eda310aaae2c62304fa5c297593dc"
|
||||
content-hash = "d041d21a202339f405cc37076403f92135ee1f113cdfece5a78c9ee12374be7b"
|
||||
|
||||
[metadata.files]
|
||||
black = [
|
||||
|
|
|
@ -12,6 +12,7 @@ whisper = {git = "https://github.com/openai/whisper.git", rev = '62fe7f1009a5349
|
|||
sounddevice = "^0.4.5"
|
||||
pydantic = "^1.10.2"
|
||||
websockets = "^10.3"
|
||||
tqdm = "*"
|
||||
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
|
|
|
@ -3,15 +3,18 @@
|
|||
import argparse
|
||||
import asyncio
|
||||
import queue
|
||||
import sys
|
||||
from logging import DEBUG, INFO, basicConfig, getLogger
|
||||
from typing import Optional, Union
|
||||
|
||||
import sounddevice as sd
|
||||
import torch
|
||||
from transformers.pipelines.text2text_generation import enum
|
||||
from whisper import available_models
|
||||
from whisper.audio import N_FRAMES, SAMPLE_RATE
|
||||
from whisper.tokenizer import LANGUAGES, TO_LANGUAGE_CODE
|
||||
|
||||
from whispering.pbar import ProgressBar
|
||||
from whispering.schema import Context, WhisperConfig
|
||||
from whispering.serve import serve_with_websocket
|
||||
from whispering.transcriber import WhisperStreamingTranscriber
|
||||
|
@ -26,6 +29,7 @@ def transcribe_from_mic(
|
|||
sd_device: Optional[Union[int, str]],
|
||||
num_block: int,
|
||||
ctx: Context,
|
||||
no_progress: bool,
|
||||
) -> None:
|
||||
q = queue.Queue()
|
||||
|
||||
|
@ -46,10 +50,35 @@ def transcribe_from_mic(
|
|||
idx: int = 0
|
||||
while True:
|
||||
logger.debug(f"Segment #: {idx}, The rest of queue: {q.qsize()}")
|
||||
segment = q.get()
|
||||
|
||||
if no_progress:
|
||||
segment = q.get()
|
||||
else:
|
||||
pbar_thread = ProgressBar(num_block=num_block)
|
||||
try:
|
||||
segment = q.get()
|
||||
except KeyboardInterrupt:
|
||||
pbar_thread.kill()
|
||||
return
|
||||
pbar_thread.kill()
|
||||
|
||||
logger.debug(f"Got. The rest of queue: {q.qsize()}")
|
||||
if not no_progress:
|
||||
sys.stderr.write("Analyzing")
|
||||
sys.stderr.flush()
|
||||
|
||||
for chunk in wsp.transcribe(segment=segment, ctx=ctx):
|
||||
if not no_progress:
|
||||
sys.stderr.write("\r")
|
||||
sys.stderr.flush()
|
||||
print(f"{chunk.start:.2f}->{chunk.end:.2f}\t{chunk.text}")
|
||||
if not no_progress:
|
||||
sys.stderr.write("Analyzing")
|
||||
sys.stderr.flush()
|
||||
idx += 1
|
||||
if not no_progress:
|
||||
sys.stderr.write("\r")
|
||||
sys.stderr.flush()
|
||||
|
||||
|
||||
def get_opts() -> argparse.Namespace:
|
||||
|
@ -121,6 +150,10 @@ def get_opts() -> argparse.Namespace:
|
|||
"--show-devices",
|
||||
action="store_true",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-progress",
|
||||
action="store_true",
|
||||
)
|
||||
opts = parser.parse_args()
|
||||
|
||||
if opts.beam_size <= 0:
|
||||
|
@ -207,6 +240,7 @@ def main() -> None:
|
|||
wsp=wsp,
|
||||
sd_device=opts.mic,
|
||||
num_block=opts.num_block,
|
||||
no_progress=opts.no_progress,
|
||||
ctx=ctx,
|
||||
)
|
||||
|
||||
|
|
40
whispering/pbar.py
Normal file
40
whispering/pbar.py
Normal file
|
@ -0,0 +1,40 @@
|
|||
#!/usr/bin/env python3
|
||||
import threading
|
||||
import time
|
||||
|
||||
from tqdm import tqdm
|
||||
from whisper.audio import CHUNK_LENGTH, HOP_LENGTH
|
||||
|
||||
|
||||
class ProgressBar(threading.Thread):
    """Background thread that renders a "Listening" progress line with tqdm.

    The thread starts itself on construction and ticks the bar once per
    block until ``num_block`` ticks have happened or :meth:`kill` is called.
    """

    def __init__(self, *, num_block: int):
        """Create the progress thread and start it immediately.

        Args:
            num_block: Total number of ticks shown by the bar.
        """
        super().__init__()
        # Every attribute read by run() must exist BEFORE start(); otherwise
        # the worker thread can race ahead and hit an AttributeError.
        self.num_block = num_block
        self.started = threading.Event()
        self.alive = True
        self.start()
        self.started.set()  # release run(), which blocks on this event

    def __del__(self):
        self.kill()

    def kill(self):
        """Ask the worker to stop and wait for it to finish.

        Safe to call more than once, and safe to call on a thread that was
        never started (``Thread.join`` would raise ``RuntimeError`` there).
        """
        # Set the event first so a worker still blocked in run() wakes up.
        self.started.set()
        self.alive = False
        if self.is_alive() and threading.current_thread() is not self:
            self.join()

    def end(self):
        """Clear the start event; a worker already past its wait is unaffected."""
        self.started.clear()

    def run(self):
        """Thread body: wait for the start event, then tick the bar."""
        self.started.wait()
        with tqdm(
            total=self.num_block,
            leave=False,
            bar_format="Listening (Elapsed: {elapsed}, Estimated remaining: {remaining})",
        ) as t:
            for _ in range(self.num_block):
                # NOTE(review): each tick sleeps CHUNK_LENGTH / HOP_LENGTH
                # seconds -- confirm this matches the duration of one block.
                time.sleep(CHUNK_LENGTH / HOP_LENGTH)
                t.update(1)
                # The stop flag is checked only after the tick, so kill()
                # may wait up to one sleep interval.
                if not self.alive:
                    break
|
Loading…
Reference in a new issue