mirror of
https://github.com/shirayu/whispering.git
synced 2024-09-20 18:40:12 +00:00
Add progress bar and --no-progress option
This commit is contained in:
parent
88cda28b83
commit
6e0d62d9e2
5 changed files with 78 additions and 2 deletions
|
@ -31,6 +31,7 @@ whispering --language en --model tiny
|
||||||
- ``--help`` shows full options
|
- ``--help`` shows full options
|
||||||
- ``--model`` set the [model name](https://github.com/openai/whisper#available-models-and-languages) to use. Larger models will be more accurate, but may not be able to transcribe in real time.
|
- ``--model`` set the [model name](https://github.com/openai/whisper#available-models-and-languages) to use. Larger models will be more accurate, but may not be able to transcribe in real time.
|
||||||
- ``--language`` sets the language to transcribe. The list of languages are shown with ``whispering -h``
|
- ``--language`` sets the language to transcribe. The list of languages are shown with ``whispering -h``
|
||||||
|
- ``--no-progress`` disables the progress message
|
||||||
- ``-t`` sets temperatures to decode. You can set several like (``-t 0.0 -t 0.1 -t 0.5``), but too many temperatures exhaust decoding time
|
- ``-t`` sets temperatures to decode. You can set several like (``-t 0.0 -t 0.1 -t 0.5``), but too many temperatures exhaust decoding time
|
||||||
- ``--debug`` outputs logs for debug
|
- ``--debug`` outputs logs for debug
|
||||||
|
|
||||||
|
|
2
poetry.lock
generated
2
poetry.lock
generated
|
@ -514,7 +514,7 @@ resolved_reference = "62fe7f1009a534986ac1d32a4aef8c244d029c28"
|
||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "1.1"
|
lock-version = "1.1"
|
||||||
python-versions = ">=3.8,<3.11"
|
python-versions = ">=3.8,<3.11"
|
||||||
content-hash = "2022c2468c9ad210bff4327f3f0672fa471eda310aaae2c62304fa5c297593dc"
|
content-hash = "d041d21a202339f405cc37076403f92135ee1f113cdfece5a78c9ee12374be7b"
|
||||||
|
|
||||||
[metadata.files]
|
[metadata.files]
|
||||||
black = [
|
black = [
|
||||||
|
|
|
@ -12,6 +12,7 @@ whisper = {git = "https://github.com/openai/whisper.git", rev = '62fe7f1009a5349
|
||||||
sounddevice = "^0.4.5"
|
sounddevice = "^0.4.5"
|
||||||
pydantic = "^1.10.2"
|
pydantic = "^1.10.2"
|
||||||
websockets = "^10.3"
|
websockets = "^10.3"
|
||||||
|
tqdm = "*"
|
||||||
|
|
||||||
|
|
||||||
[tool.poetry.group.dev.dependencies]
|
[tool.poetry.group.dev.dependencies]
|
||||||
|
|
|
@ -3,15 +3,18 @@
|
||||||
import argparse
|
import argparse
|
||||||
import asyncio
|
import asyncio
|
||||||
import queue
|
import queue
|
||||||
|
import sys
|
||||||
from logging import DEBUG, INFO, basicConfig, getLogger
|
from logging import DEBUG, INFO, basicConfig, getLogger
|
||||||
from typing import Optional, Union
|
from typing import Optional, Union
|
||||||
|
|
||||||
import sounddevice as sd
|
import sounddevice as sd
|
||||||
import torch
|
import torch
|
||||||
|
from transformers.pipelines.text2text_generation import enum
|
||||||
from whisper import available_models
|
from whisper import available_models
|
||||||
from whisper.audio import N_FRAMES, SAMPLE_RATE
|
from whisper.audio import N_FRAMES, SAMPLE_RATE
|
||||||
from whisper.tokenizer import LANGUAGES, TO_LANGUAGE_CODE
|
from whisper.tokenizer import LANGUAGES, TO_LANGUAGE_CODE
|
||||||
|
|
||||||
|
from whispering.pbar import ProgressBar
|
||||||
from whispering.schema import Context, WhisperConfig
|
from whispering.schema import Context, WhisperConfig
|
||||||
from whispering.serve import serve_with_websocket
|
from whispering.serve import serve_with_websocket
|
||||||
from whispering.transcriber import WhisperStreamingTranscriber
|
from whispering.transcriber import WhisperStreamingTranscriber
|
||||||
|
@ -26,6 +29,7 @@ def transcribe_from_mic(
|
||||||
sd_device: Optional[Union[int, str]],
|
sd_device: Optional[Union[int, str]],
|
||||||
num_block: int,
|
num_block: int,
|
||||||
ctx: Context,
|
ctx: Context,
|
||||||
|
no_progress: bool,
|
||||||
) -> None:
|
) -> None:
|
||||||
q = queue.Queue()
|
q = queue.Queue()
|
||||||
|
|
||||||
|
@ -46,10 +50,35 @@ def transcribe_from_mic(
|
||||||
idx: int = 0
|
idx: int = 0
|
||||||
while True:
|
while True:
|
||||||
logger.debug(f"Segment #: {idx}, The rest of queue: {q.qsize()}")
|
logger.debug(f"Segment #: {idx}, The rest of queue: {q.qsize()}")
|
||||||
|
|
||||||
|
if no_progress:
|
||||||
segment = q.get()
|
segment = q.get()
|
||||||
|
else:
|
||||||
|
pbar_thread = ProgressBar(num_block=num_block)
|
||||||
|
try:
|
||||||
|
segment = q.get()
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
pbar_thread.kill()
|
||||||
|
return
|
||||||
|
pbar_thread.kill()
|
||||||
|
|
||||||
|
logger.debug(f"Got. The rest of queue: {q.qsize()}")
|
||||||
|
if not no_progress:
|
||||||
|
sys.stderr.write("Analyzing")
|
||||||
|
sys.stderr.flush()
|
||||||
|
|
||||||
for chunk in wsp.transcribe(segment=segment, ctx=ctx):
|
for chunk in wsp.transcribe(segment=segment, ctx=ctx):
|
||||||
|
if not no_progress:
|
||||||
|
sys.stderr.write("\r")
|
||||||
|
sys.stderr.flush()
|
||||||
print(f"{chunk.start:.2f}->{chunk.end:.2f}\t{chunk.text}")
|
print(f"{chunk.start:.2f}->{chunk.end:.2f}\t{chunk.text}")
|
||||||
|
if not no_progress:
|
||||||
|
sys.stderr.write("Analyzing")
|
||||||
|
sys.stderr.flush()
|
||||||
idx += 1
|
idx += 1
|
||||||
|
if not no_progress:
|
||||||
|
sys.stderr.write("\r")
|
||||||
|
sys.stderr.flush()
|
||||||
|
|
||||||
|
|
||||||
def get_opts() -> argparse.Namespace:
|
def get_opts() -> argparse.Namespace:
|
||||||
|
@ -121,6 +150,10 @@ def get_opts() -> argparse.Namespace:
|
||||||
"--show-devices",
|
"--show-devices",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--no-progress",
|
||||||
|
action="store_true",
|
||||||
|
)
|
||||||
opts = parser.parse_args()
|
opts = parser.parse_args()
|
||||||
|
|
||||||
if opts.beam_size <= 0:
|
if opts.beam_size <= 0:
|
||||||
|
@ -207,6 +240,7 @@ def main() -> None:
|
||||||
wsp=wsp,
|
wsp=wsp,
|
||||||
sd_device=opts.mic,
|
sd_device=opts.mic,
|
||||||
num_block=opts.num_block,
|
num_block=opts.num_block,
|
||||||
|
no_progress=opts.no_progress,
|
||||||
ctx=ctx,
|
ctx=ctx,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
40
whispering/pbar.py
Normal file
40
whispering/pbar.py
Normal file
|
@ -0,0 +1,40 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
|
||||||
|
from tqdm import tqdm
|
||||||
|
from whisper.audio import CHUNK_LENGTH, HOP_LENGTH
|
||||||
|
|
||||||
|
|
||||||
|
class ProgressBar(threading.Thread):
    """Background thread that shows a "Listening" status line via tqdm.

    The thread starts itself from ``__init__`` and ticks a tqdm bar up to
    ``num_block`` times.  Call :meth:`kill` to stop it and wait for it to
    finish; the bar is created with ``leave=False``.

    Attributes:
        num_block: Number of ticks the bar is advanced before it ends.
        alive: ``True`` until :meth:`kill` is called; ``run`` stops once
            it observes ``False``.
        started: Event that gates the start of ``run``.
    """

    def __init__(self, *, num_block: int):
        """Create the progress thread and start it immediately.

        Args:
            num_block: Total number of ticks to display.
        """
        super().__init__()
        # All state read by run() must exist *before* start(): the worker
        # may be scheduled right away, and reading self.num_block before
        # it is assigned would raise AttributeError inside the thread.
        self.num_block = num_block
        self.alive = True
        self.started = threading.Event()
        # Set by kill() so that run() can wake up from its per-tick wait
        # immediately instead of sleeping out the full interval.
        self._stop_requested = threading.Event()
        self.start()
        self.started.set()  # start

    def __del__(self):
        """Stop the thread when the object is garbage collected."""
        self.kill()

    def kill(self):
        """Ask the thread to stop and block until it has finished."""
        # Release run() in case it is still parked on the start gate.
        self.started.set()
        self.alive = False
        self._stop_requested.set()
        self.join()

    def end(self):
        """Clear the start gate (does not interrupt an already running bar)."""
        self.started.clear()

    def run(self):
        """Advance the bar once per tick until done or until killed."""
        self.started.wait()
        # NOTE(review): CHUNK_LENGTH / HOP_LENGTH is used as the tick length
        # in seconds -- confirm it matches the real duration of one block.
        interval = CHUNK_LENGTH / HOP_LENGTH
        with tqdm(
            total=self.num_block,
            leave=False,
            bar_format="Listening (Elapsed: {elapsed}, Estimated remaining: {remaining})",
        ) as t:
            for _ in range(self.num_block):
                # Event.wait() returns True as soon as kill() fires, so the
                # caller of kill() is not held up for a whole tick.
                if self._stop_requested.wait(interval) or not self.alive:
                    break
                t.update(1)
|
Loading…
Reference in a new issue