whispering/whispering/vad.py

#!/usr/bin/env python3

from typing import Iterator

import numpy as np
import torch
from whisper.audio import N_FRAMES, SAMPLE_RATE

from whispering.schema import SpeechSegment


class VAD:
    """Voice-activity detector that groups consecutive speech blocks.

    Wraps the ``silero_vad`` model loaded through ``torch.hub`` and scores
    audio in fixed-size blocks of ``N_FRAMES`` elements each.
    """

    def __init__(
        self,
    ):
        """Load the ``silero_vad`` model from the ``snakers4/silero-vad`` hub repo."""
        # torch.hub.load returns a pair here; only the first item (the model)
        # is kept, the second one is discarded.
        # NOTE(review): presumably this hits the network / hub cache on first
        # use -- confirm before constructing VAD in latency-sensitive paths.
        self.vad_model, _ = torch.hub.load(
            repo_or_dir="snakers4/silero-vad",
            model="silero_vad",
        )

    def __call__(
        self,
        *,
        audio: np.ndarray,
        thredhold: float = 0.5,
    ) -> Iterator[SpeechSegment]:
        """Yield one ``SpeechSegment`` per run of consecutive speech blocks.

        The audio is cut along its first axis into blocks of ``N_FRAMES``
        elements. Each block is scored by the VAD model; a block counts as
        speech when its score is strictly greater than ``thredhold``.
        Adjacent speech blocks are merged into a single segment.

        Args:
            audio: Array whose first-axis length should be a multiple of
                ``N_FRAMES``. Trailing elements that do not fill a whole
                block are never scored and never appear in a segment.
            thredhold: Score above which a block is treated as speech.
                The (misspelled) name is kept because callers pass it by
                keyword.

        Yields:
            ``SpeechSegment`` objects in order of appearance.
            ``start_block_idx`` is the first speech block of the run and
            ``end_block_idx`` is exclusive (the first block after the run, or
            the total block count when the run reaches the end of ``audio``).
            ``audio`` is the matching slice of the input array.
        """

        def make_segment(
            *,
            start_block_idx: int,
            end_block_idx: int,
        ) -> SpeechSegment:
            # Block indices are converted back to element offsets by
            # multiplying with the block length.
            return SpeechSegment(
                start_block_idx=start_block_idx,
                end_block_idx=end_block_idx,
                audio=audio[N_FRAMES * start_block_idx : N_FRAMES * end_block_idx],
            )

        # Integer floor division: only complete blocks are considered.
        block_count: int = audio.shape[0] // N_FRAMES

        # Index of the first block of the speech run currently being
        # collected, or None while outside of a run.
        run_start = None
        for idx in range(block_count):
            block = audio[N_FRAMES * idx : N_FRAMES * (idx + 1)]
            # Pure inference: skip autograd bookkeeping. The context is
            # closed again before any ``yield`` so the caller's grad mode is
            # never affected while this generator is suspended.
            with torch.no_grad():
                vad_prob = self.vad_model(
                    torch.from_numpy(block),
                    SAMPLE_RATE,
                ).item()

            if vad_prob > thredhold:
                if run_start is None:
                    run_start = idx
            elif run_start is not None:
                # First non-speech block after a run closes the segment.
                yield make_segment(
                    start_block_idx=run_start,
                    end_block_idx=idx,
                )
                run_start = None

        # A run that lasts until the final block is closed at the end.
        if run_start is not None:
            yield make_segment(
                start_block_idx=run_start,
                end_block_idx=block_count,
            )
Add 2022-10-01 14:21:58 +00:00			`#!/usr/bin/env python3`

			`from typing import Iterator`

Fix 2022-10-02 10:47:17 +00:00			`import numpy as np`
Add 2022-10-01 14:21:58 +00:00			`import torch`
			`from whisper.audio import N_FRAMES, SAMPLE_RATE`

			`from whispering.schema import SpeechSegment`


			`class VAD:`
			`def __init__(`
			`self,`
			`):`
			`self.vad_model, _ = torch.hub.load(`
			`repo_or_dir="snakers4/silero-vad",`
			`model="silero_vad",`
			`)`

			`def __call__(`
			`self,`
			`*,`
Fix 2022-10-02 10:47:17 +00:00			`audio: np.ndarray,`
Add 2022-10-01 14:21:58 +00:00			`thredhold: float = 0.5,`
Merge remote-tracking branch 'origin/master' into vad 2022-10-02 10:39:33 +00:00			`) -> Iterator[SpeechSegment]:`
Fix 2022-10-02 10:47:17 +00:00			`# audio.shape should be multiple of (N_FRAMES,)`
Add 2022-10-01 14:21:58 +00:00
Merge remote-tracking branch 'origin/master' into vad 2022-10-02 10:39:33 +00:00			`def my_ret(`
			`*,`
			`start_block_idx: int,`
			`idx: int,`
			`) -> SpeechSegment:`
			`return SpeechSegment(`
			`start_block_idx=start_block_idx,`
			`end_block_idx=idx,`
Fix 2022-10-02 10:47:17 +00:00			`audio=audio[N_FRAMES * start_block_idx : N_FRAMES * idx],`
Merge remote-tracking branch 'origin/master' into vad 2022-10-02 10:39:33 +00:00			`)`

Fix 2022-10-02 10:47:17 +00:00			`block_size: int = int(audio.shape[0] / N_FRAMES)`
Add 2022-10-01 14:21:58 +00:00
			`start_block_idx = None`
Merge remote-tracking branch 'origin/master' into vad 2022-10-02 10:39:33 +00:00			`for idx in range(block_size):`
Add 2022-10-01 14:21:58 +00:00			`start: int = N_FRAMES * idx`
			`end: int = N_FRAMES * (idx + 1)`
			`vad_prob = self.vad_model(`
Fix 2022-10-02 10:47:17 +00:00			`torch.from_numpy(audio[start:end]),`
Add 2022-10-01 14:21:58 +00:00			`SAMPLE_RATE,`
			`).item()`
			`if vad_prob > thredhold:`
			`if start_block_idx is None:`
			`start_block_idx = idx`
			`else:`
			`if start_block_idx is not None:`
Merge remote-tracking branch 'origin/master' into vad 2022-10-02 10:39:33 +00:00			`yield my_ret(`
Add 2022-10-01 14:21:58 +00:00			`start_block_idx=start_block_idx,`
Merge remote-tracking branch 'origin/master' into vad 2022-10-02 10:39:33 +00:00			`idx=idx,`
Add 2022-10-01 14:21:58 +00:00			`)`
			`start_block_idx = None`
Merge remote-tracking branch 'origin/master' into vad 2022-10-02 10:39:33 +00:00			`if start_block_idx is not None:`
			`yield my_ret(`
			`start_block_idx=start_block_idx,`
			`idx=block_size,`
			`)`