mirror of https://github.com/shirayu/whispering.git (synced 2024-11-22 00:41:02 +00:00)
Add vad_threshold to Context
parent 1a276d042d
commit f5bd88ab6b
3 changed files with 3 additions and 1 deletion
@@ -41,6 +41,7 @@ class Context(BaseModel, arbitrary_types_allowed=True):
     logprob_threshold: Optional[float] = -1.0
     compression_ratio_threshold: Optional[float] = 2.4
     buffer_threshold: Optional[float] = 0.5
+    vad_threshold: float = 0.5
 
 
 class ParsedChunk(BaseModel):
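For orientation, a minimal sketch of how the new field behaves. Only the four fields visible in the hunk above come from the source; treating them as a standalone model is an assumption for illustration, since the repository's Context defines more fields than shown here.

# Sketch, not the full Context model; requires pydantic.
from typing import Optional

from pydantic import BaseModel


class Context(BaseModel, arbitrary_types_allowed=True):
    logprob_threshold: Optional[float] = -1.0
    compression_ratio_threshold: Optional[float] = 2.4
    buffer_threshold: Optional[float] = 0.5
    vad_threshold: float = 0.5  # the field this commit adds


ctx = Context(vad_threshold=0.8)  # override per session
print(ctx.vad_threshold)          # 0.8; a plain Context() keeps 0.5

Note that the new field is typed float rather than Optional[float], so pydantic rejects None here, unlike the three thresholds above it.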
@@ -240,6 +240,7 @@ class WhisperStreamingTranscriber:
             for v in self.vad(
                 audio=audio,
                 total_block_number=1,
+                threshold=ctx.vad_threshold,
             )
         ]
         if len(x) == 0:  # No speech
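To see the new data flow end to end, a self-contained sketch under stated assumptions: vad and transcribe_step below are simplified stand-ins, not the repository's VAD model or WhisperStreamingTranscriber; only the keyword names and the `if len(x) == 0:  # No speech` pattern come from the hunks, and SpeechSegment is reduced to a (start, end) tuple.

# Self-contained sketch; stand-ins, not whispering's real classes.
from typing import Iterator, List, Optional, Tuple

import numpy as np
from pydantic import BaseModel

Segment = Tuple[int, int]  # simplified stand-in for SpeechSegment


class Context(BaseModel):
    vad_threshold: float = 0.5  # the field this commit adds


def vad(*, audio: np.ndarray, threshold: float,
        total_block_number: Optional[int] = None) -> Iterator[Segment]:
    # Toy detector: yield one segment covering the whole buffer when
    # the mean absolute amplitude clears the threshold.
    if float(np.abs(audio).mean()) >= threshold:
        yield (0, audio.shape[0])


def transcribe_step(ctx: Context, audio: np.ndarray) -> List[Segment]:
    # Mirrors the call site above: the per-session Context now decides
    # VAD sensitivity instead of the callee's old hard-coded default.
    x = [
        v
        for v in vad(
            audio=audio,
            total_block_number=1,
            threshold=ctx.vad_threshold,
        )
    ]
    if len(x) == 0:  # No speech
        return []
    return x


print(transcribe_step(Context(vad_threshold=0.9), np.full(1600, 0.2)))  # []
print(transcribe_step(Context(), np.full(1600, 0.6)))  # [(0, 1600)]

The point of the commit is visible in transcribe_step: the sensitivity now travels with the per-session Context instead of living as a default inside the detector.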
@@ -22,7 +22,7 @@ class VAD:
         self,
         *,
         audio: np.ndarray,
-        threshold: float = 0.5,
+        threshold: float,
         total_block_number: Optional[int] = None,
     ) -> Iterator[SpeechSegment]:
         # audio.shape should be multiple of (N_FRAMES,)
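Dropping the `= 0.5` default makes threshold a required keyword-only parameter, so any caller that previously relied on the callee's default now fails at call time instead of silently using 0.5; the transcriber hunk above supplies ctx.vad_threshold to satisfy it. A minimal stand-in to illustrate the contract (not the real VAD method):

# Stand-in showing the new calling contract; not the real VAD method.
from typing import Iterator, Optional, Tuple

import numpy as np


def detect(*, audio: np.ndarray, threshold: float,
           total_block_number: Optional[int] = None) -> Iterator[Tuple[int, int]]:
    yield (0, audio.shape[0])  # dummy segment


next(detect(audio=np.zeros(1600), threshold=0.5))  # OK: passed explicitly
# next(detect(audio=np.zeros(1600)))  # TypeError: missing 'threshold'

Keeping the single default on Context (first hunk) rather than duplicating 0.5 in two places gives the value one source of truth, which appears to be the intent of pairing these hunks.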