Add vad_threshold to Context

This commit is contained in:
Yuta Hayashibe 2022-10-02 20:41:53 +09:00
parent 1a276d042d
commit f5bd88ab6b
3 changed files with 3 additions and 1 deletions

View file

@@ -41,6 +41,7 @@ class Context(BaseModel, arbitrary_types_allowed=True):
     logprob_threshold: Optional[float] = -1.0
     compression_ratio_threshold: Optional[float] = 2.4
     buffer_threshold: Optional[float] = 0.5
+    vad_threshold: float = 0.5
 class ParsedChunk(BaseModel):

View file

@@ -240,6 +240,7 @@ class WhisperStreamingTranscriber:
             for v in self.vad(
                 audio=audio,
                 total_block_number=1,
+                threshold=ctx.vad_threshold,
             )
         ]
         if len(x) == 0:  # No speech

View file

@@ -22,7 +22,7 @@ class VAD:
         self,
         *,
         audio: np.ndarray,
-        threshold: float = 0.5,
+        threshold: float,
         total_block_number: Optional[int] = None,
     ) -> Iterator[SpeechSegment]:
         # audio.shape should be multiple of (N_FRAMES,)