Mirror of https://github.com/shirayu/whispering.git (synced 2024-11-25 10:21:00 +00:00)
Add vad_threshold to Context
Commit f5bd88ab6b (parent 1a276d042d)
3 changed files with 3 additions and 1 deletion

This commit adds a vad_threshold field to Context, forwards it to the VAD call in WhisperStreamingTranscriber, and makes the VAD's threshold parameter required instead of defaulting to 0.5, so the default now lives in Context alone.
@@ -41,6 +41,7 @@ class Context(BaseModel, arbitrary_types_allowed=True):
     logprob_threshold: Optional[float] = -1.0
     compression_ratio_threshold: Optional[float] = 2.4
     buffer_threshold: Optional[float] = 0.5
+    vad_threshold: float = 0.5


 class ParsedChunk(BaseModel):
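The new field gives each Context its own VAD sensitivity, keeping the same 0.5 default the VAD previously hard-coded. A minimal, self-contained sketch of just the fields visible in this hunk (pydantic; the module path and the rest of the class are not shown in the diff and are omitted here):

    from typing import Optional

    from pydantic import BaseModel


    class Context(BaseModel, arbitrary_types_allowed=True):
        logprob_threshold: Optional[float] = -1.0
        compression_ratio_threshold: Optional[float] = 2.4
        buffer_threshold: Optional[float] = 0.5
        vad_threshold: float = 0.5  # new: cutoff for the VAD's speech decision


    ctx = Context(vad_threshold=0.3)  # a more permissive setting than the default
    print(ctx.vad_threshold)  # 0.3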
@@ -240,6 +240,7 @@ class WhisperStreamingTranscriber:
             for v in self.vad(
                 audio=audio,
                 total_block_number=1,
+                threshold=ctx.vad_threshold,
             )
         ]
         if len(x) == 0:  # No speech
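The transcriber now forwards the per-context value instead of relying on the VAD's old built-in default. Conceptually, the threshold is a cutoff on the VAD model's per-frame speech probability: frames scoring above it count as speech. A toy illustration of that semantics (NumPy only; the probability values are invented and this is not the repo's VAD code):

    import numpy as np

    # Invented per-frame speech probabilities, as a VAD model might emit them
    probs = np.array([0.10, 0.62, 0.91, 0.40, 0.55])

    for threshold in (0.5, 0.7):
        speech_frames = np.nonzero(probs > threshold)[0]
        print(f"threshold={threshold}: speech at frames {speech_frames.tolist()}")
    # threshold=0.5: speech at frames [1, 2, 4]
    # threshold=0.7: speech at frames [2]

Raising ctx.vad_threshold therefore makes speech detection stricter; lowering it lets quieter or more ambiguous audio through.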
@@ -22,7 +22,7 @@ class VAD:
         self,
         *,
         audio: np.ndarray,
-        threshold: float = 0.5,
+        threshold: float,
         total_block_number: Optional[int] = None,
     ) -> Iterator[SpeechSegment]:
         # audio.shape should be multiple of (N_FRAMES,)
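Dropping "= 0.5" turns threshold into a required keyword-only argument: the single remaining default lives on Context, and any call site that forgets to pass a threshold now fails immediately instead of silently using 0.5. A minimal standalone sketch of that pattern (a stand-in class, not the repo's actual VAD):

    from typing import Iterator, Optional

    import numpy as np


    class VAD:
        def __call__(
            self,
            *,
            audio: np.ndarray,
            threshold: float,  # required keyword-only: no silent default
            total_block_number: Optional[int] = None,
        ) -> Iterator[int]:
            # Toy stand-in: yield indices of samples above the threshold
            for i, v in enumerate(audio):
                if v > threshold:
                    yield i


    vad = VAD()
    print(list(vad(audio=np.array([0.2, 0.8]), threshold=0.5)))  # [1]
    # vad(audio=np.array([0.2, 0.8]))  # TypeError: missing 'threshold'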