mirror of
https://github.com/shirayu/whispering.git
synced 2025-02-16 18:45:14 +00:00
Add vad_threshold to Context
This commit is contained in:
parent
1a276d042d
commit
f5bd88ab6b
3 changed files with 3 additions and 1 deletion
|
@@ -41,6 +41,7 @@ class Context(BaseModel, arbitrary_types_allowed=True):
|
||||||
logprob_threshold: Optional[float] = -1.0
|
logprob_threshold: Optional[float] = -1.0
|
||||||
compression_ratio_threshold: Optional[float] = 2.4
|
compression_ratio_threshold: Optional[float] = 2.4
|
||||||
buffer_threshold: Optional[float] = 0.5
|
buffer_threshold: Optional[float] = 0.5
|
||||||
|
vad_threshold: float = 0.5
|
||||||
|
|
||||||
|
|
||||||
class ParsedChunk(BaseModel):
|
class ParsedChunk(BaseModel):
|
||||||
|
|
|
@@ -240,6 +240,7 @@ class WhisperStreamingTranscriber:
|
||||||
for v in self.vad(
|
for v in self.vad(
|
||||||
audio=audio,
|
audio=audio,
|
||||||
total_block_number=1,
|
total_block_number=1,
|
||||||
|
threshold=ctx.vad_threshold,
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
if len(x) == 0: # No speech
|
if len(x) == 0: # No speech
|
||||||
|
|
|
@@ -22,7 +22,7 @@ class VAD:
|
||||||
self,
|
self,
|
||||||
*,
|
*,
|
||||||
audio: np.ndarray,
|
audio: np.ndarray,
|
||||||
threshold: float = 0.5,
|
threshold: float,
|
||||||
total_block_number: Optional[int] = None,
|
total_block_number: Optional[int] = None,
|
||||||
) -> Iterator[SpeechSegment]:
|
) -> Iterator[SpeechSegment]:
|
||||||
# audio.shape should be multiple of (N_FRAMES,)
|
# audio.shape should be multiple of (N_FRAMES,)
|
||||||
|
|
Loading…
Reference in a new issue