2022-09-23 10:20:11 +00:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
2022-09-24 04:12:24 +00:00
|
|
|
from typing import List, Optional
|
2022-09-23 10:20:11 +00:00
|
|
|
|
|
|
|
from pydantic import BaseModel
|
|
|
|
|
|
|
|
|
|
|
|
class WhisperConfig(BaseModel):
    """Pydantic model grouping the settings for a Whisper run.

    ``model_name``, ``device``, ``language`` and ``temperatures`` have no
    default and must be supplied by the caller; every other field falls
    back to the default written next to it. Each field is declared exactly
    once so that a default cannot be silently overridden further down the
    class body.
    """

    # Required model selection settings.
    model_name: str
    device: str
    language: str

    allow_padding: bool = False

    # Required; no default is provided for the temperature list.
    temperatures: List[float]

    fp16: bool = True

    # Optional thresholds; ``None`` is an accepted value for each of them.
    compression_ratio_threshold: Optional[float] = 2.4
    logprob_threshold: Optional[float] = -1.0
    # NOTE(review): same type and default as ``no_speech_threshold`` below;
    # presumably a legacy name kept for existing callers -- confirm before
    # removing either one.
    no_captions_threshold: Optional[float] = 0.6

    best_of: int = 5
    beam_size: Optional[int] = None
    no_speech_threshold: Optional[float] = 0.6
|
2022-09-23 11:03:00 +00:00
|
|
|
|
|
|
|
|
|
|
|
class ParsedChunk(BaseModel):
    """Pydantic model describing one parsed chunk.

    All fields are required; none declares a default.

    NOTE(review): the field names match the per-segment keys of a Whisper
    transcription result -- presumably populated from there; verify against
    the code that constructs this model.
    """

    # Chunk boundaries -- presumably timestamps; TODO confirm the unit.
    start: float
    end: float

    # Text of the chunk and the integer tokens associated with it.
    text: str
    tokens: List[int]

    # Per-chunk float statistics (semantics not visible in this file).
    temperature: float
    avg_logprob: float
    compression_ratio: float
    no_speech_prob: float
|