Multi language feature (#20, Resolve #19)

Add "multi" to the accepted `--language` choices to enable multi-language transcription
This commit is contained in:
AlexandraRamassamy 2022-10-06 13:53:23 +02:00 committed by GitHub
parent 686627e1b5
commit a9452cde76
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 8 additions and 5 deletions

View file

@ -16,7 +16,7 @@ from whisper.audio import N_FRAMES, SAMPLE_RATE
from whisper.tokenizer import LANGUAGES, TO_LANGUAGE_CODE from whisper.tokenizer import LANGUAGES, TO_LANGUAGE_CODE
from whispering.pbar import ProgressBar from whispering.pbar import ProgressBar
from whispering.schema import Context, StdoutWriter, WhisperConfig from whispering.schema import MULTI_LANGUAGE, Context, StdoutWriter, WhisperConfig
from whispering.serve import serve_with_websocket from whispering.serve import serve_with_websocket
from whispering.transcriber import WhisperStreamingTranscriber from whispering.transcriber import WhisperStreamingTranscriber
from whispering.websocket_client import run_websocket_client from whispering.websocket_client import run_websocket_client
@ -106,7 +106,8 @@ def get_opts() -> argparse.Namespace:
"--language", "--language",
type=str, type=str,
default=None, default=None,
choices=sorted(LANGUAGES.keys()) choices=[MULTI_LANGUAGE]
+ sorted(LANGUAGES.keys())
+ sorted([k.title() for k in TO_LANGUAGE_CODE.keys()]), + sorted([k.title() for k in TO_LANGUAGE_CODE.keys()]),
) )
group_model.add_argument( group_model.add_argument(
@ -205,7 +206,7 @@ def get_opts() -> argparse.Namespace:
def get_wshiper(*, opts) -> WhisperStreamingTranscriber: def get_wshiper(*, opts) -> WhisperStreamingTranscriber:
config = WhisperConfig( config = WhisperConfig(
model_name=opts.model, model_name=opts.model,
language=opts.language, language=opts.language if opts.language == MULTI_LANGUAGE else None,
device=opts.device, device=opts.device,
) )

View file

@ -1,17 +1,19 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import sys import sys
from typing import List, Optional from typing import Final, List, Optional
import numpy as np import numpy as np
import torch import torch
from pydantic import BaseModel, root_validator from pydantic import BaseModel, root_validator
MULTI_LANGUAGE: Final[str] = "multi"
class WhisperConfig(BaseModel): class WhisperConfig(BaseModel):
model_name: str model_name: str
device: str device: str
language: str language: Optional[str]
fp16: bool = True fp16: bool = True
@root_validator @root_validator