Multi language feature (#20, Resolve #19)

Add "multi" to the accepted `--language` choices to enable multi-language transcription
This commit is contained in:
AlexandraRamassamy 2022-10-06 13:53:23 +02:00 committed by GitHub
parent 686627e1b5
commit a9452cde76
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 8 additions and 5 deletions

View file

@ -16,7 +16,7 @@ from whisper.audio import N_FRAMES, SAMPLE_RATE
from whisper.tokenizer import LANGUAGES, TO_LANGUAGE_CODE
from whispering.pbar import ProgressBar
from whispering.schema import Context, StdoutWriter, WhisperConfig
from whispering.schema import MULTI_LANGUAGE, Context, StdoutWriter, WhisperConfig
from whispering.serve import serve_with_websocket
from whispering.transcriber import WhisperStreamingTranscriber
from whispering.websocket_client import run_websocket_client
@ -106,7 +106,8 @@ def get_opts() -> argparse.Namespace:
"--language",
type=str,
default=None,
choices=sorted(LANGUAGES.keys())
choices=[MULTI_LANGUAGE]
+ sorted(LANGUAGES.keys())
+ sorted([k.title() for k in TO_LANGUAGE_CODE.keys()]),
)
group_model.add_argument(
@ -205,7 +206,7 @@ def get_opts() -> argparse.Namespace:
def get_wshiper(*, opts) -> WhisperStreamingTranscriber:
config = WhisperConfig(
model_name=opts.model,
language=opts.language,
language=opts.language if opts.language == MULTI_LANGUAGE else None,
device=opts.device,
)

View file

@ -1,17 +1,19 @@
#!/usr/bin/env python3
import sys
from typing import List, Optional
from typing import Final, List, Optional
import numpy as np
import torch
from pydantic import BaseModel, root_validator
MULTI_LANGUAGE: Final[str] = "multi"
class WhisperConfig(BaseModel):
model_name: str
device: str
language: str
language: Optional[str]
fp16: bool = True
@root_validator