mirror of
https://github.com/shirayu/whispering.git
synced 2024-05-20 01:08:14 +00:00
Compare commits
114 commits
Author | SHA1 | Date | |
---|---|---|---|
184056ad46 | |||
f5bf8082a7 | |||
176e3be86e | |||
98a5795176 | |||
48e949ac92 | |||
64d2ab96a8 | |||
3bfad8c3a7 | |||
cbf5152c1d | |||
77ff078342 | |||
b64e5683ab | |||
1150670d07 | |||
57db63ddde | |||
64b5457260 | |||
a2874a9bf4 | |||
6590938bd4 | |||
b7fce2452b | |||
78c573ccf2 | |||
659fc6d725 | |||
f2a4e34aea | |||
b426081091 | |||
33439c8f08 | |||
8b3c7c461f | |||
be9de92a33 | |||
2c9aa5c4c2 | |||
22fcbfcea0 | |||
eed05b012a | |||
1b8190f114 | |||
77653643c6 | |||
472a02712d | |||
1eadec5466 | |||
3c5abc0f8e | |||
43556b576d | |||
266d2145bb | |||
ec96c4efaf | |||
1632c3a130 | |||
0293b09b1d | |||
8701485d1c | |||
403c52bf7b | |||
be193c2b3a | |||
02ebc1ef97 | |||
d1fc731c1d | |||
88a5e9ffd6 | |||
9ec070d5f6 | |||
7334c40413 | |||
1de4728ee5 | |||
3e696133b9 | |||
082cfb51a9 | |||
c187e9c393 | |||
564fbff162 | |||
facb2dc738 | |||
9cc31848de | |||
53970f3b51 | |||
8b464ff85d | |||
74ed23a9e3 | |||
f3c9d8ad10 | |||
71ee56c7bd | |||
cbf82f3bf7 | |||
bbdbad9bec | |||
edf964f707 | |||
783f366985 | |||
f3057cc28f | |||
b5cfb8c708 | |||
50f9f02398 | |||
3bb66928bf | |||
1fe675b553 | |||
7b615c5e84 | |||
9b9ef542b7 | |||
f5b93ae3be | |||
98060c4ca0 | |||
2dd2c6c036 | |||
b3dd46c96f | |||
075451aaae | |||
df831a4f8d | |||
f606634d27 | |||
d174967f5c | |||
183edafc41 | |||
4fee6ee71a | |||
63b02f7c10 | |||
40dd471f6b | |||
62b6d9a3b0 | |||
256bf38b4d | |||
5eb529aa79 | |||
660dd3d48a | |||
d32073b27d | |||
75147cae86 | |||
20b8970aa9 | |||
86f38c6ca9 | |||
dce9719fea | |||
3d293c868c | |||
ae1dbd721c | |||
91231811e7 | |||
cc01b86af5 | |||
d6d94329e6 | |||
6fcf246926 | |||
3190706bcf | |||
b12ca01887 | |||
271c3e96ba | |||
3e647ae850 | |||
250a245c53 | |||
b282f88cfb | |||
177fc3d353 | |||
84f8d0dfe7 | |||
9e4b88fb6a | |||
0cd6425a4f | |||
39ace87958 | |||
a2c68541dc | |||
8545a85cbf | |||
5f93fb5820 | |||
a4405f8b9a | |||
3d7c24797f | |||
fab5682f5f | |||
f88d8075b6 | |||
3f47282ce1 | |||
bd0446c197 |
17
.github/ISSUE_TEMPLATE/question.md
vendored
17
.github/ISSUE_TEMPLATE/question.md
vendored
|
@ -7,4 +7,21 @@ assignees: ''
|
|||
|
||||
---
|
||||
|
||||
## Description
|
||||
|
||||
<!-- A clear and concise description of what you expected to happen. -->
|
||||
|
||||
## Logs (Optional)
|
||||
<!-- Run with ``--debug`` to get detailed logs -->
|
||||
|
||||
## Environment
|
||||
|
||||
<!-- Please clarify these if it is helpful -->
|
||||
|
||||
- OS:
|
||||
- Python Version:
|
||||
- Whispering version:
|
||||
|
||||
## Additional context
|
||||
|
||||
<!-- Add any other context about the problem here. -->
|
||||
|
|
6
.github/dependabot.yml
vendored
6
.github/dependabot.yml
vendored
|
@ -4,12 +4,12 @@ updates:
|
|||
- package-ecosystem: "npm"
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
interval: "monthly"
|
||||
- package-ecosystem: "pip"
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
interval: "monthly"
|
||||
- package-ecosystem: "github-actions"
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
interval: "monthly"
|
||||
|
|
23
.github/workflows/ci.yml
vendored
23
.github/workflows/ci.yml
vendored
|
@ -10,15 +10,28 @@ on: # yamllint disable-line rule:truthy
|
|||
|
||||
jobs:
|
||||
ci:
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
os: [ubuntu-latest, windows-latest, macos-latest]
|
||||
python-version: ["3.8", "3.9", "3.10"]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Get tags
|
||||
run: git fetch --tags origin -f
|
||||
|
||||
- uses: awalsh128/cache-apt-pkgs-action@v1.3.0
|
||||
if: runner.os == 'Linux'
|
||||
with:
|
||||
packages: portaudio19-dev
|
||||
version: 1.0
|
||||
|
||||
- uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.9'
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Install Poetry Action
|
||||
uses: snok/install-poetry@v1.3.1
|
||||
uses: snok/install-poetry@v1.3.3
|
||||
with:
|
||||
virtualenvs-create: true
|
||||
virtualenvs-in-project: true
|
||||
|
@ -28,14 +41,14 @@ jobs:
|
|||
uses: actions/cache@v3
|
||||
with:
|
||||
path: .venv
|
||||
key: venv-${{ runner.os }}-${{ hashFiles('**/poetry.lock') }}-${{ hashFiles('**/package-lock.json') }}-2
|
||||
key: venv-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}-${{ hashFiles('**/package-lock.json') }}-2
|
||||
|
||||
- run: pip install poetry
|
||||
- run: poetry install
|
||||
|
||||
- uses: actions/setup-node@v3
|
||||
with:
|
||||
node-version: '14'
|
||||
node-version: '16'
|
||||
cache: npm
|
||||
- run: npm install
|
||||
- run: poetry run make -j $(nproc)
|
||||
|
|
21
.github/workflows/stale-issues.yml
vendored
Normal file
21
.github/workflows/stale-issues.yml
vendored
Normal file
|
@ -0,0 +1,21 @@
|
|||
name: Close inactive issues
|
||||
on:
|
||||
schedule:
|
||||
- cron: "45 1 * * *"
|
||||
|
||||
jobs:
|
||||
close-issues:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/stale@v8.0.0
|
||||
with:
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
stale-issue-message: "This issue is stale because it has been open for 21 days with no activity."
|
||||
close-issue-message: "This issue was closed because it has been inactive for 14 days since being marked as stale."
|
||||
stale-issue-label: "Status: Stale"
|
||||
only-labels: "Type: Question"
|
||||
exempt-issue-labels: "Status: In Progress"
|
||||
days-before-issue-stale: 21
|
||||
days-before-issue-close: 14
|
||||
days-before-pr-stale: -1
|
||||
days-before-pr-close: -1
|
2
.github/workflows/typos.yml
vendored
2
.github/workflows/typos.yml
vendored
|
@ -18,4 +18,4 @@ jobs:
|
|||
- uses: actions/checkout@v3
|
||||
|
||||
- name: typos-action
|
||||
uses: crate-ci/typos@v1.12.8
|
||||
uses: crate-ci/typos@v1.14.3
|
||||
|
|
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -162,3 +162,5 @@ cython_debug/
|
|||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
#.idea/
|
||||
|
||||
.vscode/
|
14
Makefile
14
Makefile
|
@ -8,20 +8,22 @@ TARGET_DIRS:=./whispering
|
|||
flake8:
|
||||
find $(TARGET_DIRS) | grep '\.py$$' | xargs flake8
|
||||
black:
|
||||
find $(TARGET_DIRS) | grep '\.py$$' | xargs black --diff | diff /dev/null -
|
||||
find $(TARGET_DIRS) | grep '\.py$$' | xargs black --diff | python ./scripts/check_null.py
|
||||
isort:
|
||||
find $(TARGET_DIRS) | grep '\.py$$' | xargs isort --diff | diff /dev/null -
|
||||
find $(TARGET_DIRS) | grep '\.py$$' | xargs isort --diff | python ./scripts/check_null.py
|
||||
pydocstyle:
|
||||
find $(TARGET_DIRS) | grep -v tests | xargs pydocstyle --ignore=D100,D101,D102,D103,D104,D105,D107,D203,D212
|
||||
pytest:
|
||||
pytest
|
||||
|
||||
yamllint:
|
||||
find . \( -name node_modules -o -name .venv \) -prune -o -type f -name '*.yml' -print \
|
||||
| xargs yamllint --no-warnings
|
||||
|
||||
version_check:
|
||||
git tag | python ./scripts/check_version.py --toml pyproject.toml -i README.md --tags /dev/stdin
|
||||
git tag | python ./scripts/check_version.py --toml pyproject.toml -i README.md --tags -
|
||||
|
||||
lint_python: flake8 black isort pydocstyle version_check
|
||||
lint_python: flake8 black isort pydocstyle version_check pytest
|
||||
|
||||
|
||||
pyright:
|
||||
|
@ -33,3 +35,7 @@ markdownlint:
|
|||
|
||||
lint_node: markdownlint pyright
|
||||
|
||||
|
||||
style:
|
||||
find $(TARGET_DIRS) | grep '\.py$$' | xargs black
|
||||
find $(TARGET_DIRS) | grep '\.py$$' | xargs isort
|
||||
|
|
94
README.md
94
README.md
|
@ -1,8 +1,8 @@
|
|||
|
||||
# Whispering
|
||||
|
||||
[![MIT License](https://img.shields.io/apm/l/atomic-design-ui.svg?)](LICENSE)
|
||||
![Python Versions](https://img.shields.io/badge/Python-3.8%20--%203.10-blue)
|
||||
[![MIT License](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
|
||||
![Python Versions](https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10-blue)
|
||||
|
||||
[![CI](https://github.com/shirayu/whispering/actions/workflows/ci.yml/badge.svg)](https://github.com/shirayu/whispering/actions/workflows/ci.yml)
|
||||
[![CodeQL](https://github.com/shirayu/whispering/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/shirayu/whispering/actions/workflows/codeql-analysis.yml)
|
||||
|
@ -11,10 +11,18 @@
|
|||
Streaming transcriber with [whisper](https://github.com/openai/whisper).
|
||||
Enough machine power is needed to transcribe in real time.
|
||||
|
||||
## Notice
|
||||
|
||||
This repository has been archived.
|
||||
There are some alternatives.
|
||||
|
||||
- <https://github.com/guillaumekln/faster-whisper>
|
||||
- <https://github.com/ggerganov/whisper.cpp>
|
||||
|
||||
## Setup
|
||||
|
||||
```bash
|
||||
pip install -U git+https://github.com/shirayu/whispering.git@v0.5.0
|
||||
pip install -U git+https://github.com/shirayu/whispering.git@v0.6.6
|
||||
|
||||
# If you use GPU, install proper torch and torchaudio
|
||||
# Check https://pytorch.org/get-started/locally/
|
||||
|
@ -22,34 +30,41 @@ pip install -U git+https://github.com/shirayu/whispering.git@v0.5.0
|
|||
pip install -U torch torchaudio --extra-index-url https://download.pytorch.org/whl/cu116
|
||||
```
|
||||
|
||||
If you get ``OSError: PortAudio library not found`` in Linux, install "PortAudio".
|
||||
|
||||
```bash
|
||||
sudo apt -y install portaudio19-dev
|
||||
```
|
||||
|
||||
## Example of microphone
|
||||
|
||||
```bash
|
||||
# Run in English
|
||||
# By default, it needs to wait at least 30 seconds
|
||||
whispering --language en --model tiny
|
||||
```
|
||||
|
||||
- ``--help`` shows full options
|
||||
- ``--model`` set the [model name](https://github.com/openai/whisper#available-models-and-languages) to use. Larger models will be more accurate, but may not be able to transcribe in real time.
|
||||
- ``--model`` sets the [model name](https://github.com/openai/whisper#available-models-and-languages) to use. Larger models will be more accurate, but may not be able to transcribe in real time.
|
||||
- ``--language`` sets the language to transcribe. The list of languages is shown with ``whispering -h``
|
||||
- ``--no-progress`` disables the progress message
|
||||
- ``-t`` sets temperatures to decode. You can set several like ``-t 0.0 -t 0.1 -t 0.5``, but too many temperatures exhaust decoding time
|
||||
- ``--debug`` outputs logs for debug
|
||||
- ``--no-vad`` disables VAD (Voice Activity Detection). This forces whisper to analyze non-voice activity sound period
|
||||
- ``--vad`` sets VAD (Voice Activity Detection) threshold. The default is ``0.5``. ``0`` disables VAD and forces whisper to analyze non-voice activity sound period. Try ``--vad 0`` if VAD prevents transcription.
|
||||
- ``--output`` sets output file (Default: Standard output)
|
||||
- ``--frame``: the number of minimum frames of mel spectrogram input for Whisper (default: ``3000``, i.e. 30 seconds)
|
||||
|
||||
### Parse interval
|
||||
|
||||
Without ``--allow-padding``, whispering just performs VAD for the period,
|
||||
and when it is predicted as "silence", it will not be passed to whisper.
|
||||
If you want to change the VAD interval, change ``-n``.
|
||||
By default, whispering performs VAD every 3.75 seconds.
|
||||
This interval is determined by the value of ``-n`` and its default is ``20``.
|
||||
When an interval is predicted as "silence", it will not be passed to whisper.
|
||||
If you want to disable VAD, please make VAD threshold 0 by adding ``--vad 0``.
|
||||
|
||||
If you want quick response, set small ``-n`` and add ``--allow-padding``.
|
||||
However, this may sacrifice the accuracy.
|
||||
|
||||
```bash
|
||||
whispering --language en --model tiny -n 20 --allow-padding
|
||||
```
|
||||
By default, whispering does not perform analysis until the total length of the segments determined by VAD to have speech exceeds 30 seconds.
|
||||
This is because the original Whisper assumes that the inputs are 30-second segments.
|
||||
However, if silence segments appear 16 times (the default value of ``--max_nospeech_skip``) after speech is detected, the analysis is performed.
|
||||
You can make the length of segments smaller with ``--frame`` option (default: 3000), but it sacrifices accuracy because this is not expected input for Whisper.
|
||||
|
||||
## Example of web socket
|
||||
|
||||
|
@ -69,18 +84,53 @@ whispering --language en --model tiny --host 0.0.0.0 --port 8000
|
|||
whispering --host ADDRESS_OF_HOST --port 8000 --mode client
|
||||
```
|
||||
|
||||
You can set ``-n``, ``--allow-padding`` and other options.
|
||||
You can set ``-n`` and other options.
|
||||
|
||||
## Tips
|
||||
## For Developers
|
||||
|
||||
## PortAudio Error
|
||||
1. Install [Python](https://www.python.org/) and [Node.js](https://nodejs.org/)
|
||||
2. [Install poetry](https://python-poetry.org/docs/) to use ``poetry`` command
|
||||
3. Clone and install libraries
|
||||
|
||||
If you get ``OSError: PortAudio library not found``: Install ``portaudio``
|
||||
```console
|
||||
# Clone
|
||||
git clone https://github.com/shirayu/whispering.git
|
||||
|
||||
```bash
|
||||
# Ubuntu
|
||||
sudo apt-get install portaudio19-dev
|
||||
```
|
||||
# With poetry
|
||||
poetry config virtualenvs.in-project true
|
||||
poetry install --all-extras
|
||||
poetry run pip install -U torch torchaudio --extra-index-url https://download.pytorch.org/whl/cu116
|
||||
|
||||
# With npm
|
||||
npm install
|
||||
```
|
||||
|
||||
4. Run test and check that no errors occur
|
||||
|
||||
```bash
|
||||
poetry run make -j4
|
||||
```
|
||||
|
||||
5. Make fancy updates
|
||||
6. Make style
|
||||
|
||||
```bash
|
||||
poetry run make style
|
||||
```
|
||||
|
||||
7. Run test again and check that no errors occur
|
||||
|
||||
```bash
|
||||
poetry run make -j4
|
||||
```
|
||||
|
||||
8. Check typos by using [typos](https://github.com/crate-ci/typos). Just run ``typos`` command in the root directory.
|
||||
|
||||
```bash
|
||||
typos
|
||||
```
|
||||
|
||||
9. Send Pull requests!
|
||||
|
||||
## License
|
||||
|
||||
|
|
311
package-lock.json
generated
311
package-lock.json
generated
|
@ -1,15 +1,15 @@
|
|||
{
|
||||
"name": "pyright-exec",
|
||||
"version": "1.0.0",
|
||||
"lockfileVersion": 2,
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "pyright-exec",
|
||||
"version": "1.0.0",
|
||||
"devDependencies": {
|
||||
"markdownlint-cli": "^0.32.1",
|
||||
"pyright": "^1.1.273"
|
||||
"markdownlint-cli": "^0.33.0",
|
||||
"pyright": "^1.1.301"
|
||||
}
|
||||
},
|
||||
"node_modules/argparse": {
|
||||
|
@ -34,9 +34,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/commander": {
|
||||
"version": "9.4.0",
|
||||
"resolved": "https://registry.npmjs.org/commander/-/commander-9.4.0.tgz",
|
||||
"integrity": "sha512-sRPT+umqkz90UA8M1yqYfnHlZA7fF6nSphDtxeywPZ49ysjxDQybzk13CL+mXekDRG92skbcqCLVovuCusNmFw==",
|
||||
"version": "9.4.1",
|
||||
"resolved": "https://registry.npmjs.org/commander/-/commander-9.4.1.tgz",
|
||||
"integrity": "sha512-5EEkTNyHNGFPD2H+c/dXXfQZYa/scCKasxWcXJaWnNJ99pnQN9Vnmqow+p+PlFPE63Q6mThaZws1T+HxfpgtPw==",
|
||||
"dev": true,
|
||||
"engines": {
|
||||
"node": "^12.20.0 || >=14"
|
||||
|
@ -101,9 +101,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/ignore": {
|
||||
"version": "5.2.0",
|
||||
"resolved": "https://registry.npmjs.org/ignore/-/ignore-5.2.0.tgz",
|
||||
"integrity": "sha512-CmxgYGiEPCLhfLnpPp1MoRmifwEIOgjcHXxOBjv7mY96c+eWScsOP9c112ZyLdWHi0FxHjI+4uVhKYp/gcdRmQ==",
|
||||
"version": "5.2.4",
|
||||
"resolved": "https://registry.npmjs.org/ignore/-/ignore-5.2.4.tgz",
|
||||
"integrity": "sha512-MAb38BcSbH0eHNBxn7ql2NH/kX33OkB3lZ1BNdh7ENeRChHTYsTvWrMubiIAMNS2llXEEgZ1MUOBtXChP3kaFQ==",
|
||||
"dev": true,
|
||||
"engines": {
|
||||
"node": ">= 4"
|
||||
|
@ -147,9 +147,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/jsonc-parser": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/jsonc-parser/-/jsonc-parser-3.1.0.tgz",
|
||||
"integrity": "sha512-DRf0QjnNeCUds3xTjKlQQ3DpJD51GvDjJfnxUVWg6PZTo2otSm+slzNAxU/35hF8/oJIKoG9slq30JYOsF2azg==",
|
||||
"version": "3.2.0",
|
||||
"resolved": "https://registry.npmjs.org/jsonc-parser/-/jsonc-parser-3.2.0.tgz",
|
||||
"integrity": "sha512-gfFQZrcTc8CnKXp6Y4/CBT3fTc0OVuDofpre4aEeEpSBPV5X5v4+Vmx+8snU7RLPrNHPKSgLxGo9YuQzz20o+w==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/linkify-it": {
|
||||
|
@ -178,32 +178,31 @@
|
|||
}
|
||||
},
|
||||
"node_modules/markdownlint": {
|
||||
"version": "0.26.2",
|
||||
"resolved": "https://registry.npmjs.org/markdownlint/-/markdownlint-0.26.2.tgz",
|
||||
"integrity": "sha512-2Am42YX2Ex5SQhRq35HxYWDfz1NLEOZWWN25nqd2h3AHRKsGRE+Qg1gt1++exW792eXTrR4jCNHfShfWk9Nz8w==",
|
||||
"version": "0.27.0",
|
||||
"resolved": "https://registry.npmjs.org/markdownlint/-/markdownlint-0.27.0.tgz",
|
||||
"integrity": "sha512-HtfVr/hzJJmE0C198F99JLaeada+646B5SaG2pVoEakLFI6iRGsvMqrnnrflq8hm1zQgwskEgqSnhDW11JBp0w==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"markdown-it": "13.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=14"
|
||||
"node": ">=14.18.0"
|
||||
}
|
||||
},
|
||||
"node_modules/markdownlint-cli": {
|
||||
"version": "0.32.2",
|
||||
"resolved": "https://registry.npmjs.org/markdownlint-cli/-/markdownlint-cli-0.32.2.tgz",
|
||||
"integrity": "sha512-xmJT1rGueUgT4yGNwk6D0oqQr90UJ7nMyakXtqjgswAkEhYYqjHew9RY8wDbOmh2R270IWjuKSeZzHDEGPAUkQ==",
|
||||
"version": "0.33.0",
|
||||
"resolved": "https://registry.npmjs.org/markdownlint-cli/-/markdownlint-cli-0.33.0.tgz",
|
||||
"integrity": "sha512-zMK1oHpjYkhjO+94+ngARiBBrRDEUMzooDHBAHtmEIJ9oYddd9l3chCReY2mPlecwH7gflQp1ApilTo+o0zopQ==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"commander": "~9.4.0",
|
||||
"commander": "~9.4.1",
|
||||
"get-stdin": "~9.0.0",
|
||||
"glob": "~8.0.3",
|
||||
"ignore": "~5.2.0",
|
||||
"ignore": "~5.2.4",
|
||||
"js-yaml": "^4.1.0",
|
||||
"jsonc-parser": "~3.1.0",
|
||||
"markdownlint": "~0.26.2",
|
||||
"markdownlint-rule-helpers": "~0.17.2",
|
||||
"minimatch": "~5.1.0",
|
||||
"jsonc-parser": "~3.2.0",
|
||||
"markdownlint": "~0.27.0",
|
||||
"minimatch": "~5.1.2",
|
||||
"run-con": "~1.2.11"
|
||||
},
|
||||
"bin": {
|
||||
|
@ -213,15 +212,6 @@
|
|||
"node": ">=14"
|
||||
}
|
||||
},
|
||||
"node_modules/markdownlint-rule-helpers": {
|
||||
"version": "0.17.2",
|
||||
"resolved": "https://registry.npmjs.org/markdownlint-rule-helpers/-/markdownlint-rule-helpers-0.17.2.tgz",
|
||||
"integrity": "sha512-XaeoW2NYSlWxMCZM2B3H7YTG6nlaLfkEZWMBhr4hSPlq9MuY2sy83+Xr89jXOqZMZYjvi5nBCGoFh7hHoPKZmA==",
|
||||
"dev": true,
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/mdurl": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmjs.org/mdurl/-/mdurl-1.0.1.tgz",
|
||||
|
@ -229,9 +219,9 @@
|
|||
"dev": true
|
||||
},
|
||||
"node_modules/minimatch": {
|
||||
"version": "5.1.0",
|
||||
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.0.tgz",
|
||||
"integrity": "sha512-9TPBGGak4nHfGZsPBohm9AWg6NoT7QTCehS3BIJABslyZbzxfV78QM2Y6+i741OPZIafFAaiiEMh5OyIrJPgtg==",
|
||||
"version": "5.1.6",
|
||||
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.6.tgz",
|
||||
"integrity": "sha512-lKwV/1brpG6mBUFHtb7NUmtABCb2WZZmm2wNiOA5hAb8VdCS4B3dtMWyvcoViccwAW/COERjXLt0zP1zXUN26g==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"brace-expansion": "^2.0.1"
|
||||
|
@ -241,10 +231,13 @@
|
|||
}
|
||||
},
|
||||
"node_modules/minimist": {
|
||||
"version": "1.2.6",
|
||||
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.6.tgz",
|
||||
"integrity": "sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q==",
|
||||
"dev": true
|
||||
"version": "1.2.8",
|
||||
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz",
|
||||
"integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==",
|
||||
"dev": true,
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/ljharb"
|
||||
}
|
||||
},
|
||||
"node_modules/once": {
|
||||
"version": "1.4.0",
|
||||
|
@ -256,9 +249,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/pyright": {
|
||||
"version": "1.1.273",
|
||||
"resolved": "https://registry.npmjs.org/pyright/-/pyright-1.1.273.tgz",
|
||||
"integrity": "sha512-uhBqKtRnC1Rvgz7uKp13VEwIR/UuqUvlscOu/y6hQhDzpFrZi0Gft7TrSLIMdy7fRAf85dS1nduQmAIWXgl4AA==",
|
||||
"version": "1.1.301",
|
||||
"resolved": "https://registry.npmjs.org/pyright/-/pyright-1.1.301.tgz",
|
||||
"integrity": "sha512-Y4MMELxQ/5+/FlWjbQTg5wbP3z+V4IyFcATSsNLpZbJm0y4gz6ijf/b0zZV1sA8yJstf6xJ98vw5qxPM0yU8Zg==",
|
||||
"dev": true,
|
||||
"bin": {
|
||||
"pyright": "index.js",
|
||||
|
@ -307,235 +300,5 @@
|
|||
"integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==",
|
||||
"dev": true
|
||||
}
|
||||
},
|
||||
"dependencies": {
|
||||
"argparse": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz",
|
||||
"integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==",
|
||||
"dev": true
|
||||
},
|
||||
"balanced-match": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
|
||||
"integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==",
|
||||
"dev": true
|
||||
},
|
||||
"brace-expansion": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
|
||||
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"balanced-match": "^1.0.0"
|
||||
}
|
||||
},
|
||||
"commander": {
|
||||
"version": "9.4.0",
|
||||
"resolved": "https://registry.npmjs.org/commander/-/commander-9.4.0.tgz",
|
||||
"integrity": "sha512-sRPT+umqkz90UA8M1yqYfnHlZA7fF6nSphDtxeywPZ49ysjxDQybzk13CL+mXekDRG92skbcqCLVovuCusNmFw==",
|
||||
"dev": true
|
||||
},
|
||||
"deep-extend": {
|
||||
"version": "0.6.0",
|
||||
"resolved": "https://registry.npmjs.org/deep-extend/-/deep-extend-0.6.0.tgz",
|
||||
"integrity": "sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA==",
|
||||
"dev": true
|
||||
},
|
||||
"entities": {
|
||||
"version": "3.0.1",
|
||||
"resolved": "https://registry.npmjs.org/entities/-/entities-3.0.1.tgz",
|
||||
"integrity": "sha512-WiyBqoomrwMdFG1e0kqvASYfnlb0lp8M5o5Fw2OFq1hNZxxcNk8Ik0Xm7LxzBhuidnZB/UtBqVCgUz3kBOP51Q==",
|
||||
"dev": true
|
||||
},
|
||||
"fs.realpath": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz",
|
||||
"integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==",
|
||||
"dev": true
|
||||
},
|
||||
"get-stdin": {
|
||||
"version": "9.0.0",
|
||||
"resolved": "https://registry.npmjs.org/get-stdin/-/get-stdin-9.0.0.tgz",
|
||||
"integrity": "sha512-dVKBjfWisLAicarI2Sf+JuBE/DghV4UzNAVe9yhEJuzeREd3JhOTE9cUaJTeSa77fsbQUK3pcOpJfM59+VKZaA==",
|
||||
"dev": true
|
||||
},
|
||||
"glob": {
|
||||
"version": "8.0.3",
|
||||
"resolved": "https://registry.npmjs.org/glob/-/glob-8.0.3.tgz",
|
||||
"integrity": "sha512-ull455NHSHI/Y1FqGaaYFaLGkNMMJbavMrEGFXG/PGrg6y7sutWHUHrz6gy6WEBH6akM1M414dWKCNs+IhKdiQ==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"fs.realpath": "^1.0.0",
|
||||
"inflight": "^1.0.4",
|
||||
"inherits": "2",
|
||||
"minimatch": "^5.0.1",
|
||||
"once": "^1.3.0"
|
||||
}
|
||||
},
|
||||
"ignore": {
|
||||
"version": "5.2.0",
|
||||
"resolved": "https://registry.npmjs.org/ignore/-/ignore-5.2.0.tgz",
|
||||
"integrity": "sha512-CmxgYGiEPCLhfLnpPp1MoRmifwEIOgjcHXxOBjv7mY96c+eWScsOP9c112ZyLdWHi0FxHjI+4uVhKYp/gcdRmQ==",
|
||||
"dev": true
|
||||
},
|
||||
"inflight": {
|
||||
"version": "1.0.6",
|
||||
"resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz",
|
||||
"integrity": "sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"once": "^1.3.0",
|
||||
"wrappy": "1"
|
||||
}
|
||||
},
|
||||
"inherits": {
|
||||
"version": "2.0.4",
|
||||
"resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
|
||||
"integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==",
|
||||
"dev": true
|
||||
},
|
||||
"ini": {
|
||||
"version": "3.0.1",
|
||||
"resolved": "https://registry.npmjs.org/ini/-/ini-3.0.1.tgz",
|
||||
"integrity": "sha512-it4HyVAUTKBc6m8e1iXWvXSTdndF7HbdN713+kvLrymxTaU4AUBWrJ4vEooP+V7fexnVD3LKcBshjGGPefSMUQ==",
|
||||
"dev": true
|
||||
},
|
||||
"js-yaml": {
|
||||
"version": "4.1.0",
|
||||
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz",
|
||||
"integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"argparse": "^2.0.1"
|
||||
}
|
||||
},
|
||||
"jsonc-parser": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/jsonc-parser/-/jsonc-parser-3.1.0.tgz",
|
||||
"integrity": "sha512-DRf0QjnNeCUds3xTjKlQQ3DpJD51GvDjJfnxUVWg6PZTo2otSm+slzNAxU/35hF8/oJIKoG9slq30JYOsF2azg==",
|
||||
"dev": true
|
||||
},
|
||||
"linkify-it": {
|
||||
"version": "4.0.1",
|
||||
"resolved": "https://registry.npmjs.org/linkify-it/-/linkify-it-4.0.1.tgz",
|
||||
"integrity": "sha512-C7bfi1UZmoj8+PQx22XyeXCuBlokoyWQL5pWSP+EI6nzRylyThouddufc2c1NDIcP9k5agmN9fLpA7VNJfIiqw==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"uc.micro": "^1.0.1"
|
||||
}
|
||||
},
|
||||
"markdown-it": {
|
||||
"version": "13.0.1",
|
||||
"resolved": "https://registry.npmjs.org/markdown-it/-/markdown-it-13.0.1.tgz",
|
||||
"integrity": "sha512-lTlxriVoy2criHP0JKRhO2VDG9c2ypWCsT237eDiLqi09rmbKoUetyGHq2uOIRoRS//kfoJckS0eUzzkDR+k2Q==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"argparse": "^2.0.1",
|
||||
"entities": "~3.0.1",
|
||||
"linkify-it": "^4.0.1",
|
||||
"mdurl": "^1.0.1",
|
||||
"uc.micro": "^1.0.5"
|
||||
}
|
||||
},
|
||||
"markdownlint": {
|
||||
"version": "0.26.2",
|
||||
"resolved": "https://registry.npmjs.org/markdownlint/-/markdownlint-0.26.2.tgz",
|
||||
"integrity": "sha512-2Am42YX2Ex5SQhRq35HxYWDfz1NLEOZWWN25nqd2h3AHRKsGRE+Qg1gt1++exW792eXTrR4jCNHfShfWk9Nz8w==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"markdown-it": "13.0.1"
|
||||
}
|
||||
},
|
||||
"markdownlint-cli": {
|
||||
"version": "0.32.2",
|
||||
"resolved": "https://registry.npmjs.org/markdownlint-cli/-/markdownlint-cli-0.32.2.tgz",
|
||||
"integrity": "sha512-xmJT1rGueUgT4yGNwk6D0oqQr90UJ7nMyakXtqjgswAkEhYYqjHew9RY8wDbOmh2R270IWjuKSeZzHDEGPAUkQ==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"commander": "~9.4.0",
|
||||
"get-stdin": "~9.0.0",
|
||||
"glob": "~8.0.3",
|
||||
"ignore": "~5.2.0",
|
||||
"js-yaml": "^4.1.0",
|
||||
"jsonc-parser": "~3.1.0",
|
||||
"markdownlint": "~0.26.2",
|
||||
"markdownlint-rule-helpers": "~0.17.2",
|
||||
"minimatch": "~5.1.0",
|
||||
"run-con": "~1.2.11"
|
||||
}
|
||||
},
|
||||
"markdownlint-rule-helpers": {
|
||||
"version": "0.17.2",
|
||||
"resolved": "https://registry.npmjs.org/markdownlint-rule-helpers/-/markdownlint-rule-helpers-0.17.2.tgz",
|
||||
"integrity": "sha512-XaeoW2NYSlWxMCZM2B3H7YTG6nlaLfkEZWMBhr4hSPlq9MuY2sy83+Xr89jXOqZMZYjvi5nBCGoFh7hHoPKZmA==",
|
||||
"dev": true
|
||||
},
|
||||
"mdurl": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmjs.org/mdurl/-/mdurl-1.0.1.tgz",
|
||||
"integrity": "sha512-/sKlQJCBYVY9Ers9hqzKou4H6V5UWc/M59TH2dvkt+84itfnq7uFOMLpOiOS4ujvHP4etln18fmIxA5R5fll0g==",
|
||||
"dev": true
|
||||
},
|
||||
"minimatch": {
|
||||
"version": "5.1.0",
|
||||
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.0.tgz",
|
||||
"integrity": "sha512-9TPBGGak4nHfGZsPBohm9AWg6NoT7QTCehS3BIJABslyZbzxfV78QM2Y6+i741OPZIafFAaiiEMh5OyIrJPgtg==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"brace-expansion": "^2.0.1"
|
||||
}
|
||||
},
|
||||
"minimist": {
|
||||
"version": "1.2.6",
|
||||
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.6.tgz",
|
||||
"integrity": "sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q==",
|
||||
"dev": true
|
||||
},
|
||||
"once": {
|
||||
"version": "1.4.0",
|
||||
"resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz",
|
||||
"integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"wrappy": "1"
|
||||
}
|
||||
},
|
||||
"pyright": {
|
||||
"version": "1.1.273",
|
||||
"resolved": "https://registry.npmjs.org/pyright/-/pyright-1.1.273.tgz",
|
||||
"integrity": "sha512-uhBqKtRnC1Rvgz7uKp13VEwIR/UuqUvlscOu/y6hQhDzpFrZi0Gft7TrSLIMdy7fRAf85dS1nduQmAIWXgl4AA==",
|
||||
"dev": true
|
||||
},
|
||||
"run-con": {
|
||||
"version": "1.2.11",
|
||||
"resolved": "https://registry.npmjs.org/run-con/-/run-con-1.2.11.tgz",
|
||||
"integrity": "sha512-NEMGsUT+cglWkzEr4IFK21P4Jca45HqiAbIIZIBdX5+UZTB24Mb/21iNGgz9xZa8tL6vbW7CXmq7MFN42+VjNQ==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"deep-extend": "^0.6.0",
|
||||
"ini": "~3.0.0",
|
||||
"minimist": "^1.2.6",
|
||||
"strip-json-comments": "~3.1.1"
|
||||
}
|
||||
},
|
||||
"strip-json-comments": {
|
||||
"version": "3.1.1",
|
||||
"resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-3.1.1.tgz",
|
||||
"integrity": "sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==",
|
||||
"dev": true
|
||||
},
|
||||
"uc.micro": {
|
||||
"version": "1.0.6",
|
||||
"resolved": "https://registry.npmjs.org/uc.micro/-/uc.micro-1.0.6.tgz",
|
||||
"integrity": "sha512-8Y75pvTYkLJW2hWQHXxoqRgV7qb9B+9vFEtidML+7koHUFapnVJAZ6cKs+Qjz5Aw3aZWHMC6u0wJE3At+nSGwA==",
|
||||
"dev": true
|
||||
},
|
||||
"wrappy": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz",
|
||||
"integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==",
|
||||
"dev": true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
"version": "1.0.0",
|
||||
"dependencies": {},
|
||||
"devDependencies": {
|
||||
"markdownlint-cli": "^0.32.1",
|
||||
"pyright": "^1.1.273"
|
||||
"markdownlint-cli": "^0.33.0",
|
||||
"pyright": "^1.1.301"
|
||||
}
|
||||
}
|
||||
|
|
2390
poetry.lock
generated
2390
poetry.lock
generated
File diff suppressed because it is too large
Load diff
|
@ -1,9 +1,9 @@
|
|||
[misc]
|
||||
stable_version = "0.5.0"
|
||||
stable_version = "0.6.6"
|
||||
|
||||
[tool.poetry]
|
||||
name = "whispering"
|
||||
version = "0.5.0"
|
||||
version = "0.6.7pre0"
|
||||
description = "Streaming transcriber with whisper"
|
||||
license = "MIT"
|
||||
authors = ["Yuta Hayashibe <yuta@hayashibe.jp>"]
|
||||
|
@ -13,13 +13,17 @@ packages = [{include = "whispering"}]
|
|||
|
||||
[tool.poetry.dependencies]
|
||||
python = ">=3.8,<3.11"
|
||||
whisper = {git = "https://github.com/openai/whisper.git", rev = '9e653bd0ea0f1e9493cb4939733e9de249493cfb'}
|
||||
whisper = {git = "https://github.com/openai/whisper.git", rev = 'fd8f80c8b880dd7c284c109ca7f03dbe978bc532'}
|
||||
sounddevice = "^0.4.5"
|
||||
pydantic = "^1.10.2"
|
||||
websockets = "^10.3"
|
||||
pydantic = "^1.10.4"
|
||||
websockets = "^10.4"
|
||||
tqdm = "*"
|
||||
torchaudio = "^0.12.1"
|
||||
torchaudio = "^0.13.1"
|
||||
torch = ">=1.13.1"
|
||||
PySoundFile = {version = "^0.9.0.post1", platform = "windows"}
|
||||
numpy = "^1.24.2"
|
||||
ffmpeg-python = "^0.2.0"
|
||||
transformers = "^4.26.1"
|
||||
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
|
@ -28,9 +32,10 @@ isort = ">=5.10.1"
|
|||
flake8 = ">=5.0.4"
|
||||
pydocstyle = ">=6.1.1"
|
||||
toml = "^0.10.2"
|
||||
pytest = "^7.2.1"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
requires = ["poetry-core>=1.3.2"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[tool.poetry.scripts]
|
||||
|
|
13
scripts/check_null.py
Normal file
13
scripts/check_null.py
Normal file
|
@ -0,0 +1,13 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
|
||||
|
||||
def main() -> None:
    """Exit with status 1 when standard input is not empty.

    All of standard input is read; if nothing was read the function
    simply returns.

    Raises:
        SystemExit: With code 1 when any data was read from stdin.
    """
    # An empty string is falsy, so no explicit length comparison is needed.
    if sys.stdin.read():
        sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -33,7 +33,7 @@ def get_opts() -> argparse.Namespace:
|
|||
oparser = argparse.ArgumentParser()
|
||||
oparser.add_argument("--input", "-i", type=Path)
|
||||
oparser.add_argument("--toml", "-t", type=Path, required=True)
|
||||
oparser.add_argument("--tags", type=Path)
|
||||
oparser.add_argument("--tags")
|
||||
return oparser.parse_args()
|
||||
|
||||
|
||||
|
@ -47,9 +47,13 @@ def main() -> None:
|
|||
|
||||
if opts.tags:
|
||||
tags = []
|
||||
with opts.tags.open() as f:
|
||||
for line in f:
|
||||
if opts.tags == "-":
|
||||
for line in sys.stdin:
|
||||
tags.append(line[:-1])
|
||||
else:
|
||||
with opts.tags.open() as f:
|
||||
for line in f:
|
||||
tags.append(line[:-1])
|
||||
|
||||
if stable_version not in tags:
|
||||
sys.stderr.write(f"Tag {stable_version} not in git tags: {tags}\n")
|
||||
|
|
42
tests/test_cli.py
Normal file
42
tests/test_cli.py
Normal file
|
@ -0,0 +1,42 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from whispering.cli import Mode, is_valid_arg
|
||||
|
||||
|
||||
class ArgExample(BaseModel):
    """One command line example checked by ``test_options``."""

    # Mode whose ``.value`` is passed to ``is_valid_arg``.
    mode: Mode
    # Command line string; it is split on whitespace before being checked.
    cmd: str
    # Result ``is_valid_arg`` is expected to return for this example.
    ok: bool
|
||||
|
||||
|
||||
def test_options():
    """Check that ``is_valid_arg`` accepts or rejects options per mode."""
    server = Mode.server
    mic = Mode.mic

    examples = [
        ArgExample(mode=server, cmd="--mic 0", ok=False),
        ArgExample(mode=server, cmd="--mic 1", ok=False),
        ArgExample(mode=server, cmd="--host 0.0.0.0 --port 8000", ok=True),
        ArgExample(
            mode=server,
            cmd="--language en --model tiny --host 0.0.0.0 --port 8000",
            ok=True,
        ),
        ArgExample(mode=server, cmd="--beam_size 3", ok=False),
        ArgExample(mode=server, cmd="--temperature 0", ok=False),
        ArgExample(mode=server, cmd="--num_block 3", ok=False),
        ArgExample(mode=mic, cmd="--host 0.0.0.0", ok=False),
        ArgExample(mode=mic, cmd="--port 8000", ok=False),
    ]

    for example in examples:
        tokens = example.cmd.split()
        accepted = is_valid_arg(mode=example.mode.value, args=tokens)
        assert accepted is example.ok, f"{example.cmd} should be {example.ok}"
|
|
@ -7,7 +7,7 @@ import sys
|
|||
from enum import Enum
|
||||
from logging import DEBUG, INFO, basicConfig, getLogger
|
||||
from pathlib import Path
|
||||
from typing import Iterator, Optional, Union
|
||||
from typing import Iterator, List, Optional, Union
|
||||
|
||||
import sounddevice as sd
|
||||
import torch
|
||||
|
@ -16,7 +16,12 @@ from whisper.audio import N_FRAMES, SAMPLE_RATE
|
|||
from whisper.tokenizer import LANGUAGES, TO_LANGUAGE_CODE
|
||||
|
||||
from whispering.pbar import ProgressBar
|
||||
from whispering.schema import Context, StdoutWriter, WhisperConfig
|
||||
from whispering.schema import (
|
||||
CURRENT_PROTOCOL_VERSION,
|
||||
Context,
|
||||
StdoutWriter,
|
||||
WhisperConfig,
|
||||
)
|
||||
from whispering.serve import serve_with_websocket
|
||||
from whispering.transcriber import WhisperStreamingTranscriber
|
||||
from whispering.websocket_client import run_websocket_client
|
||||
|
@ -117,7 +122,6 @@ def get_opts() -> argparse.Namespace:
|
|||
group_ws = parser.add_argument_group("WebSocket options")
|
||||
group_ws.add_argument(
|
||||
"--host",
|
||||
default="0.0.0.0",
|
||||
help="host of websocker server",
|
||||
)
|
||||
group_ws.add_argument(
|
||||
|
@ -141,12 +145,22 @@ def get_opts() -> argparse.Namespace:
|
|||
default=[],
|
||||
)
|
||||
group_ctx.add_argument(
|
||||
"--allow-padding",
|
||||
action="store_true",
|
||||
"--vad",
|
||||
type=float,
|
||||
help="Threshold of VAD",
|
||||
default=0.5,
|
||||
)
|
||||
group_ctx.add_argument(
|
||||
"--no-vad",
|
||||
action="store_true",
|
||||
"--max_nospeech_skip",
|
||||
type=int,
|
||||
help="Maximum number of skip to analyze because of nospeech",
|
||||
default=16,
|
||||
)
|
||||
group_ctx.add_argument(
|
||||
"--frame",
|
||||
type=int,
|
||||
help="The number of minimum frames of mel spectrogram input for Whisper",
|
||||
default=N_FRAMES,
|
||||
)
|
||||
|
||||
group_misc = parser.add_argument_group("Other options")
|
||||
|
@ -215,10 +229,12 @@ def get_wshiper(*, opts) -> WhisperStreamingTranscriber:
|
|||
|
||||
def get_context(*, opts) -> Context:
|
||||
ctx = Context(
|
||||
protocol_version=CURRENT_PROTOCOL_VERSION,
|
||||
beam_size=opts.beam_size,
|
||||
temperatures=opts.temperature,
|
||||
allow_padding=opts.allow_padding,
|
||||
vad=not opts.no_vad,
|
||||
max_nospeech_skip=opts.max_nospeech_skip,
|
||||
vad_threshold=opts.vad,
|
||||
mel_frame_min_num=opts.frame,
|
||||
)
|
||||
logger.debug(f"Context: {ctx}")
|
||||
return ctx
|
||||
|
@ -231,17 +247,38 @@ def show_devices():
|
|||
print(f"{i}: {device['name']}")
|
||||
|
||||
|
||||
def check_invalid_arg(opts):
    """Exit with status 1 if an option not accepted by the current mode is set.

    In server mode the ``mic`` and ``allow_padding`` options are rejected;
    for any other mode nothing is checked.

    Args:
        opts: Parsed options namespace; ``opts.mode`` selects the rule set.

    Raises:
        SystemExit: With code 1 after writing a message to stderr when a
            rejected option has a value other than ``None`` or ``False``.
    """
    rejected = []
    if opts.mode == Mode.server.value:
        rejected = [
            "mic",
            "allow_padding",
        ]

    given = vars(opts)
    for name in rejected:
        value = given.get(name)
        # Use identity checks: ``0 in {None, False}`` is True because
        # ``0 == False``, so a value of 0 used to be treated as "not set".
        if value is not None and value is not False:
            sys.stderr.write(f"{name} is not accepted option for {opts.mode} mode\n")
            sys.exit(1)
|
||||
def is_valid_arg(
    *,
    args: List[str],
    mode: str,
) -> bool:
    """Return whether ``args`` contains no option rejected for ``mode``.

    Each raw command line token is compared literally against the set of
    option strings rejected for the given mode. The first rejected token
    is reported on stderr.

    Args:
        args: Raw command line tokens (e.g. ``sys.argv[1:]``).
        mode: Value of the selected ``Mode`` member.

    Returns:
        ``False`` if a rejected option is present, ``True`` otherwise
        (including for modes without any rejected options).
    """
    if mode == Mode.server.value:
        rejected = {
            "--mic",
            "--beam_size",
            "-b",
            "--temperature",
            "-t",
            "--num_block",
            "-n",
            "--vad",
            "--max_nospeech_skip",
            "--output",
            "--show-devices",
            "--no-progress",
        }
    elif mode == Mode.mic.value:
        rejected = {
            "--host",
            "--port",
        }
    else:
        rejected = set()

    for arg in args:
        # argparse also accepts the ``--option=value`` spelling, so only the
        # part before the first "=" is compared with the rejected options.
        option = arg.split("=", 1)[0]
        if option in rejected:
            sys.stderr.write(f"{option} is not accepted option for {mode} mode\n")
            return False
    return True
|
||||
|
||||
|
||||
def main() -> None:
|
||||
|
@ -262,7 +299,12 @@ def main() -> None:
|
|||
):
|
||||
opts.mode = Mode.server.value
|
||||
|
||||
check_invalid_arg(opts)
|
||||
if not is_valid_arg(
|
||||
args=sys.argv[1:],
|
||||
mode=opts.mode,
|
||||
):
|
||||
sys.exit(1)
|
||||
|
||||
if opts.mode == Mode.client.value:
|
||||
assert opts.language is None
|
||||
assert opts.model is None
|
||||
|
|
|
@ -1,11 +1,12 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
from typing import List, Optional
|
||||
from typing import Final, List, Optional
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from pydantic import BaseModel, root_validator
|
||||
from pydantic import BaseModel, Field, root_validator
|
||||
from whisper.audio import N_FRAMES
|
||||
|
||||
|
||||
class WhisperConfig(BaseModel):
|
||||
|
@ -24,14 +25,17 @@ class WhisperConfig(BaseModel):
|
|||
return values
|
||||
|
||||
|
||||
CURRENT_PROTOCOL_VERSION: Final[int] = int("000_006_003")
|
||||
|
||||
|
||||
class Context(BaseModel, arbitrary_types_allowed=True):
|
||||
protocol_version: int
|
||||
timestamp: float = 0.0
|
||||
buffer_tokens: List[torch.Tensor] = []
|
||||
buffer_mel: Optional[torch.Tensor] = None
|
||||
vad: bool = True
|
||||
nosoeech_skip_count: Optional[int] = None
|
||||
|
||||
temperatures: List[float]
|
||||
allow_padding: bool = False
|
||||
patience: Optional[float] = None
|
||||
compression_ratio_threshold: Optional[float] = 2.4
|
||||
logprob_threshold: Optional[float] = -1.0
|
||||
|
@ -42,7 +46,11 @@ class Context(BaseModel, arbitrary_types_allowed=True):
|
|||
logprob_threshold: Optional[float] = -1.0
|
||||
compression_ratio_threshold: Optional[float] = 2.4
|
||||
buffer_threshold: Optional[float] = 0.5
|
||||
vad_threshold: float = 0.5
|
||||
vad_threshold: float
|
||||
max_nospeech_skip: int
|
||||
mel_frame_min_num: int = Field(N_FRAMES, ge=1, le=N_FRAMES)
|
||||
|
||||
data_type: str = "float32"
|
||||
|
||||
|
||||
class ParsedChunk(BaseModel):
|
||||
|
|
|
@ -3,16 +3,20 @@
|
|||
import asyncio
|
||||
import json
|
||||
from logging import getLogger
|
||||
from typing import Optional
|
||||
from typing import Final, Optional
|
||||
|
||||
import numpy as np
|
||||
import websockets
|
||||
from websockets.exceptions import ConnectionClosedOK
|
||||
|
||||
from whispering.transcriber import Context, WhisperStreamingTranscriber
|
||||
from whispering.schema import CURRENT_PROTOCOL_VERSION, Context
|
||||
from whispering.transcriber import WhisperStreamingTranscriber
|
||||
|
||||
logger = getLogger(__name__)
|
||||
|
||||
MIN_PROTOCOL_VERSION: Final[int] = int("000_006_000")
|
||||
MAX_PROTOCOL_VERSION: Final[int] = CURRENT_PROTOCOL_VERSION
|
||||
|
||||
|
||||
async def serve_with_websocket_main(websocket):
|
||||
global g_wsp
|
||||
|
@ -41,10 +45,28 @@ async def serve_with_websocket_main(websocket):
|
|||
)
|
||||
)
|
||||
return
|
||||
|
||||
if ctx.protocol_version < MIN_PROTOCOL_VERSION:
|
||||
await websocket.send(
|
||||
json.dumps(
|
||||
{
|
||||
"error": f"protocol_version is older than {MIN_PROTOCOL_VERSION}"
|
||||
}
|
||||
)
|
||||
)
|
||||
elif ctx.protocol_version > MAX_PROTOCOL_VERSION:
|
||||
await websocket.send(
|
||||
json.dumps(
|
||||
{
|
||||
"error": f"protocol_version is newer than {MAX_PROTOCOL_VERSION}"
|
||||
}
|
||||
)
|
||||
)
|
||||
return
|
||||
|
||||
continue
|
||||
|
||||
logger.debug(f"Message size: {len(message)}")
|
||||
audio = np.frombuffer(message, dtype=np.float32)
|
||||
if ctx is None:
|
||||
await websocket.send(
|
||||
json.dumps(
|
||||
|
@ -54,6 +76,7 @@ async def serve_with_websocket_main(websocket):
|
|||
)
|
||||
)
|
||||
return
|
||||
audio = np.frombuffer(message, dtype=np.dtype(ctx.data_type)).astype(np.float32)
|
||||
for chunk in g_wsp.transcribe(
|
||||
audio=audio, # type: ignore
|
||||
ctx=ctx,
|
||||
|
|
|
@ -30,9 +30,9 @@ class WhisperStreamingTranscriber:
|
|||
self.dtype = torch.float16 if fp16 else torch.float32
|
||||
if self.model.device == torch.device("cpu"):
|
||||
if torch.cuda.is_available():
|
||||
logger.warning("Performing inference on CPU when CUDA is available")
|
||||
logger.info("Performing inference on CPU though CUDA is available")
|
||||
if self.dtype == torch.float16:
|
||||
logger.warning("FP16 is not supported on CPU; using FP32 instead")
|
||||
logger.info("Using FP32 because FP16 is not supported on CPU")
|
||||
self.dtype = torch.float32
|
||||
|
||||
if self.dtype == torch.float32:
|
||||
|
@ -233,8 +233,9 @@ class WhisperStreamingTranscriber:
|
|||
ctx: Context,
|
||||
) -> Iterator[ParsedChunk]:
|
||||
logger.debug(f"{len(audio)}")
|
||||
force_padding: bool = False
|
||||
|
||||
if ctx.vad:
|
||||
if ctx.vad_threshold > 0.0:
|
||||
x = [
|
||||
v
|
||||
for v in self.vad(
|
||||
|
@ -246,7 +247,20 @@ class WhisperStreamingTranscriber:
|
|||
if len(x) == 0: # No speech
|
||||
logger.debug("No speech")
|
||||
ctx.timestamp += len(audio) / N_FRAMES * self.duration_pre_one_mel
|
||||
return
|
||||
|
||||
if ctx.nosoeech_skip_count is not None:
|
||||
ctx.nosoeech_skip_count += 1
|
||||
|
||||
if (
|
||||
ctx.nosoeech_skip_count is None
|
||||
or ctx.nosoeech_skip_count <= ctx.max_nospeech_skip
|
||||
):
|
||||
logger.debug(
|
||||
f"nosoeech_skip_count: {ctx.nosoeech_skip_count} (<= {ctx.max_nospeech_skip})"
|
||||
)
|
||||
return
|
||||
ctx.nosoeech_skip_count = None
|
||||
force_padding = True
|
||||
|
||||
new_mel = log_mel_spectrogram(audio=audio)
|
||||
logger.debug(f"Incoming new_mel.shape: {new_mel.shape}")
|
||||
|
@ -261,12 +275,15 @@ class WhisperStreamingTranscriber:
|
|||
seek: int = 0
|
||||
while seek < mel.shape[-1]:
|
||||
logger.debug(f"seek: {seek}")
|
||||
if mel.shape[-1] - seek < N_FRAMES:
|
||||
if mel.shape[-1] - seek <= 0:
|
||||
logger.debug(f"No more seek: mel.shape={mel.shape}, seek={seek}")
|
||||
break
|
||||
if mel.shape[-1] - seek < ctx.mel_frame_min_num:
|
||||
logger.debug(
|
||||
f"mel.shape ({mel.shape[-1]}) - seek ({seek}) < N_FRAMES ({N_FRAMES})"
|
||||
f"mel.shape ({mel.shape[-1]}) - seek ({seek}) < ctx.mel_frame_min_num ({ctx.mel_frame_min_num})"
|
||||
)
|
||||
if ctx.allow_padding:
|
||||
logger.warning("Padding is not expected while speaking")
|
||||
if force_padding:
|
||||
logger.debug("Padding")
|
||||
else:
|
||||
logger.debug("No padding")
|
||||
break
|
||||
|
@ -319,9 +336,13 @@ class WhisperStreamingTranscriber:
|
|||
logger.debug(f"new seek={seek}, mel.shape: {mel.shape}")
|
||||
|
||||
if mel.shape[-1] - seek <= 0:
|
||||
ctx.buffer_mel = None
|
||||
ctx.nosoeech_skip_count = None
|
||||
logger.debug(f"ctx.buffer_mel is None ({mel.shape}, {seek})")
|
||||
return
|
||||
ctx.buffer_mel = mel[:, seek:]
|
||||
assert ctx.buffer_mel is not None
|
||||
logger.debug(f"ctx.buffer_mel.shape: {ctx.buffer_mel.shape}")
|
||||
del mel
|
||||
if ctx.nosoeech_skip_count is None:
|
||||
ctx.nosoeech_skip_count = 0 # start count
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from logging import getLogger
|
||||
from typing import Iterator, Optional
|
||||
|
||||
import numpy as np
|
||||
|
@ -8,6 +9,8 @@ from whisper.audio import N_FRAMES, SAMPLE_RATE
|
|||
|
||||
from whispering.schema import SpeechSegment
|
||||
|
||||
logger = getLogger(__name__)
|
||||
|
||||
|
||||
class VAD:
|
||||
def __init__(
|
||||
|
@ -50,6 +53,7 @@ class VAD:
|
|||
torch.from_numpy(audio[start:end]),
|
||||
SAMPLE_RATE,
|
||||
).item()
|
||||
logger.debug(f"VAD: {vad_prob} (threshold={threshold})")
|
||||
if vad_prob > threshold:
|
||||
if start_block_idx is None:
|
||||
start_block_idx = idx
|
||||
|
|
Loading…
Reference in a new issue