mirror of
https://github.com/Chocobozzz/PeerTube.git
synced 2024-05-16 20:02:40 +00:00
chore(test): wip transcript file and benchmark
This commit is contained in:
parent
79c12baa9a
commit
ab099a0db2
11
packages/tests/fixtures/transcription/README.md
vendored
11
packages/tests/fixtures/transcription/README.md
vendored
|
@ -1,5 +1,6 @@
|
|||
|
||||
CC BY-NC-SA 4.0 Deed
|
||||
Attribution-NonCommercial-ShareAlike 4.0 International
|
||||
communiquer-lors-dune-classe-transplantee.mp4
|
||||
https://podeduc.apps.education.fr/numerique-educatif/video/21893-communiquer-lors-dune-classe-transplantee/
|
||||
🇫🇷 DRANE Occitanie - Communiquer lors d'une classe transplantée
|
||||
[./communiquer-lors-dune-classe-transplantee.mp4](videos/communiquer-lors-dune-classe-transplantee.mp4)
|
||||
> https://podeduc.apps.education.fr/numerique-educatif/video/21893-communiquer-lors-dune-classe-transplantee/
|
||||
>
|
||||
> CC BY-NC-SA 4.0 Deed
|
||||
> Attribution-NonCommercial-ShareAlike 4.0 International
|
||||
|
|
223
packages/tests/fixtures/transcription/models/faster-whisper-tiny/config.json
vendored
Normal file
223
packages/tests/fixtures/transcription/models/faster-whisper-tiny/config.json
vendored
Normal file
|
@ -0,0 +1,223 @@
|
|||
{
|
||||
"alignment_heads": [
|
||||
[
|
||||
2,
|
||||
2
|
||||
],
|
||||
[
|
||||
3,
|
||||
0
|
||||
],
|
||||
[
|
||||
3,
|
||||
2
|
||||
],
|
||||
[
|
||||
3,
|
||||
3
|
||||
],
|
||||
[
|
||||
3,
|
||||
4
|
||||
],
|
||||
[
|
||||
3,
|
||||
5
|
||||
]
|
||||
],
|
||||
"lang_ids": [
|
||||
50259,
|
||||
50260,
|
||||
50261,
|
||||
50262,
|
||||
50263,
|
||||
50264,
|
||||
50265,
|
||||
50266,
|
||||
50267,
|
||||
50268,
|
||||
50269,
|
||||
50270,
|
||||
50271,
|
||||
50272,
|
||||
50273,
|
||||
50274,
|
||||
50275,
|
||||
50276,
|
||||
50277,
|
||||
50278,
|
||||
50279,
|
||||
50280,
|
||||
50281,
|
||||
50282,
|
||||
50283,
|
||||
50284,
|
||||
50285,
|
||||
50286,
|
||||
50287,
|
||||
50288,
|
||||
50289,
|
||||
50290,
|
||||
50291,
|
||||
50292,
|
||||
50293,
|
||||
50294,
|
||||
50295,
|
||||
50296,
|
||||
50297,
|
||||
50298,
|
||||
50299,
|
||||
50300,
|
||||
50301,
|
||||
50302,
|
||||
50303,
|
||||
50304,
|
||||
50305,
|
||||
50306,
|
||||
50307,
|
||||
50308,
|
||||
50309,
|
||||
50310,
|
||||
50311,
|
||||
50312,
|
||||
50313,
|
||||
50314,
|
||||
50315,
|
||||
50316,
|
||||
50317,
|
||||
50318,
|
||||
50319,
|
||||
50320,
|
||||
50321,
|
||||
50322,
|
||||
50323,
|
||||
50324,
|
||||
50325,
|
||||
50326,
|
||||
50327,
|
||||
50328,
|
||||
50329,
|
||||
50330,
|
||||
50331,
|
||||
50332,
|
||||
50333,
|
||||
50334,
|
||||
50335,
|
||||
50336,
|
||||
50337,
|
||||
50338,
|
||||
50339,
|
||||
50340,
|
||||
50341,
|
||||
50342,
|
||||
50343,
|
||||
50344,
|
||||
50345,
|
||||
50346,
|
||||
50347,
|
||||
50348,
|
||||
50349,
|
||||
50350,
|
||||
50351,
|
||||
50352,
|
||||
50353,
|
||||
50354,
|
||||
50355,
|
||||
50356,
|
||||
50357
|
||||
],
|
||||
"suppress_ids": [
|
||||
1,
|
||||
2,
|
||||
7,
|
||||
8,
|
||||
9,
|
||||
10,
|
||||
14,
|
||||
25,
|
||||
26,
|
||||
27,
|
||||
28,
|
||||
29,
|
||||
31,
|
||||
58,
|
||||
59,
|
||||
60,
|
||||
61,
|
||||
62,
|
||||
63,
|
||||
90,
|
||||
91,
|
||||
92,
|
||||
93,
|
||||
359,
|
||||
503,
|
||||
522,
|
||||
542,
|
||||
873,
|
||||
893,
|
||||
902,
|
||||
918,
|
||||
922,
|
||||
931,
|
||||
1350,
|
||||
1853,
|
||||
1982,
|
||||
2460,
|
||||
2627,
|
||||
3246,
|
||||
3253,
|
||||
3268,
|
||||
3536,
|
||||
3846,
|
||||
3961,
|
||||
4183,
|
||||
4667,
|
||||
6585,
|
||||
6647,
|
||||
7273,
|
||||
9061,
|
||||
9383,
|
||||
10428,
|
||||
10929,
|
||||
11938,
|
||||
12033,
|
||||
12331,
|
||||
12562,
|
||||
13793,
|
||||
14157,
|
||||
14635,
|
||||
15265,
|
||||
15618,
|
||||
16553,
|
||||
16604,
|
||||
18362,
|
||||
18956,
|
||||
20075,
|
||||
21675,
|
||||
22520,
|
||||
26130,
|
||||
26161,
|
||||
26435,
|
||||
28279,
|
||||
29464,
|
||||
31650,
|
||||
32302,
|
||||
32470,
|
||||
36865,
|
||||
42863,
|
||||
47425,
|
||||
49870,
|
||||
50254,
|
||||
50258,
|
||||
50358,
|
||||
50359,
|
||||
50360,
|
||||
50361,
|
||||
50362
|
||||
],
|
||||
"suppress_ids_begin": [
|
||||
220,
|
||||
50257
|
||||
]
|
||||
}
|
Binary file not shown.
114853
packages/tests/fixtures/transcription/models/faster-whisper-tiny/tokenizer.json
vendored
Normal file
114853
packages/tests/fixtures/transcription/models/faster-whisper-tiny/tokenizer.json
vendored
Normal file
File diff suppressed because it is too large
Load diff
51867
packages/tests/fixtures/transcription/models/faster-whisper-tiny/vocabulary.json
vendored
Normal file
51867
packages/tests/fixtures/transcription/models/faster-whisper-tiny/vocabulary.json
vendored
Normal file
File diff suppressed because it is too large
Load diff
10
packages/tests/fixtures/transcription/transcript/reference.txt
vendored
Normal file
10
packages/tests/fixtures/transcription/transcript/reference.txt
vendored
Normal file
|
@ -0,0 +1,10 @@
|
|||
Communiquer lors d'une classe transplantée. Utiliser les photos prises lors de cette classe pour raconter quotidiennement le séjour vécu.
|
||||
C'est le scénario pédagogique présenté par Monsieur Navoli, professeur en cycle 3 sur une école élémentaire de Montpellier.
|
||||
La première application utilisée sera la médiathèque. L'enseignant va alors transférer les différentes photos réalisées lors de la classe transplantée.
|
||||
Dans un dossier spécifique pour que les élèves puissent le retrouver plus facilement. Il téléverse donc ses photos dans le dossier, dans l'ENT, dans la médiathèque de la classe.
|
||||
Pour terminer, il s'assure que le dossier soit bien ouvert aux utilisateurs afin que tout le monde puisse l'utiliser.
|
||||
Les élèves par la suite utiliseront le blog, à partir de leurs notes, il pourront, seul ou à 2 par poste rédiger un article dans leur ENT.
|
||||
Ils illustreront ces articles à l'aide des photos et documents numériques mis en accès libre dans l'ENT.
|
||||
Pour ce faire, il pourront utiliser l'éditeur avancé qui les renverra directement dans la médiathèque de la classe, où ils pourront retrouver le dossier créé par leur enseignant.
|
||||
Une fois leur article terminé, les élèves soumettront celui-ci au professeur qui pourra soit l'annoter pour correction ou le publier.
|
||||
Ensuite, il pourront lire et commenter ceux de leurs camarades, ou répondre aux commentaires de la veille.
|
122
packages/tests/src/transcription/benchmark.spec.ts
Normal file
122
packages/tests/src/transcription/benchmark.spec.ts
Normal file
|
@ -0,0 +1,122 @@
|
|||
import { createLogger } from 'winston'
|
||||
import { performance, PerformanceObserver } from 'node:perf_hooks'
|
||||
// import { CpuInfo, CpuUsage } from 'node:os'
|
||||
import { rm, mkdir } from 'node:fs/promises'
|
||||
import { buildAbsoluteFixturePath } from '@peertube/peertube-node-utils'
|
||||
import {
|
||||
toHumanReadable,
|
||||
transcriberFactory,
|
||||
TranscriptFile,
|
||||
TranscriptFileEvaluator,
|
||||
TranscriptionEngine
|
||||
} from '@peertube/peertube-transcription'
|
||||
|
||||
// Error-rate tolerances for the benchmark.
// NOTE(review): neither constant is referenced anywhere in the visible part of this file;
// presumably they are meant to bound acceptable WER/CER drift — confirm before relying on them.
const WER_TOLERANCE = 0.01
const CER_TOLERANCE = 0.001
|
||||
|
||||
/**
 * Metrics collected for a single transcription engine during the benchmark.
 *
 * Entries are built incrementally (see `benchmarkReducer`), which is why the rest of the
 * file always handles them as `Partial<TestResult>`.
 */
interface TestResult {
  // Name of the performance measure the duration was taken from
  uuid: string
  // Value returned by `TranscriptFileEvaluator.wer()`; displayed as a percentage (x100)
  WER: number
  // Value returned by `TranscriptFileEvaluator.cer()`; displayed as a percentage (x100)
  CER: number
  // `duration` of the matching performance measure entry
  duration: number
  // Engine exposed by the transcriber that produced the transcript
  engine: TranscriptionEngine
  dataThroughput: number // relevant ?
  // cpus: CpuInfo[] // https://nodejs.org/docs/latest-v18.x/api/os.html#oscpus
  // cpuUsages: CpuUsage[] // https://nodejs.org/docs/latest-v18.x/api/process.html#processcpuusagepreviousvalue
  // // os.totalmem()
  // // os.freemem()
  // memoryUsages: Record<number, MemoryUsage> // https://nodejs.org/docs/latest-v18.x/api/process.html#processmemoryusage
}
|
||||
|
||||
/**
 * Builds a new benchmark map in which `testResult` has been shallow-merged into the entry
 * stored under `engineName`.
 *
 * The input map is never mutated: all other entries are copied over as-is, and fields already
 * recorded for `engineName` are kept unless `testResult` overrides them.
 *
 * @param benchmark current results, keyed by engine name (defaults to an empty map)
 * @param engineName key of the entry to create or update
 * @param testResult fields to merge into that entry
 * @returns the updated copy of the benchmark map
 */
const benchmarkReducer = (benchmark: Record<string, Partial<TestResult>> = {}, engineName: string, testResult: Partial<TestResult>) => {
  const previousResult = benchmark[engineName]
  const mergedResult = { ...previousResult, ...testResult }

  return { ...benchmark, [engineName]: mergedResult }
}
|
||||
|
||||
/** Display-ready projection of a test result, as printed in the final benchmark table. */
interface FormattedTestResult {
  WER?: string
  CER?: string
  duration?: string
}

/**
 * Renders an error rate as a percentage string.
 *
 * Returns `undefined` only when the rate was never recorded: `0` is a legitimate
 * (perfect) score and must be displayed as `0%` rather than being dropped.
 */
const formatErrorRate = (rate: number | undefined): string | undefined =>
  typeof rate === 'number' ? `${rate * 100}%` : undefined

/**
 * Converts the raw metrics of a (possibly incomplete) test result into printable strings.
 *
 * @param testResult metrics gathered so far for one engine
 * @returns WER and CER as percentages and the duration in human readable form;
 * each field is `undefined` when the corresponding metric is missing
 */
const formatTestResult = (testResult: Partial<TestResult>): FormattedTestResult => ({
  WER: formatErrorRate(testResult.WER),
  CER: formatErrorRate(testResult.CER),
  // Explicit type check instead of a truthiness test so that a duration of 0 is still formatted
  duration: typeof testResult.duration === 'number' ? toHumanReadable(testResult.duration) : undefined
})
|
||||
|
||||
// Runs every transcription engine against the same French fixture video, compares each transcript
// with the reference transcript and prints one table row per engine once the whole suite is done.
describe('Transcribers benchmark', function () {
  const transcribers = [
    'openai-whisper',
    'whisper-ctranslate2',
    'whisper-timestamped'
  ]

  const transcriptDirectory = buildAbsoluteFixturePath('transcription/benchmark/')
  const mediaFilePath = buildAbsoluteFixturePath('transcription/videos/communiquer-lors-dune-classe-transplantee.mp4')
  const referenceTranscriptFile = new TranscriptFile({
    path: buildAbsoluteFixturePath('transcription/transcript/reference.txt'),
    language: 'fr',
    format: 'txt'
  })

  // Results accumulated by both the performance observer (durations) and the tests (WER/CER)
  let benchmark: Record<string, Partial<TestResult>> = {}

  // Kept at suite scope so the `after` hook is able to disconnect it
  let performanceObserver: PerformanceObserver | undefined

  before(async function () {
    await mkdir(transcriptDirectory, { recursive: true })

    performanceObserver = new PerformanceObserver((items) => {
      for (const entry of items.getEntries()) {
        // A measure is attributed to the engine whose name appears in the measure name
        const engineName = transcribers.find(transcriberName => entry.name.includes(transcriberName))

        // Ignore measures unrelated to the benchmarked engines instead of filing them under "undefined"
        if (engineName === undefined) continue

        benchmark = benchmarkReducer(benchmark, engineName, {
          uuid: entry.name,
          duration: entry.duration
        })
      }
    })
    performanceObserver.observe({ type: 'measure' })
  })

  transcribers.forEach(function (transcriberName) {
    describe(`${transcriberName}`, function () {
      it('Should run a benchmark on each transcriber implementation', async function () {
        this.timeout(45000)

        const transcriber = transcriberFactory.createFromEngineName(
          transcriberName,
          createLogger(),
          transcriptDirectory
        )
        const transcriptFile = await transcriber.transcribe(mediaFilePath, { name: 'tiny' }, 'fr', 'txt')
        const evaluator = new TranscriptFileEvaluator(referenceTranscriptFile, transcriptFile)

        // NOTE(review): presumably yields to the event loop so the performance observer callback
        // gets a chance to run before the test ends — confirm
        await new Promise(resolve => setTimeout(resolve, 1))

        // Resolve both metrics BEFORE reading `benchmark`: the observer callback may reassign it
        // while we are awaiting, and merging into a stale snapshot would discard its update.
        const wer = await evaluator.wer()
        const cer = await evaluator.cer()

        benchmark = benchmarkReducer(benchmark, transcriberName, {
          engine: transcriber.engine,
          WER: wer,
          CER: cer
        })
      })
    })
  })

  after(async function () {
    console.table(
      Object.fromEntries(
        Object
          .entries(benchmark)
          .map(([ engineName, testResult ]) => [ engineName, formatTestResult(testResult) ])
      )
    )

    // Stop observing so the observer does not outlive this suite
    performanceObserver?.disconnect()

    await rm(transcriptDirectory, { recursive: true, force: true })

    performance.clearMarks()
  })
})
|
|
@ -1,103 +0,0 @@
|
|||
import { createLogger } from 'winston'
|
||||
import { join } from 'path'
|
||||
import { rm, mkdir } from 'node:fs/promises'
|
||||
import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils'
|
||||
import { toHumanReadable, transcriberFactory, TranscriptionEngine } from '@peertube/peertube-transcription'
|
||||
import { performance, PerformanceObserver } from 'node:perf_hooks'
|
||||
import { CpuInfo, CpuUsage } from 'node:os'
|
||||
|
||||
const WER_TOLERANCE = 1
|
||||
const CER_TOLERANCE = 1
|
||||
|
||||
interface TestResult {
|
||||
WER: number
|
||||
CER: number
|
||||
duration: number
|
||||
engine: TranscriptionEngine
|
||||
dataThroughput: number // relevant ?
|
||||
cpus: CpuInfo[]
|
||||
cpuUsages: CpuUsage[]
|
||||
/**
|
||||
* {
|
||||
* rss: 4935680,
|
||||
* heapTotal: 1826816,
|
||||
* heapUsed: 650472,
|
||||
* external: 49879,
|
||||
* arrayBuffers: 9386
|
||||
* }
|
||||
*
|
||||
* - `heapTotal` and `heapUsed` refer to V8's memory usage.
|
||||
* - `external` refers to the memory usage of C++ objects bound to JavaScript objects managed by V8.
|
||||
* - `rss`, Resident Set Size, is the amount of space occupied in the main memory device
|
||||
* (that is a subset of the total allocated memory) for the process, including all C++ and JavaScript objects and code.
|
||||
* - `arrayBuffers` refers to memory allocated for ArrayBuffers and SharedArrayBuffers, including all Node.js Buffers.
|
||||
* This is also included in the external value.
|
||||
* When Node.js is used as an embedded library, this value may be 0 because allocations for ArrayBuffers may not be tracked in that case.
|
||||
*
|
||||
* When using Worker threads, rss will be a value that is valid for the entire process,
|
||||
* while the other fields will only refer to the current thread.
|
||||
*
|
||||
* The process.memoryUsage() method iterates over each page to gather information about memory usage
|
||||
* which might be slow depending on the program memory allocations.
|
||||
*/
|
||||
memoryUsages: Record<number, MemoryUsage>
|
||||
}
|
||||
|
||||
// var os = require('os');
|
||||
//
|
||||
// console.log(os.cpus())
|
||||
// console.log(os.totalmem());
|
||||
// console.log(os.freemem())
|
||||
//
|
||||
// const testsResults: Record<string, TestResult> = {
|
||||
// cpus: []
|
||||
// }
|
||||
//
|
||||
// async function testTranscriptGeneration (transformerBackend: string, model: string, mediaFilePath: string) {
|
||||
// const testResults = {
|
||||
// WER: 3,
|
||||
// CER: 3,
|
||||
// duration: 3
|
||||
// }
|
||||
//
|
||||
// return testResults
|
||||
// }
|
||||
|
||||
describe('Transcribers benchmark', function () {
|
||||
const transcriptDirectory = join(root(), 'test-transcript')
|
||||
const mediaFilePath = buildAbsoluteFixturePath('video_short.mp4')
|
||||
const transcribers = [
|
||||
'openai-whisper',
|
||||
'whisper-ctranslate2',
|
||||
'whisper-timestamped'
|
||||
]
|
||||
|
||||
before(async function () {
|
||||
await mkdir(transcriptDirectory, { recursive: true })
|
||||
|
||||
const performanceObserver = new PerformanceObserver((items) => {
|
||||
items
|
||||
.getEntries()
|
||||
.forEach((entry) => console.log(`Transcription ${entry.name} took ${toHumanReadable(entry.duration)}`))
|
||||
})
|
||||
performanceObserver.observe({ type: 'measure' })
|
||||
})
|
||||
|
||||
transcribers.forEach(function (transcriberName) {
|
||||
describe(`${transcriberName}`, function () {
|
||||
it('Should run transcription on a media file without raising any errors', async function () {
|
||||
const transcriber = transcriberFactory.createFromEngineName(
|
||||
transcriberName,
|
||||
createLogger(),
|
||||
transcriptDirectory
|
||||
)
|
||||
await transcriber.transcribe(mediaFilePath)
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
after(async function () {
|
||||
await rm(transcriptDirectory, { recursive: true, force: true })
|
||||
performance.clearMarks()
|
||||
})
|
||||
})
|
|
@ -0,0 +1,66 @@
|
|||
/* eslint-disable @typescript-eslint/no-unused-expressions, no-new */
|
||||
import { TranscriptFile, TranscriptFileEvaluator } from '@peertube/peertube-transcription'
|
||||
import { buildAbsoluteFixturePath } from '@peertube/peertube-node-utils'
|
||||
import { join } from 'path'
|
||||
import { mkdir, rm } from 'node:fs/promises'
|
||||
import { expect } from 'chai'
|
||||
|
||||
describe('Transcript File Evaluator', function () {
|
||||
const transcriptDirectory = buildAbsoluteFixturePath('transcription/transcript-evaluator')
|
||||
const referenceTranscriptFilepath = buildAbsoluteFixturePath('transcription/transcript/reference.txt')
|
||||
|
||||
before(async function () {
|
||||
await mkdir(transcriptDirectory, { recursive: true })
|
||||
})
|
||||
|
||||
it(`may not compare files in another format than txt`, async function () {
|
||||
const vttReference = await TranscriptFile.write({
|
||||
path: join(transcriptDirectory, 'reference.vtt'),
|
||||
format: 'vtt',
|
||||
content: ''
|
||||
})
|
||||
const vttHypothesis = await TranscriptFile.write({
|
||||
path: join(transcriptDirectory, 'hypothesis.vtt'),
|
||||
format: 'vtt',
|
||||
content: ''
|
||||
})
|
||||
expect(() => new TranscriptFileEvaluator(vttReference, vttHypothesis)).to.throw('Can only evaluate txt transcript file')
|
||||
})
|
||||
|
||||
it(`evaluation must return coherent wer & cer`, async function () {
|
||||
const reference = new TranscriptFile({
|
||||
path: referenceTranscriptFilepath,
|
||||
language: 'fr',
|
||||
format: 'txt'
|
||||
})
|
||||
const hypothesis = await TranscriptFile.write({
|
||||
path: join(transcriptDirectory, 'openai.txt'),
|
||||
content: `Communiquez lors d'une classe transplante. Utilisez les photos prises lors de cette classe pour raconter quotidiennement le séjour vécu.
|
||||
C'est le scénario P-Dagujic présenté par monsieur Navoli, professeur ainsi que le 3 sur une école alimentaire de Montpellier.
|
||||
La première application a utilisé ce ralame déatec. L'enseignant va alors transférer les différentes photos réalisés lors de la classe transplante.
|
||||
Dans un dossier, spécifique pour que les élèves puissent le retrouver plus facilement. Il téléverse donc ses photos dans le dossier, dans le venté, dans la médiatèque de la classe.
|
||||
Pour terminer, il s'assure que le dossier soit bien ouvert aux utilisateurs afin que tout le monde puisse l'utiliser.
|
||||
Les élèves par la suite utilisera le blog. A partir de leurs nantes, il pourront se loi de parposte rédigeant un article d'un reinté.
|
||||
Ils illustront ses articles à l'aide des photos de que mon numérique mise à n'accélier dans le venté.
|
||||
Pour se faire, il pourront utiliser les diteurs avancés qui les renvèrent directement dans la médiatèque de la classe où il pourront retrouver le dossier créé par leurs enseignants.
|
||||
Une fois leur article terminée, les élèves soumétront se lui-ci au professeur qui pourra soit la noté pour correction ou le public.
|
||||
Ensuite, il pourront lire et commenter ce de leurs camarades ou répondre aux commentaires de la veille.
|
||||
`,
|
||||
format: 'txt',
|
||||
language: 'fr'
|
||||
})
|
||||
const evaluator = new TranscriptFileEvaluator(reference, hypothesis)
|
||||
const wer = await evaluator.wer()
|
||||
expect(wer).to.be.below(1)
|
||||
expect(wer).to.be.greaterThan(0.3)
|
||||
|
||||
const cer = await evaluator.cer()
|
||||
expect(cer).to.be.below(0.1)
|
||||
expect(cer).to.be.greaterThan(0.09)
|
||||
console.log(await evaluator.alignement())
|
||||
})
|
||||
|
||||
after(async function () {
|
||||
await rm(transcriptDirectory, { recursive: true, force: true })
|
||||
})
|
||||
})
|
|
@ -0,0 +1,52 @@
|
|||
/* eslint-disable @typescript-eslint/no-unused-expressions */
|
||||
import { expect } from 'chai'
|
||||
import { mkdir } from 'node:fs/promises'
|
||||
import { TranscriptFile } from '@peertube/peertube-transcription'
|
||||
import { buildAbsoluteFixturePath } from '@peertube/peertube-node-utils'
|
||||
|
||||
describe('Transcript File', function () {
|
||||
before(async function () {
|
||||
await mkdir(buildAbsoluteFixturePath('transcription/transcript/'), { recursive: true })
|
||||
})
|
||||
|
||||
it(`may creates a new transcript file from scratch`, async function () {
|
||||
const transcript1 = await TranscriptFile.write({
|
||||
path: buildAbsoluteFixturePath('transcription/transcript/test1.txt'),
|
||||
content: 'test2',
|
||||
format: 'txt'
|
||||
})
|
||||
const transcript2 = await TranscriptFile.write({
|
||||
path: buildAbsoluteFixturePath('transcription/transcript/test2.txt'),
|
||||
content: 'test2',
|
||||
format: 'txt'
|
||||
})
|
||||
|
||||
expect(await transcript1.equals(transcript2)).to.be.true
|
||||
|
||||
const reference = new TranscriptFile({
|
||||
path: buildAbsoluteFixturePath('transcription/transcript/reference.txt'),
|
||||
language: 'fr',
|
||||
format: 'txt'
|
||||
})
|
||||
const hypothesis = await TranscriptFile.write({
|
||||
path: buildAbsoluteFixturePath('transcription/transcript/openai.txt'),
|
||||
content: `Communiquez lors d'une classe transplante. Utilisez les photos prises lors de cette classe pour raconter quotidiennement le séjour vécu.
|
||||
C'est le scénario P-Dagujic présenté par monsieur Navoli, professeur ainsi que le 3 sur une école alimentaire de Montpellier.
|
||||
La première application a utilisé ce ralame déatec. L'enseignant va alors transférer les différentes photos réalisés lors de la classe transplante.
|
||||
Dans un dossier, spécifique pour que les élèves puissent le retrouver plus facilement. Il téléverse donc ses photos dans le dossier, dans le venté, dans la médiatèque de la classe.
|
||||
Pour terminer, il s'assure que le dossier soit bien ouvert aux utilisateurs afin que tout le monde puisse l'utiliser.
|
||||
Les élèves par la suite utilisera le blog. A partir de leurs nantes, il pourront se loi de parposte rédigeant un article d'un reinté.
|
||||
Ils illustront ses articles à l'aide des photos de que mon numérique mise à n'accélier dans le venté.
|
||||
Pour se faire, il pourront utiliser les diteurs avancés qui les renvèrent directement dans la médiatèque de la classe où il pourront retrouver le dossier créé par leurs enseignants.
|
||||
Une fois leur article terminée, les élèves soumétront se lui-ci au professeur qui pourra soit la noté pour correction ou le public.
|
||||
Ensuite, il pourront lire et commenter ce de leurs camarades ou répondre aux commentaires de la veille.
|
||||
`,
|
||||
format: 'txt',
|
||||
language: 'fr'
|
||||
})
|
||||
|
||||
const output = await reference.evaluate(hypothesis)
|
||||
|
||||
console.log(output)
|
||||
})
|
||||
})
|
|
@ -1,17 +1,17 @@
|
|||
/* eslint-disable @typescript-eslint/no-unused-expressions */
|
||||
import { expect, config } from 'chai'
|
||||
import { createLogger } from 'winston'
|
||||
import { join } from 'path'
|
||||
import { expect, config } from 'chai'
|
||||
import { existsSync } from 'node:fs'
|
||||
import { mkdir, readFile, rm } from 'node:fs/promises'
|
||||
import { mkdir, rm } from 'node:fs/promises'
|
||||
import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils'
|
||||
import { OpenaiTranscriber } from '@peertube/peertube-transcription'
|
||||
import { OpenaiTranscriber, TranscriptFile } from '@peertube/peertube-transcription'
|
||||
|
||||
config.truncateThreshold = 0
|
||||
|
||||
describe('Open AI Whisper transcriber', function () {
|
||||
const transcriptDirectory = join(root(), 'test-transcript')
|
||||
const shortVideoPath = buildAbsoluteFixturePath('video_short.mp4')
|
||||
const frVideoPath = buildAbsoluteFixturePath('transcription/communiquer-lors-dune-classe-transplantee.mp4')
|
||||
const frVideoPath = buildAbsoluteFixturePath('transcription/videos/communiquer-lors-dune-classe-transplantee.mp4')
|
||||
|
||||
const transcriber = new OpenaiTranscriber(
|
||||
{
|
||||
|
@ -31,15 +31,13 @@ describe('Open AI Whisper transcriber', function () {
|
|||
|
||||
it('Should transcribe a media file and provide a valid path to a transcript file in `vtt` format by default', async function () {
|
||||
const transcript = await transcriber.transcribe(shortVideoPath)
|
||||
expect(transcript).to.deep.equals({
|
||||
expect(transcript.equals(new TranscriptFile({
|
||||
path: join(transcriptDirectory, 'video_short.vtt'),
|
||||
language: 'en',
|
||||
format: 'vtt'
|
||||
})
|
||||
}))).to.be.true
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-unused-expressions
|
||||
expect(existsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist.`).to.be.true
|
||||
expect(await readFile(transcript.path, 'utf8')).to.equal(
|
||||
expect(await transcript.read()).to.equal(
|
||||
`WEBVTT
|
||||
|
||||
00:00.000 --> 00:02.000
|
||||
|
@ -51,15 +49,13 @@ You
|
|||
|
||||
it('May produce a transcript file in the `srt` format', async function () {
|
||||
const transcript = await transcriber.transcribe(shortVideoPath, { name: 'tiny' }, 'en', 'srt')
|
||||
expect(transcript).to.deep.equals({
|
||||
expect(transcript.equals(new TranscriptFile({
|
||||
path: join(transcriptDirectory, 'video_short.srt'),
|
||||
language: 'en',
|
||||
format: 'srt'
|
||||
})
|
||||
}))).to.be.true
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-unused-expressions
|
||||
expect(existsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist.`).to.be.true
|
||||
expect(await readFile(transcript.path, 'utf8')).to.equal(
|
||||
expect(await transcript.read()).to.equal(
|
||||
`1
|
||||
00:00:00,000 --> 00:00:02,000
|
||||
You
|
||||
|
@ -70,15 +66,13 @@ You
|
|||
|
||||
it('May produce a transcript file in the `txt` format', async function () {
|
||||
const transcript = await transcriber.transcribe(shortVideoPath, { name: 'tiny' }, 'en', 'txt')
|
||||
expect(transcript).to.deep.equals({
|
||||
expect(transcript.equals(new TranscriptFile({
|
||||
path: join(transcriptDirectory, 'video_short.txt'),
|
||||
language: 'en',
|
||||
format: 'txt'
|
||||
})
|
||||
}))).to.be.true
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-unused-expressions
|
||||
expect(existsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist.`).to.be.true
|
||||
expect(await readFile(transcript.path, 'utf8')).to.equal(`You
|
||||
expect(await transcript.read()).to.equal(`You
|
||||
`)
|
||||
})
|
||||
|
||||
|
@ -89,15 +83,13 @@ You
|
|||
it('May transcribe a media file in french', async function () {
|
||||
this.timeout(45000)
|
||||
const transcript = await transcriber.transcribe(frVideoPath, { name: 'tiny' }, 'fr', 'txt')
|
||||
expect(transcript).to.deep.equals({
|
||||
expect(transcript.equals(new TranscriptFile({
|
||||
path: join(transcriptDirectory, 'communiquer-lors-dune-classe-transplantee.txt'),
|
||||
language: 'fr',
|
||||
format: 'txt'
|
||||
})
|
||||
})))
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-unused-expressions
|
||||
expect(existsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist.`).to.be.true
|
||||
expect(await readFile(transcript.path, 'utf8')).to.equal(
|
||||
expect(await transcript.read()).to.equal(
|
||||
`Communiquez lors d'une classe transplante. Utilisez les photos prises lors de cette classe pour raconter quotidiennement le séjour vécu.
|
||||
C'est le scénario P-Dagujic présenté par monsieur Navoli, professeur ainsi que le 3 sur une école alimentaire de Montpellier.
|
||||
La première application a utilisé ce ralame déatec. L'enseignant va alors transférer les différentes photos réalisés lors de la classe transplante.
|
||||
|
@ -113,17 +105,15 @@ Ensuite, il pourront lire et commenter ce de leurs camarades ou répondre aux co
|
|||
})
|
||||
|
||||
it('May transcribe a media file in french with small model', async function () {
|
||||
this.timeout(300000)
|
||||
this.timeout(400000)
|
||||
const transcript = await transcriber.transcribe(frVideoPath, { name: 'small' }, 'fr', 'txt')
|
||||
expect(transcript).to.deep.equals({
|
||||
expect(transcript.equals(new TranscriptFile({
|
||||
path: join(transcriptDirectory, 'communiquer-lors-dune-classe-transplantee.txt'),
|
||||
language: 'fr',
|
||||
format: 'txt'
|
||||
})
|
||||
}))).to.be.true
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-unused-expressions
|
||||
expect(existsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist.`).to.be.true
|
||||
expect(await readFile(transcript.path, 'utf8')).to.equal(
|
||||
expect(await transcript.read()).to.equal(
|
||||
`Communiquer lors d'une classe transplantée. Utiliser les photos prises lors de cette classe
|
||||
pour raconter quotidiennement le séjour vécu. C'est le scénario pédagogique présenté
|
||||
par M. Navoli, professeur en cycle 3 sur une école élémentaire de Montpellier.
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
/* eslint-disable @typescript-eslint/no-unused-expressions */
|
||||
import { expect, config } from 'chai'
|
||||
import { createLogger } from 'winston'
|
||||
import { join } from 'path'
|
||||
import { expect, config } from 'chai'
|
||||
import { existsSync } from 'node:fs'
|
||||
import { mkdir, readFile, rm } from 'node:fs/promises'
|
||||
import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils'
|
||||
|
@ -11,7 +12,7 @@ config.truncateThreshold = 0
|
|||
describe('Linto timestamped Whisper transcriber', function () {
|
||||
const transcriptDirectory = join(root(), 'test-transcript')
|
||||
const shortVideoPath = buildAbsoluteFixturePath('video_short.mp4')
|
||||
const frVideoPath = buildAbsoluteFixturePath('transcription/communiquer-lors-dune-classe-transplantee.mp4')
|
||||
const frVideoPath = buildAbsoluteFixturePath('transcription/videos/communiquer-lors-dune-classe-transplantee.mp4')
|
||||
const transcriber = new WhisperTimestampedTranscriber(
|
||||
{
|
||||
name: 'whisper-timestamped',
|
||||
|
|
|
@ -1,17 +1,17 @@
|
|||
/* eslint-disable @typescript-eslint/no-unused-expressions */
|
||||
import { expect, config } from 'chai'
|
||||
import { createLogger } from 'winston'
|
||||
import { join } from 'path'
|
||||
import { expect, config } from 'chai'
|
||||
import { existsSync } from 'node:fs'
|
||||
import { mkdir, readFile, rm } from 'node:fs/promises'
|
||||
import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils'
|
||||
import { Ctranslate2Transcriber, OpenaiTranscriber } from '@peertube/peertube-transcription'
|
||||
import { Ctranslate2Transcriber, OpenaiTranscriber, TranscriptFile } from '@peertube/peertube-transcription'
|
||||
|
||||
config.truncateThreshold = 0
|
||||
|
||||
describe('Whisper CTranslate2 transcriber', function () {
|
||||
const transcriptDirectory = join(root(), 'test-transcript')
|
||||
const shortVideoPath = buildAbsoluteFixturePath('video_short.mp4')
|
||||
const frVideoPath = buildAbsoluteFixturePath('transcription/communiquer-lors-dune-classe-transplantee.mp4')
|
||||
const frVideoPath = buildAbsoluteFixturePath('transcription/videos/communiquer-lors-dune-classe-transplantee.mp4')
|
||||
const transcriber = new Ctranslate2Transcriber(
|
||||
{
|
||||
name: 'anyNameShouldBeFineReally',
|
||||
|
@ -30,14 +30,7 @@ describe('Whisper CTranslate2 transcriber', function () {
|
|||
|
||||
it('Should transcribe a media file and provide a valid path to a transcript file in `vtt` format by default', async function () {
|
||||
const transcript = await transcriber.transcribe(shortVideoPath, { name: 'tiny' })
|
||||
expect(transcript).to.deep.equals({
|
||||
path: join(transcriptDirectory, 'video_short.vtt'),
|
||||
language: 'en',
|
||||
format: 'vtt'
|
||||
})
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-unused-expressions
|
||||
expect(existsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist.`).to.be.true
|
||||
expect(transcript.equals(new TranscriptFile({ path: join(transcriptDirectory, 'video_short.vtt') }))).to.be.true
|
||||
expect(await readFile(transcript.path, 'utf8')).to.equal(
|
||||
`WEBVTT
|
||||
|
||||
|
@ -50,14 +43,11 @@ You
|
|||
|
||||
it('May produce a transcript file in the `srt` format', async function () {
|
||||
const transcript = await transcriber.transcribe(shortVideoPath, { name: 'tiny' }, 'en', 'srt')
|
||||
expect(transcript).to.deep.equals({
|
||||
expect(transcript.equals(new TranscriptFile({
|
||||
path: join(transcriptDirectory, 'video_short.srt'),
|
||||
language: 'en',
|
||||
format: 'srt'
|
||||
})
|
||||
}))).to.be.true
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-unused-expressions
|
||||
expect(existsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist.`).to.be.true
|
||||
expect(await readFile(transcript.path, 'utf8')).to.equal(
|
||||
`1
|
||||
00:00:00,000 --> 00:00:02,000
|
||||
|
@ -69,59 +59,50 @@ You
|
|||
|
||||
it('May produce a transcript file in the `txt` format', async function () {
  const transcript = await transcriber.transcribe(shortVideoPath, { name: 'tiny' }, 'en', 'txt')

  // `TranscriptFile.equals()` is async: it must be awaited, otherwise chai receives a pending
  // Promise, which is never strictly `true`.
  // eslint-disable-next-line @typescript-eslint/no-unused-expressions
  expect(await transcript.equals(new TranscriptFile({
    path: join(transcriptDirectory, 'video_short.txt'),
    language: 'en',
    format: 'txt'
  }))).to.be.true

  expect(await transcript.read()).to.equal(`You
`)
})
|
||||
|
||||
it('May transcribe a media file using a local CTranslate2 model', async function () {
  const transcript = await transcriber.transcribe(
    shortVideoPath,
    // The model fixture directory is stored under `transcription/models/`
    { name: 'myLocalModel', path: buildAbsoluteFixturePath('transcription/models/faster-whisper-tiny') },
    'en',
    'txt'
  )

  // `TranscriptFile.equals()` is async: it must be awaited, otherwise chai receives a pending
  // Promise, which is never strictly `true`.
  // eslint-disable-next-line @typescript-eslint/no-unused-expressions
  expect(await transcript.equals(new TranscriptFile({
    path: join(transcriptDirectory, 'video_short.txt'),
    language: 'en',
    format: 'txt'
  }))).to.be.true

  expect(await transcript.read()).to.equal(`You
`)
})
|
||||
|
||||
it('May transcribe a media file in french', async function () {
  this.timeout(45000)
  const transcript = await transcriber.transcribe(frVideoPath, { name: 'tiny' }, 'fr', 'txt')

  // `TranscriptFile.equals()` is async: it must be awaited, otherwise chai receives a pending
  // Promise, which is never strictly `true`.
  // eslint-disable-next-line @typescript-eslint/no-unused-expressions
  expect(await transcript.equals(new TranscriptFile({
    path: join(transcriptDirectory, 'communiquer-lors-dune-classe-transplantee.txt'),
    language: 'fr',
    format: 'txt'
  }))).to.be.true

  // Raw output of the `tiny` model: recognition mistakes are part of the expected text
  expect(await transcript.read()).to.equal(
    `Communiquez lors d'une classe transplante. Utilisez les photos prises lors de cette classe pour raconter quotidiennement le séjour vécu.
C'est le scénario P.Dagujic présenté par Monsieur Navoli, professeur ainsi que le 3 sur une école alimentaire de Montpellier.
La première application utilisée sera la médiatique. L'enseignant va alors transférer les différentes photos réalisés lors de la classe transplante.
Dans un dossier, spécifique pour que les élèves puissent le retrouver plus facilement. Il téléverse donc ses photos dans le dossier, dans le venté, dans la médiatique de la classe.
Pour terminer, il s'assure que le dossier soit bien ouvert aux utilisateurs afin que tout le monde puisse l'utiliser.
Les élèves par la suite utiliseront le blog, à partir de leur nante, il pourront se loi de parposte rédigeant un article d'un orienté.
Ils illustront ces articles à l'aide des photos de commun numériques mises un accès libre dans leaineté. Pour se faire, il pourront utiliser les détecteurs avancés qui des renvers un directement dans la médiatique de la classe, où il pourront retrouver le dossier créé par leur enseignant.
Une fois leur article terminée, les élèves soumettront celui-ci au professeur qui pourra soit la noté pour correction ou le public.
Ensuite, il pourront lire et commenter ce de leur camarade, on répondra au commentaire de la veille.
`
  )
})
|
||||
|
@ -147,9 +128,7 @@ Ensuite, il pourront lire et commenter ce de leurs camarades ou répondre aux co
|
|||
)
|
||||
const openaiTranscript = await openaiTranscriber.transcribe(...transcribeArguments)
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-unused-expressions
|
||||
expect(existsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist.`).to.be.true
|
||||
expect(await readFile(transcript.path, 'utf8')).to.equal(await readFile(openaiTranscript.path, 'utf8'))
|
||||
expect(transcript.equals(openaiTranscript))
|
||||
})
|
||||
|
||||
after(async function () {
|
||||
|
|
|
@ -7,7 +7,7 @@ import short from 'short-uuid'
|
|||
import { root } from '@peertube/peertube-node-utils'
|
||||
import { TranscriptionEngine } from './transcription-engine.js'
|
||||
import { TranscriptionModel } from './transcription-model.js'
|
||||
import { Transcript, TranscriptFormat } from './transcript.js'
|
||||
import { TranscriptFile, TranscriptFormat } from './transcript/index.js'
|
||||
|
||||
export abstract class AbstractTranscriber {
|
||||
public static DEFAULT_TRANSCRIPT_DIRECTORY = join(root(), 'dist', 'transcripts')
|
||||
|
@ -72,14 +72,10 @@ export abstract class AbstractTranscriber {
|
|||
return `${this.runId}-ended`
|
||||
}
|
||||
|
||||
perf () {
|
||||
// const transcriptionPerformanceObserver = new PerformanceObserver((items) => {
|
||||
// items
|
||||
// .getEntries()
|
||||
// .forEach((entry) => logger.debug(`Transcription n°${entry.name} took ${toHumanReadable(entry.duration)}`, entry))
|
||||
// performance.clearMarks()
|
||||
// })
|
||||
}
|
||||
|
||||
abstract transcribe (mediaFilePath: string, model: TranscriptionModel, language: string, format: TranscriptFormat): Promise<Transcript>
|
||||
abstract transcribe (
|
||||
mediaFilePath: string,
|
||||
model: TranscriptionModel,
|
||||
language: string,
|
||||
format: TranscriptFormat
|
||||
): Promise<TranscriptFile>
|
||||
}
|
||||
|
|
|
@ -3,9 +3,9 @@ import { engines } from './whisper/index.js'
|
|||
|
||||
export * from './duration.js'
|
||||
|
||||
export * from './transcript/index.js'
|
||||
export * from './transcription-engine.js'
|
||||
export * from './transcription-model.js'
|
||||
export * from './transcript.js'
|
||||
export * from './whisper/index.js'
|
||||
|
||||
export const transcriberFactory = new TranscriberFactory(engines)
|
||||
|
|
|
@ -18,10 +18,7 @@ export class TranscriberFactory {
|
|||
logger: Logger = createLogger(),
|
||||
transcriptDirectory: string = AbstractTranscriber.DEFAULT_TRANSCRIPT_DIRECTORY
|
||||
) {
|
||||
const engine = this.engines.find(({ name }) => name === engineName)
|
||||
if (!engine) {
|
||||
throw new Error(`Unknow engine ${engineName}`)
|
||||
}
|
||||
const engine = this.getEngineByName(engineName)
|
||||
|
||||
const transcriberArgs: ConstructorParameters<typeof AbstractTranscriber> = [
|
||||
engine,
|
||||
|
@ -40,4 +37,13 @@ export class TranscriberFactory {
|
|||
throw new Error(`Unimplemented engine ${engineName}`)
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Looks up an engine by name among `this.engines`.
 *
 * @param engineName value compared (strict equality) with the `name` of each engine
 * @returns the first engine whose `name` equals `engineName`
 * @throws Error when no engine carries that name
 */
getEngineByName (engineName: string) {
  const engine = this.engines.find(({ name }) => name === engineName)
  if (!engine) {
    throw new Error(`Unknown engine ${engineName}`)
  }

  return engine
}
|
||||
}
|
||||
|
|
|
@ -1,3 +0,0 @@
|
|||
export type TranscriptFormat = 'txt' | 'vtt' | 'srt'
|
||||
|
||||
export type Transcript = { path: string, language?: string, format: TranscriptFormat }
|
3
packages/transcription/src/transcript/index.ts
Normal file
3
packages/transcription/src/transcript/index.ts
Normal file
|
@ -0,0 +1,3 @@
|
|||
export * from './transcript-file.js'
|
||||
export * from './transcript-file-evaluator.js'
|
||||
export * from './transcript-file-interface.js'
|
|
@ -0,0 +1,75 @@
|
|||
import { $ } from 'execa'
|
||||
import assert from 'node:assert'
|
||||
import { TranscriptFile } from './index.js'
|
||||
|
||||
/**
 * Scores a hypothesis transcript against a reference transcript by running the Jiwer CLI,
 * a Python implementation:
 * https://jitsi.github.io/jiwer/cli/
 *
 * There are plenty of implementations of WER (Word Error Rate) and CER (Character Error Rate)
 * calculation in Python, but not that many in NodeJS.
 *
 * Every metric spawns the `jiwer` command, so it has to be installed on the host.
 */
export class TranscriptFileEvaluator {
  referenceTranscriptFile: TranscriptFile
  hypothesisTranscriptFile: TranscriptFile

  /**
   * @param referenceTranscriptFile transcript considered as the ground truth
   * @param hypothesisTranscriptFile transcript to score against the reference
   * @throws AssertionError when one of the files is not in the `txt` format
   */
  constructor (referenceTranscriptFile: TranscriptFile, hypothesisTranscriptFile: TranscriptFile) {
    assert(referenceTranscriptFile.format === 'txt', 'Can only evaluate txt transcript file')
    assert(hypothesisTranscriptFile.format === 'txt', 'Can only evaluate txt transcript file')

    this.referenceTranscriptFile = referenceTranscriptFile
    this.hypothesisTranscriptFile = hypothesisTranscriptFile
  }

  /**
   * Builds the `jiwer` argument list for a pair of transcript file paths.
   *
   * @param args extra CLI flags, appended after the reference and hypothesis options
   */
  static buildArgs (referenceTranscriptFilepath: string, hypothesisTranscriptFilepath: string, ...args: string[]): string[] {
    return [
      '--reference',
      referenceTranscriptFilepath,
      '--hypothesis',
      hypothesisTranscriptFilepath,
      ...args
    ]
  }

  /**
   * Same as the static `buildArgs`, using the paths of the two files held by this evaluator.
   */
  buildArgs (...args: string[]): string[] {
    return TranscriptFileEvaluator.buildArgs(this.referenceTranscriptFile.path, this.hypothesisTranscriptFile.path, ...args)
  }

  /**
   * WER: Word Error Rate
   *
   * @returns the standard output of `jiwer -g` converted to a number
   */
  async wer (): Promise<number> {
    const { stdout: wer } = await $`jiwer ${this.buildArgs('-g')}`

    return Number(wer)
  }

  /**
   * CER: Character Error Rate
   *
   * Best effort: a failing `jiwer --cer` run is not propagated.
   * @see https://github.com/jitsi/jiwer/issues/87
   *
   * @returns the CER, or `undefined` when the command failed or printed nothing
   */
  async cer (): Promise<number | undefined> {
    try {
      const { stdout } = await $`jiwer ${this.buildArgs('--cer')}`

      return stdout ? Number(stdout) : undefined
    } catch {
      return undefined
    }
  }

  /**
   * @returns the raw standard output of `jiwer --align`
   */
  async alignement (): Promise<string> {
    const { stdout: alignement } = await $`jiwer ${this.buildArgs('--align')}`

    return alignement
  }

  /**
   * Computes every metric at once.
   * The three `jiwer` invocations are independent from each other, so they run concurrently.
   */
  async evaluate () {
    const [ wer, cer, alignement ] = await Promise.all([
      this.wer(),
      this.cer(),
      this.alignement()
    ])

    return { wer, cer, alignement }
  }
}
|
|
@ -0,0 +1,3 @@
|
|||
/** File formats a transcript can be stored in. */
export type TranscriptFormat =
  | 'txt'
  | 'vtt'
  | 'srt'

/** Plain description of a transcript file: where it is, its (optional) language and its format. */
export type TranscriptFileInterface = {
  path: string
  language?: string
  format: TranscriptFormat
}
|
50
packages/transcription/src/transcript/transcript-file.ts
Normal file
50
packages/transcription/src/transcript/transcript-file.ts
Normal file
|
@ -0,0 +1,50 @@
|
|||
import { statSync } from 'node:fs'
|
||||
import { readFile, writeFile } from 'node:fs/promises'
|
||||
import { TranscriptFileInterface, TranscriptFormat } from './transcript-file-interface.js'
|
||||
import { TranscriptFileEvaluator } from './transcript-file-evaluator.js'
|
||||
|
||||
/**
 * A transcript stored on disk, together with its language and format.
 */
export class TranscriptFile implements TranscriptFileInterface {
  path: string
  language: string = 'en'
  format: TranscriptFormat = 'vtt'

  /**
   * @param options.path location of the transcript; `statSync` is called on it, so building
   * an instance for a path where nothing exists throws
   * @param options.language defaults to `en`
   * @param options.format defaults to `vtt`
   */
  constructor (options: { path: string, language?: string, format?: TranscriptFormat }) {
    const { path, language = 'en', format = 'vtt' } = options

    // Only called for its side effect: it throws when `path` cannot be stat'ed
    statSync(path)

    this.path = path
    this.language = language
    this.format = format
  }

  /**
   * Asynchronously reads the entire contents of a transcript file.
   * @see https://nodejs.org/docs/latest-v18.x/api/fs.html#filehandlereadfileoptions for options
   */
  async read (options: Parameters<typeof readFile>[1] = 'utf8') {
    const content = await readFile(this.path, options)

    return content
  }

  /**
   * Writes `content` at `path`, then returns a `TranscriptFile` pointing to it.
   */
  static async write (params: { path: string, content: string, language?: string, format?: TranscriptFormat }): Promise<TranscriptFile> {
    const { path, content, language = 'en', format = 'vtt' } = params

    await writeFile(path, content)

    return new TranscriptFile({ path, language, format })
  }

  /**
   * Compares the contents of both files; `language` and `format` are not taken into account.
   * The result is a Promise and has to be awaited.
   */
  async equals (transcript: TranscriptFile) {
    const otherContent = await transcript.read()
    const ownContent = await this.read()

    return otherContent === ownContent
  }

  /**
   * Evaluates `transcript` (the hypothesis) against this file (the reference)
   * using a `TranscriptFileEvaluator`.
   */
  async evaluate (transcript: TranscriptFile) {
    return new TranscriptFileEvaluator(this, transcript).evaluate()
  }
}
|
|
@ -1,11 +1,10 @@
|
|||
import { $ } from 'execa'
|
||||
import { join } from 'path'
|
||||
import { lstat } from 'node:fs/promises'
|
||||
import { OpenaiTranscriber } from './openai-transcriber.js'
|
||||
import { TranscriptionModel } from '../../transcription-model.js'
|
||||
import { Transcript, TranscriptFormat } from '../../transcript.js'
|
||||
import { $ } from 'execa'
|
||||
import { TranscriptFile, TranscriptFormat } from '../../transcript/index.js'
|
||||
import { getFileInfo } from '../../file-utils.js'
|
||||
import { join } from 'path'
|
||||
import { copyFile, rm } from 'node:fs/promises'
|
||||
import { dirname, basename } from 'node:path'
|
||||
|
||||
export class Ctranslate2Transcriber extends OpenaiTranscriber {
|
||||
public static readonly MODEL_FILENAME = 'model.bin'
|
||||
|
@ -15,21 +14,17 @@ export class Ctranslate2Transcriber extends OpenaiTranscriber {
|
|||
model: TranscriptionModel = { name: 'tiny' },
|
||||
language: string = 'en',
|
||||
format: TranscriptFormat = 'vtt'
|
||||
): Promise<Transcript> {
|
||||
): Promise<TranscriptFile> {
|
||||
this.createPerformanceMark()
|
||||
// Shall we run the command with `{ shell: true }` to get the same error as in sh ?
|
||||
// ex: ENOENT => Command not found
|
||||
const $$ = $({ verbose: true })
|
||||
const { baseName } = getFileInfo(mediaFilePath)
|
||||
|
||||
let modelFilepath = model.path
|
||||
const shouldCreateModelCopy = (model.path && basename(model.path) !== Ctranslate2Transcriber.MODEL_FILENAME)
|
||||
if (shouldCreateModelCopy) {
|
||||
modelFilepath = join(dirname(model.path), Ctranslate2Transcriber.MODEL_FILENAME)
|
||||
await copyFile(model.path, modelFilepath)
|
||||
if (model.path) {
|
||||
await lstat(model.path).then(stats => stats.isDirectory())
|
||||
}
|
||||
|
||||
const modelArgs = model.path ? [ '--model_directory', dirname(model.path) ] : [ '--model', model.name ]
|
||||
const modelArgs = model.path ? [ '--model_directory', model.path ] : [ '--model', model.name ]
|
||||
|
||||
await $$`${this.engine.binary} ${[
|
||||
mediaFilePath,
|
||||
|
@ -42,16 +37,12 @@ export class Ctranslate2Transcriber extends OpenaiTranscriber {
|
|||
language
|
||||
]}`
|
||||
|
||||
if (shouldCreateModelCopy) {
|
||||
// await rm(modelFilepath)
|
||||
}
|
||||
|
||||
this.measurePerformanceMark()
|
||||
|
||||
return {
|
||||
return new TranscriptFile({
|
||||
language,
|
||||
path: join(this.transcriptDirectory, `${baseName}.${format}`),
|
||||
format
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
import { join } from 'path'
|
||||
import { $ } from 'execa'
|
||||
import { TranscriptionModel } from '../../transcription-model.js'
|
||||
import { Transcript, TranscriptFormat } from '../../transcript.js'
|
||||
import { TranscriptFile, TranscriptFormat } from '../../transcript/index.js'
|
||||
import { AbstractTranscriber } from '../../abstract-transcriber.js'
|
||||
import { getFileInfo } from '../../file-utils.js'
|
||||
|
||||
|
@ -11,7 +11,7 @@ export class OpenaiTranscriber extends AbstractTranscriber {
|
|||
model: TranscriptionModel = { name: 'tiny' },
|
||||
language: string = 'en',
|
||||
format: TranscriptFormat = 'vtt'
|
||||
): Promise<Transcript> {
|
||||
): Promise<TranscriptFile> {
|
||||
this.createPerformanceMark()
|
||||
// Shall we run the command with `{ shell: true }` to get the same error as in sh ?
|
||||
// ex: ENOENT => Command not found
|
||||
|
@ -32,10 +32,10 @@ export class OpenaiTranscriber extends AbstractTranscriber {
|
|||
|
||||
this.measurePerformanceMark()
|
||||
|
||||
return {
|
||||
return new TranscriptFile({
|
||||
language,
|
||||
path: join(this.transcriptDirectory, `${baseName}.${format}`),
|
||||
format
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
import { $ } from 'execa'
|
||||
import assert from 'node:assert'
|
||||
import { join } from 'node:path'
|
||||
import { existsSync } from 'node:fs'
|
||||
import { rename } from 'node:fs/promises'
|
||||
import { $ } from 'execa'
|
||||
import { TranscriptionModel } from '../../transcription-model.js'
|
||||
import { Transcript, TranscriptFormat } from '../../transcript.js'
|
||||
import { TranscriptFile, TranscriptFormat } from '../../transcript/index.js'
|
||||
import { getFileInfo } from '../../file-utils.js'
|
||||
import { OpenaiTranscriber } from './openai-transcriber.js'
|
||||
|
||||
|
@ -14,7 +14,7 @@ export class WhisperTimestampedTranscriber extends OpenaiTranscriber {
|
|||
model: TranscriptionModel,
|
||||
language: string,
|
||||
format: TranscriptFormat = 'vtt'
|
||||
): Promise<Transcript> {
|
||||
): Promise<TranscriptFile> {
|
||||
this.createPerformanceMark()
|
||||
|
||||
const $$ = $({ verbose: true })
|
||||
|
@ -37,10 +37,10 @@ export class WhisperTimestampedTranscriber extends OpenaiTranscriber {
|
|||
|
||||
this.measurePerformanceMark()
|
||||
|
||||
return {
|
||||
return new TranscriptFile({
|
||||
language,
|
||||
path: transcriptPath,
|
||||
format
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,18 +1,18 @@
|
|||
import { TranscriptionModel } from "../../transcription-model.js";
|
||||
import { AbstractTranscriber } from "../../abstract-transcriber.js";
|
||||
import { Transcript, TranscriptFormat } from "../../transcript.js";
|
||||
import { TranscriptionModel } from '../../transcription-model.js'
|
||||
import { AbstractTranscriber } from '../../abstract-transcriber.js'
|
||||
import { TranscriptFile, TranscriptFormat } from '../../transcript/index.js'
|
||||
|
||||
// Disable local models
|
||||
// env.allowLocalModels = true
|
||||
|
||||
export class TransformersJsTranscriber extends AbstractTranscriber {
|
||||
async transcribe(
|
||||
async transcribe (
|
||||
mediaFilePath: string,
|
||||
model: TranscriptionModel,
|
||||
language: string,
|
||||
format: TranscriptFormat = "vtt",
|
||||
): Promise<Transcript> {
|
||||
return Promise.resolve(undefined);
|
||||
format: TranscriptFormat = 'vtt'
|
||||
): Promise<TranscriptFile> {
|
||||
return Promise.resolve(undefined)
|
||||
// return pipeline('automatic-speech-recognition', 'no_attentions', {
|
||||
// // For medium models, we need to load the `no_attentions` revision to avoid running out of memory
|
||||
// revision: [].includes('/whisper-medium') ? 'no_attentions' : 'main'
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
import { TranscriptionModel } from '../../transcription-model.js'
|
||||
import { AbstractTranscriber } from '../../abstract-transcriber.js'
|
||||
import { Transcript, TranscriptFormat } from '../../transcript.js'
|
||||
import { TranscriptFile, TranscriptFormat } from '../../transcript/index.js'
|
||||
import { $ } from 'execa'
|
||||
import { join } from 'path'
|
||||
|
||||
|
@ -10,7 +10,7 @@ export class TransformersTranscriber extends AbstractTranscriber {
|
|||
model: TranscriptionModel,
|
||||
language: string,
|
||||
format: TranscriptFormat = 'vtt'
|
||||
): Promise<Transcript> {
|
||||
): Promise<TranscriptFile> {
|
||||
const $$ = $({ verbose: true })
|
||||
// const ffmpegChildProcess = $$`ffmpeg ${[
|
||||
// '-i',
|
||||
|
|
Loading…
Reference in a new issue