diff --git a/package.json b/package.json index dd3c3dc..db20f88 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@ztimson/ai-utils", - "version": "0.7.6", + "version": "0.7.7", "description": "AI Utility library", "author": "Zak Timson", "license": "MIT", diff --git a/src/asr.ts b/src/asr.ts index fea7e15..f934481 100644 --- a/src/asr.ts +++ b/src/asr.ts @@ -7,6 +7,8 @@ import { join } from 'node:path'; import { tmpdir } from 'node:os'; import wavefile from 'wavefile'; +let whisperPipeline: any; + export async function canDiarization(): Promise { const checkPython = (cmd: string) => { return new Promise((resolve) => { @@ -110,12 +112,13 @@ function prepareAudioBuffer(file: string): [string, Float32Array] { parentPort?.on('message', async ({ file, speaker, model, modelDir, token }) => { let tempFile = null; try { - const asr: any = await pipeline('automatic-speech-recognition', `Xenova/${model}`, {cache_dir: modelDir, quantized: true}); + if(!whisperPipeline) whisperPipeline = await pipeline('automatic-speech-recognition', `Xenova/${model}`, {cache_dir: modelDir, quantized: true}); + const [f, buffer] = prepareAudioBuffer(file); tempFile = f !== file ? f : null; const hasDiarization = await canDiarization(); const [transcript, speakers] = await Promise.all([ - asr(buffer, {return_timestamps: speaker ? 'word' : false}), + whisperPipeline(buffer, {return_timestamps: speaker ? 'word' : false}), (!speaker || !token || !hasDiarization) ? Promise.resolve(): runDiarization(hasDiarization, f, modelDir, token), ]); diff --git a/src/audio.ts b/src/audio.ts index 2474b45..6e3ea3a 100644 --- a/src/audio.ts +++ b/src/audio.ts @@ -5,37 +5,59 @@ import {canDiarization} from './asr.ts'; import {dirname, join} from 'path'; export class Audio { + private busy = false; + private currentJob: any; + private queue: Array<{file: string, model: string, speaker: boolean | 'id', modelDir: string, token: string, resolve: any, reject: any}> = []; + private worker: Worker | null = null; + constructor(private ai: Ai) {} + private processQueue() { + if(this.busy || !this.queue.length) return; + + this.busy = true; + const job = this.queue.shift()!; + if(!this.worker) { + this.worker = new Worker(join(dirname(fileURLToPath(import.meta.url)), 'asr.js')); + this.worker.on('message', this.handleMessage.bind(this)); + this.worker.on('error', this.handleError.bind(this)); + } + + this.currentJob = job; + this.worker.postMessage({file: job.file, model: job.model, speaker: job.speaker, modelDir: job.modelDir, token: job.token}); + } + + private handleMessage({text, warning, error}: any) { + const job = this.currentJob!; + this.busy = false; + if(error) job.reject(new Error(error)); + else { + if(warning) console.warn(warning); + job.resolve(text); + } + this.processQueue(); + } + + private handleError(err: Error) { + if(this.currentJob) { + this.currentJob.reject(err); + this.busy = false; + this.processQueue(); + } + } + asr(file: string, options: { model?: string; speaker?: boolean | 'id' } = {}): AbortablePromise { const { model = this.ai.options.asr || 'whisper-base', speaker = false } = options; let aborted = false; const abort = () => { aborted = true; }; - let p = new Promise((resolve, reject) => { - const worker = new Worker(join(dirname(fileURLToPath(import.meta.url)), 'asr.js')); - const handleMessage = ({ text, warning, error }: any) => { - setTimeout(() => worker.terminate(), 1000); - if(aborted) return; - if(error) reject(new Error(error)); - else { - if(warning) console.warn(warning); - resolve(text); - } - }; - const handleError = (err: Error) => { - setTimeout(() => worker.terminate(), 1000); - if(!aborted) reject(err); - }; - worker.on('message', handleMessage); - worker.on('error', handleError); - worker.on('exit', (code) => { - if(code !== 0 && !aborted) reject(new Error(`Worker exited with code ${code}`)); + this.queue.push({file, model, speaker, modelDir: this.ai.options.path, token: this.ai.options.hfToken, + resolve: (text: string | null) => !aborted && resolve(text), + reject: (err: Error) => !aborted && reject(err) }); - worker.postMessage({file, model, speaker, modelDir: this.ai.options.path, token: this.ai.options.hfToken}); + this.processQueue(); }); - // Name speakers using AI if(options.speaker == 'id') { if(!this.ai.language.defaultModel) throw new Error('Configure an LLM for advanced ASR speaker detection'); p = p.then(async transcript => { diff --git a/src/vision.ts b/src/vision.ts index 484ce43..911e5fb 100644 --- a/src/vision.ts +++ b/src/vision.ts @@ -2,8 +2,26 @@ import {createWorker} from 'tesseract.js'; import {AbortablePromise, Ai} from './ai.ts'; export class Vision { + private worker: any = null; + private queue: Array<{ path: string, resolve: any, reject: any }> = []; + private busy = false; - constructor(private ai: Ai) { } + constructor(private ai: Ai) {} + + private async processQueue() { + if(this.busy || !this.queue.length) return; + this.busy = true; + const job = this.queue.shift()!; + if(!this.worker) this.worker = await createWorker(this.ai.options.ocr || 'eng', 2, {cachePath: this.ai.options.path}); + try { + const {data} = await this.worker.recognize(job.path); + job.resolve(data.text.trim() || null); + } catch(err) { + job.reject(err); + } + this.busy = false; + this.processQueue(); + } /** * Convert image to text using Optical Character Recognition @@ -11,13 +29,16 @@ export class Vision { * @returns {AbortablePromise} Promise of extracted text with abort method */ ocr(path: string): AbortablePromise { - let worker: any; - const p = new Promise(async res => { - worker = await createWorker(this.ai.options.ocr || 'eng', 2, {cachePath: this.ai.options.path}); - const {data} = await worker.recognize(path); - await worker.terminate(); - res(data.text.trim() || null); + let aborted = false; + const abort = () => { aborted = true; }; + const p = new Promise((resolve, reject) => { + this.queue.push({ + path, + resolve: (text: string | null) => !aborted && resolve(text), + reject: (err: Error) => !aborted && reject(err) + }); + this.processQueue(); }); - return Object.assign(p, {abort: () => worker?.terminate()}); + return Object.assign(p, {abort}); } }