import {fileURLToPath} from 'url';
import {Worker} from 'worker_threads';
import {AbortablePromise, Ai} from './ai.ts';
import {canDiarization} from './asr.ts';
import {dirname, join} from 'path';

/**
 * Audio helpers built on top of an {@link Ai} instance.
 *
 * Speech recognition is delegated to the `asr.js` worker script located next to this module.
 */
export class Audio {
	constructor(private ai: Ai) {}

	/**
	 * Transcribe an audio file in a worker thread.
	 *
	 * The worker receives `{file, model, speaker, modelDir, token}` and is expected to answer with a single
	 * `{text, warning, error}` message. Warnings are logged with `console.warn`; errors reject the promise.
	 *
	 * @param file Path of the audio file handed to the worker.
	 * @param options.model ASR model name. Falls back to `ai.options.asr`, then `'whisper-base'`.
	 * @param options.speaker Forwarded to the worker. When `'id'`, the transcript is additionally sent to the
	 * configured LLM and every `[Speaker N]` tag it can name is replaced with `[Name]`.
	 * @returns Promise of the transcript with an `abort()` method attached. `abort()` terminates the worker;
	 * once aborted the worker result is ignored and the promise is left pending (never resolved or rejected).
	 * @throws Error synchronously when `speaker` is `'id'` and no default LLM is configured.
	 */
	asr(file: string, options: { model?: string; speaker?: boolean | 'id' } = {}): AbortablePromise {
		const { model = this.ai.options.asr || 'whisper-base', speaker = false } = options;
		const identifySpeakers = speaker === 'id';

		// Validate before spawning anything so a misconfiguration cannot leave an orphaned worker running
		if(identifySpeakers && !this.ai.language.defaultModel)
			throw new Error('Configure an LLM for advanced ASR speaker detection');

		let aborted = false;
		let worker: Worker | undefined;
		const abort = () => {
			aborted = true;
			// Stop the transcription thread instead of letting it run to completion unobserved
			void worker?.terminate();
		};

		let p = new Promise((resolve, reject) => {
			const thread = new Worker(join(dirname(fileURLToPath(import.meta.url)), 'asr.js'));
			worker = thread;

			const handleMessage = ({ text, warning, error }: any) => {
				void thread.terminate();
				if(aborted) return;
				if(error) {
					reject(new Error(error));
					return;
				}
				if(warning) console.warn(warning);
				resolve(text);
			};
			const handleError = (err: Error) => {
				void thread.terminate();
				if(!aborted) reject(err);
			};

			thread.on('message', handleMessage);
			thread.on('error', handleError);
			// terminate() also triggers 'exit' with a non-zero code; rejecting an already settled promise is a no-op
			thread.on('exit', (code) => {
				if(code !== 0 && !aborted) reject(new Error(`Worker exited with code ${code}`));
			});
			thread.postMessage({file, model, speaker, modelDir: this.ai.options.path, token: this.ai.options.hfToken});
		});

		// Name speakers using AI
		if(identifySpeakers) {
			p = p.then(async transcript => {
				if(!transcript || typeof transcript !== 'string') return transcript;

				// Only sample the transcript: the first three chunks plus the last one
				let chunks = this.ai.language.chunk(transcript, 500, 0);
				if(chunks.length > 4) chunks = [...chunks.slice(0, 3), ...chunks.slice(-1)];

				const names: unknown = await this.ai.language.json(chunks.join('\n'), '{1: "Detected Name"}', {
					system: 'Use this following transcript to identify speakers. \nOnly identify speakers you are sure about',
					temperature: 0.1,
				});
				// The LLM reply is untrusted: without a usable mapping keep the generic speaker tags
				if(names == null || typeof names !== 'object') return transcript;

				let named = transcript;
				for(const [id, name] of Object.entries(names)) {
					if(typeof name !== 'string' || !name) continue;
					// Function replacer so `$` sequences inside a name are inserted literally
					named = named.replaceAll(`[Speaker ${id}]`, () => `[${name}]`);
				}
				return named;
			});
		}

		return Object.assign(p, { abort });
	}

	/** Re-export of `canDiarization` from `./asr.ts` for convenient access through this class. */
	canDiarization = canDiarization;
}