61 lines
2.2 KiB
TypeScript
61 lines
2.2 KiB
TypeScript
import {fileURLToPath} from 'url';
|
|
import {Worker} from 'worker_threads';
|
|
import {AbortablePromise, Ai} from './ai.ts';
|
|
import {canDiarization} from './asr.ts';
|
|
import {dirname, join} from 'path';
|
|
|
|
/**
 * Audio utilities that run speech recognition in a worker thread and can
 * optionally use the configured language model to name the speakers.
 */
export class Audio {
	constructor(private ai: Ai) {}

	/**
	 * Transcribe an audio file by delegating to the sibling `asr.js` worker script.
	 *
	 * @param file Path of the audio file; forwarded to the worker unchanged.
	 * @param options.model ASR model name. Falls back to `ai.options.asr`, then `'whisper-base'`.
	 * @param options.speaker Forwarded to the worker. When set to `'id'`, the finished
	 * transcript is additionally passed to the language model and every `[Speaker N]`
	 * tag it could name is replaced with `[Name]`.
	 * @returns A promise for the transcript with an `abort()` method attached. Aborting
	 * terminates the worker and resolves the promise with `null`.
	 * @throws Error synchronously when `speaker` is `'id'` but no default LLM is configured.
	 */
	asr(file: string, options: { model?: string; speaker?: boolean | 'id' } = {}): AbortablePromise<string | null> {
		const { model = this.ai.options.asr || 'whisper-base', speaker = false } = options;
		const identifySpeakers = speaker === 'id';

		// Validate up front: throwing after the worker has started would leave it running with nobody listening.
		if(identifySpeakers && !this.ai.language.defaultModel) throw new Error('Configure an LLM for advanced ASR speaker detection');

		type WorkerReply = { text?: string | null; warning?: string; error?: string };

		let aborted = false;
		let worker: Worker | undefined;
		let settle: ((value: string | null) => void) | undefined;

		// Stop the worker and release anyone awaiting the result instead of leaving the promise pending forever.
		const abort = () => {
			if(aborted) return;
			aborted = true;
			void worker?.terminate();
			settle?.(null);
		};

		let p = new Promise<string | null>((resolve, reject) => {
			const thread = new Worker(join(dirname(fileURLToPath(import.meta.url)), 'asr.js'));
			worker = thread;
			settle = resolve;

			thread.on('message', ({ text, warning, error }: WorkerReply) => {
				// The worker sends a single reply, so it can be shut down as soon as it arrives.
				void thread.terminate();
				if(aborted) return;
				if(error) {
					reject(new Error(error));
					return;
				}
				if(warning) console.warn(warning);
				resolve(text ?? null);
			});

			thread.on('error', (err: Error) => {
				void thread.terminate();
				if(!aborted) reject(err);
			});

			// Covers a crash without a message; a no-op when the promise has already settled.
			thread.on('exit', (code) => {
				if(code !== 0 && !aborted) reject(new Error(`Worker exited with code ${code}`));
			});

			thread.postMessage({file, model, speaker, modelDir: this.ai.options.path, token: this.ai.options.hfToken});
		});

		// Name speakers using AI
		if(identifySpeakers) {
			p = p.then(async transcript => {
				// Nothing to label when the transcript is empty or the job was aborted.
				if(!transcript) return transcript;

				// Sample the first three chunks plus the last one to keep the prompt small.
				let chunks = this.ai.language.chunk(transcript, 500, 0);
				if(chunks.length > 4) chunks = [...chunks.slice(0, 3), chunks.at(-1) as string];

				const names = await this.ai.language.json(chunks.join('\n'), '{1: "Detected Name"}', {
					system: 'Use this following transcript to identify speakers. Only identify speakers you are sure about',
					temperature: 0.1,
				});
				if(!names || typeof names !== 'object') return transcript;

				let labelled: string = transcript;
				for(const [id, name] of Object.entries(names)) {
					// Skip anything that is not a usable name rather than writing "[null]" into the transcript.
					if(typeof name !== 'string' || !name.trim()) continue;
					// Replacer function: the name is model output and must not be parsed for `$` patterns.
					labelled = labelled.replaceAll(`[Speaker ${id}]`, () => `[${name}]`);
				}
				return labelled;
			});
		}

		return Object.assign(p, { abort });
	}

	/** Re-export of the ASR module's `canDiarization` helper on the instance. */
	canDiarization = canDiarization;
}
|