Working speaker detection with advanced LLM speaker identification. Improved the LLM JSON function.
This commit is contained in:
21
src/audio.ts
21
src/audio.ts
@@ -7,12 +7,12 @@ import {dirname, join} from 'path';
|
||||
export class Audio {
|
||||
constructor(private ai: Ai) {}
|
||||
|
||||
asr(file: string, options: { model?: string; speaker?: boolean } = {}): AbortablePromise<string | null> {
|
||||
asr(file: string, options: { model?: string; speaker?: boolean | 'id' } = {}): AbortablePromise<string | null> {
|
||||
const { model = this.ai.options.asr || 'whisper-base', speaker = false } = options;
|
||||
let aborted = false;
|
||||
const abort = () => { aborted = true; };
|
||||
|
||||
const p = new Promise<string | null>((resolve, reject) => {
|
||||
let p = new Promise<string | null>((resolve, reject) => {
|
||||
const worker = new Worker(join(dirname(fileURLToPath(import.meta.url)), 'asr.js'));
|
||||
const handleMessage = ({ text, warning, error }: any) => {
|
||||
worker.terminate();
|
||||
@@ -34,6 +34,23 @@ export class Audio {
|
||||
});
|
||||
worker.postMessage({file, model, speaker, modelDir: this.ai.options.path, token: this.ai.options.hfToken});
|
||||
});
|
||||
|
||||
// Name speakers using AI
|
||||
if(options.speaker == 'id') {
|
||||
if(!this.ai.language.defaultModel) throw new Error('Configure an LLM for advanced ASR speaker detection');
|
||||
p = p.then(async transcript => {
|
||||
if(!transcript) return transcript;
|
||||
const names = await this.ai.language.json(transcript, '{1: "Detected Name"}', {
|
||||
system: 'Use this following transcript to identify speakers. Only identify speakers you are sure about',
|
||||
temperature: 0.2,
|
||||
});
|
||||
Object.entries(names).forEach(([speaker, name]) => {
|
||||
transcript = (<string>transcript).replaceAll(`[Speaker ${speaker}]`, `[${name}]`);
|
||||
});
|
||||
return transcript;
|
||||
})
|
||||
}
|
||||
|
||||
return Object.assign(p, { abort });
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user