Working speaker detection with advanced LLM-based speaker identification. Improved the LLM JSON function.
All checks were successful
Publish Library / Build NPM Project (push) Successful in 39s
Publish Library / Tag Version (push) Successful in 5s

This commit is contained in:
2026-02-14 09:39:17 -05:00
parent 0360f2493d
commit 4143d00de7
4 changed files with 90 additions and 56 deletions

View File

@@ -7,12 +7,12 @@ import {dirname, join} from 'path';
export class Audio {
constructor(private ai: Ai) {}
asr(file: string, options: { model?: string; speaker?: boolean } = {}): AbortablePromise<string | null> {
asr(file: string, options: { model?: string; speaker?: boolean | 'id' } = {}): AbortablePromise<string | null> {
const { model = this.ai.options.asr || 'whisper-base', speaker = false } = options;
let aborted = false;
const abort = () => { aborted = true; };
const p = new Promise<string | null>((resolve, reject) => {
let p = new Promise<string | null>((resolve, reject) => {
const worker = new Worker(join(dirname(fileURLToPath(import.meta.url)), 'asr.js'));
const handleMessage = ({ text, warning, error }: any) => {
worker.terminate();
@@ -34,6 +34,23 @@ export class Audio {
});
worker.postMessage({file, model, speaker, modelDir: this.ai.options.path, token: this.ai.options.hfToken});
});
// Name speakers using AI
if(options.speaker == 'id') {
if(!this.ai.language.defaultModel) throw new Error('Configure an LLM for advanced ASR speaker detection');
p = p.then(async transcript => {
if(!transcript) return transcript;
const names = await this.ai.language.json(transcript, '{1: "Detected Name"}', {
system: 'Use this following transcript to identify speakers. Only identify speakers you are sure about',
temperature: 0.2,
});
Object.entries(names).forEach(([speaker, name]) => {
transcript = (<string>transcript).replaceAll(`[Speaker ${speaker}]`, `[${name}]`);
});
return transcript;
})
}
return Object.assign(p, { abort });
}