Fixed ASR

expose diarization support checking function
2026-02-12 13:31:30 -05:00 · 2026-02-12 11:55:29 -05:00
3 changed files with 12 additions and 9 deletions
--- a/README.md
+++ b/README.md
@@ -75,6 +75,7 @@ A TypeScript library that provides a unified interface for working with multiple

 #### Instructions
 1. Install the package: `npm i @ztimson/ai-utils`
+2. For speaker diarization: `pip install pyannote.audio`

 </details>

@@ -90,8 +91,9 @@ A TypeScript library that provides a unified interface for working with multiple

 #### Instructions
 1. Install the dependencies: `npm i`
-2. Build library: `npm build`
-3. Run unit tests: `npm test`
+2. For speaker diarization: `pip install pyannote.audio`
+3. Build library: `npm run build`
+4. Run unit tests: `npm test`

 </details>

--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@ztimson/ai-utils",
-	"version": "0.6.0",
+	"version": "0.6.2",
 	"description": "AI Utility library",
 	"author": "Zak Timson",
 	"license": "MIT",
--- a/src/audio.ts
+++ b/src/audio.ts
@@ -1,5 +1,5 @@
 import {spawn} from 'node:child_process';
-import { pipeline } from '@xenova/transformers';
+import {pipeline, read_audio} from '@xenova/transformers';
 import {AbortablePromise, Ai} from './ai.ts';

 export class Audio {
@@ -33,7 +33,7 @@ export class Audio {
 		return lines.join('\n');
 	}

-	private async isPyannoteInstalled(): Promise<boolean> {
+	async canDiarization(): Promise<boolean> {
 		return new Promise((resolve) => {
 			const proc = spawn('python3', ['-c', 'import pyannote.audio']);
 			proc.on('close', (code: number) => resolve(code === 0));
@@ -42,7 +42,7 @@ export class Audio {
 	}

 	private async runDiarization(audioPath: string): Promise<any[]> {
-		if(!await this.isPyannoteInstalled()) throw new Error('Pyannote is not installed: pip install pyannote.audio');
+		if(!await this.canDiarization()) throw new Error('Pyannote is not installed: pip install pyannote.audio');
 		const script = `
 import sys
 import json
@@ -96,7 +96,8 @@ print(json.dumps(segments))

 				// Transcript
 				if(aborted) return resolve(null);
-				const transcriptResult = await this.whisperPipeline(path, {return_timestamps: speaker ? 'word' : false, chunk_length_s: 30,});
+				const audio = await read_audio(path, 16000);
+				const transcriptResult = await this.whisperPipeline(audio, {return_timestamps: speaker ? 'word' : false, chunk_length_s: 30,});
 				if(!speaker) return resolve(transcriptResult.text?.trim() || null);

 				// Speaker Diarization
Author	SHA1	Message	Date
ztimson	575fbac099	Fixed ASR All checks were successful Publish Library / Build NPM Project (push) Successful in 30s Details Publish Library / Tag Version (push) Successful in 4s Details	2026-02-12 13:31:30 -05:00
ztimson	46ae0f7913	expose diarization support checking function All checks were successful Publish Library / Build NPM Project (push) Successful in 25s Details Publish Library / Tag Version (push) Successful in 5s Details	2026-02-12 11:55:29 -05:00