From 575fbac0994c584119bd9df1e10bbe0a2fb5482d Mon Sep 17 00:00:00 2001
From: ztimson <zaktimson@gmail.com>
Date: Thu, 12 Feb 2026 13:31:30 -0500
Subject: [PATCH] Fixed ASR

---
 README.md    | 6 ++++--
 package.json | 2 +-
 src/audio.ts | 9 +++++----
 3 files changed, 10 insertions(+), 7 deletions(-)
diff --git a/README.md b/README.md
index 05eefc1..a1007c0 100644
--- a/README.md
+++ b/README.md
@@ -75,6 +75,7 @@ A TypeScript library that provides a unified interface for working with multiple
 
 #### Instructions
 1. Install the package: `npm i @ztimson/ai-utils`
+2. For speaker diarization: `pip install pyannote.audio`
 
 </details>
 
@@ -90,8 +91,9 @@ A TypeScript library that provides a unified interface for working with multiple
 
 #### Instructions
 1. Install the dependencies: `npm i`
-2. Build library: `npm build`
-3. Run unit tests: `npm test`
+2. For speaker diarization: `pip install pyannote.audio`
+3. Build library: `npm run build`
+4. Run unit tests: `npm test`
 
 </details>
 
diff --git a/package.json b/package.json
index 45f6e19..d9dfd79 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@ztimson/ai-utils",
-	"version": "0.6.1",
+	"version": "0.6.2",
 	"description": "AI Utility library",
 	"author": "Zak Timson",
 	"license": "MIT",
diff --git a/src/audio.ts b/src/audio.ts
index 81811d5..6c2f7cd 100644
--- a/src/audio.ts
+++ b/src/audio.ts
@@ -1,6 +1,6 @@
-import { spawn } from 'node:child_process';
-import { pipeline } from '@xenova/transformers';
-import { AbortablePromise, Ai } from './ai.ts';
+import {spawn} from 'node:child_process';
+import {pipeline, read_audio} from '@xenova/transformers';
+import {AbortablePromise, Ai} from './ai.ts';
 
 export class Audio {
 	private whisperPipeline: any;
@@ -96,7 +96,8 @@ print(json.dumps(segments))
 
 				// Transcript
 				if(aborted) return resolve(null);
-				const transcriptResult = await this.whisperPipeline(path, {return_timestamps: speaker ? 'word' : false, chunk_length_s: 30,});
+				const audio = await read_audio(path, 16000);
+				const transcriptResult = await this.whisperPipeline(audio, {return_timestamps: speaker ? 'word' : false, chunk_length_s: 30,});
 				if(!speaker) return resolve(transcriptResult.text?.trim() || null);
 
 				// Speaker Diarization