From 575fbac0994c584119bd9df1e10bbe0a2fb5482d Mon Sep 17 00:00:00 2001 From: ztimson Date: Thu, 12 Feb 2026 13:31:30 -0500 Subject: [PATCH] Fixed ASR --- README.md | 6 ++++-- package.json | 2 +- src/audio.ts | 9 +++++---- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 05eefc1..a1007c0 100644 --- a/README.md +++ b/README.md @@ -75,6 +75,7 @@ A TypeScript library that provides a unified interface for working with multiple #### Instructions 1. Install the package: `npm i @ztimson/ai-utils` +2. For speaker diarization: `pip install pyannote.audio` @@ -90,8 +91,9 @@ A TypeScript library that provides a unified interface for working with multiple #### Instructions 1. Install the dependencies: `npm i` -2. Build library: `npm build` -3. Run unit tests: `npm test` +2. For speaker diarization: `pip install pyannote.audio` +3. Build library: `npm build` +4. Run unit tests: `npm test` diff --git a/package.json b/package.json index 45f6e19..d9dfd79 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@ztimson/ai-utils", - "version": "0.6.1", + "version": "0.6.2", "description": "AI Utility library", "author": "Zak Timson", "license": "MIT", diff --git a/src/audio.ts b/src/audio.ts index 81811d5..6c2f7cd 100644 --- a/src/audio.ts +++ b/src/audio.ts @@ -1,6 +1,6 @@ -import { spawn } from 'node:child_process'; -import { pipeline } from '@xenova/transformers'; -import { AbortablePromise, Ai } from './ai.ts'; +import {spawn} from 'node:child_process'; +import {pipeline, read_audio} from '@xenova/transformers'; +import {AbortablePromise, Ai} from './ai.ts'; export class Audio { private whisperPipeline: any; @@ -96,7 +96,8 @@ print(json.dumps(segments)) // Transcript if(aborted) return resolve(null); - const transcriptResult = await this.whisperPipeline(path, {return_timestamps: speaker ? 'word' : false, chunk_length_s: 30,}); + const audio = await read_audio(path, 16000); + const transcriptResult = await this.whisperPipeline(audio, {return_timestamps: speaker ? 'word' : false, chunk_length_s: 30,}); if(!speaker) return resolve(transcriptResult.text?.trim() || null); // Speaker Diarization