Compare commits

..

2 Commits
0.7.5 ... 0.7.7

Author SHA1 Message Date
790608f020 Queue OCR & ASR work
All checks were successful
Publish Library / Build NPM Project (push) Successful in 35s
Publish Library / Tag Version (push) Successful in 6s
2026-02-20 19:05:19 -05:00
473424ae23 segfault fix
All checks were successful
Publish Library / Build NPM Project (push) Successful in 33s
Publish Library / Tag Version (push) Successful in 6s
2026-02-20 17:31:49 -05:00
4 changed files with 77 additions and 36 deletions

View File

@@ -1,6 +1,6 @@
{ {
"name": "@ztimson/ai-utils", "name": "@ztimson/ai-utils",
"version": "0.7.5", "version": "0.7.7",
"description": "AI Utility library", "description": "AI Utility library",
"author": "Zak Timson", "author": "Zak Timson",
"license": "MIT", "license": "MIT",

View File

@@ -110,30 +110,28 @@ function prepareAudioBuffer(file: string): [string, Float32Array] {
} }
parentPort?.on('message', async ({ file, speaker, model, modelDir, token }) => { parentPort?.on('message', async ({ file, speaker, model, modelDir, token }) => {
let tempFile = null;
try { try {
if(!whisperPipeline) whisperPipeline = await pipeline('automatic-speech-recognition', `Xenova/${model}`, {cache_dir: modelDir, quantized: true}); if(!whisperPipeline) whisperPipeline = await pipeline('automatic-speech-recognition', `Xenova/${model}`, {cache_dir: modelDir, quantized: true});
// Prepare audio file
const [f, buffer] = prepareAudioBuffer(file); const [f, buffer] = prepareAudioBuffer(file);
tempFile = f !== file ? f : null;
// Fetch transcript and speakers
const hasDiarization = await canDiarization(); const hasDiarization = await canDiarization();
const [transcript, speakers] = await Promise.all([ const [transcript, speakers] = await Promise.all([
whisperPipeline(buffer, {return_timestamps: speaker ? 'word' : false}), whisperPipeline(buffer, {return_timestamps: speaker ? 'word' : false}),
(!speaker || !token || !hasDiarization) ? Promise.resolve(): runDiarization(hasDiarization, f, modelDir, token), (!speaker || !token || !hasDiarization) ? Promise.resolve(): runDiarization(hasDiarization, f, modelDir, token),
]); ]);
if(file != f) rmSync(f, { recursive: true, force: true });
// Return any results / errors if no more processing required
const text = transcript.text?.trim() || null; const text = transcript.text?.trim() || null;
if(!speaker) return parentPort?.postMessage({ text }); if(!speaker) return parentPort?.postMessage({ text });
if(!token) return parentPort?.postMessage({ text, error: 'HuggingFace token required' }); if(!token) return parentPort?.postMessage({ text, error: 'HuggingFace token required' });
if(!hasDiarization) return parentPort?.postMessage({ text, error: 'Speaker diarization unavailable' }); if(!hasDiarization) return parentPort?.postMessage({ text, error: 'Speaker diarization unavailable' });
// Combine transcript and speakers
const combined = combineSpeakerTranscript(transcript.chunks || [], speakers || []); const combined = combineSpeakerTranscript(transcript.chunks || [], speakers || []);
parentPort?.postMessage({ text: combined }); parentPort?.postMessage({ text: combined });
} catch (err: any) { } catch (err: any) {
parentPort?.postMessage({ error: err.stack || err.message }); parentPort?.postMessage({ error: err.stack || err.message });
} finally {
if(tempFile) rmSync(tempFile, { recursive: true, force: true });
} }
}); });

View File

@@ -5,37 +5,59 @@ import {canDiarization} from './asr.ts';
import {dirname, join} from 'path'; import {dirname, join} from 'path';
export class Audio { export class Audio {
private busy = false;
private currentJob: any;
private queue: Array<{file: string, model: string, speaker: boolean | 'id', modelDir: string, token: string, resolve: any, reject: any}> = [];
private worker: Worker | null = null;
constructor(private ai: Ai) {} constructor(private ai: Ai) {}
/**
 * Start the next queued ASR job if no job is currently running.
 *
 * A single worker thread is created lazily and reused for every job; jobs are
 * handed to it one at a time in FIFO order. Failures while creating the worker
 * or posting the job reject that job and release the queue instead of leaving
 * `busy` stuck.
 */
private processQueue() {
	if(this.busy || !this.queue.length) return;
	this.busy = true;
	const job = this.queue.shift()!;
	this.currentJob = job;
	try {
		let worker = this.worker;
		if(!worker) {
			const created = new Worker(join(dirname(fileURLToPath(import.meta.url)), 'asr.js'));
			created.on('message', this.handleMessage.bind(this));
			created.on('error', this.handleError.bind(this));
			created.on('exit', code => {
				// Ignore exits of a worker that has already been replaced
				if(this.worker !== created) return;
				// Drop the dead handle so the next job spawns a fresh worker
				this.worker = null;
				if(!this.busy || !this.currentJob) return;
				// The worker stopped mid-job: fail that job rather than leaving it pending forever
				const pending = this.currentJob;
				this.currentJob = null;
				this.busy = false;
				pending.reject(new Error(`Worker exited with code ${code}`));
				this.processQueue();
			});
			this.worker = worker = created;
		}
		worker.postMessage({file: job.file, model: job.model, speaker: job.speaker, modelDir: job.modelDir, token: job.token});
	} catch(err) {
		// Worker construction or postMessage threw: release the queue and report to the caller
		this.currentJob = null;
		this.busy = false;
		job.reject(err);
		this.processQueue();
	}
}
/**
 * Settle the active job with the worker's reply, then start the next queued job.
 *
 * @param message Reply posted by the worker: `error` rejects the job, otherwise
 * the job resolves with `text`; a `warning`, if present, is logged first.
 */
private handleMessage({text, warning, error}: any) {
	const job = this.currentJob;
	// Clear the slot so a later worker event cannot be attributed to this finished job
	this.currentJob = null;
	this.busy = false;
	// A reply with no active job has nothing to settle; only keep the queue moving
	if(job) {
		if(error) job.reject(new Error(error));
		else {
			if(warning) console.warn(warning);
			job.resolve(text);
		}
	}
	this.processQueue();
}
/**
 * Handle the worker's `error` event: fail the active job and continue with the queue.
 *
 * Node terminates a worker thread after it emits `error`, so the cached worker
 * is discarded here; the next job then creates a fresh worker instead of
 * posting to a dead one and never receiving a reply.
 *
 * @param err Error emitted by the worker thread
 */
private handleError(err: Error) {
	// Drop the terminated worker even when idle so it is never reused
	this.worker = null;
	const job = this.currentJob;
	if(!job) return;
	this.currentJob = null;
	this.busy = false;
	job.reject(err);
	this.processQueue();
}
asr(file: string, options: { model?: string; speaker?: boolean | 'id' } = {}): AbortablePromise<string | null> { asr(file: string, options: { model?: string; speaker?: boolean | 'id' } = {}): AbortablePromise<string | null> {
const { model = this.ai.options.asr || 'whisper-base', speaker = false } = options; const { model = this.ai.options.asr || 'whisper-base', speaker = false } = options;
let aborted = false; let aborted = false;
const abort = () => { aborted = true; }; const abort = () => { aborted = true; };
let p = new Promise<string | null>((resolve, reject) => { let p = new Promise<string | null>((resolve, reject) => {
const worker = new Worker(join(dirname(fileURLToPath(import.meta.url)), 'asr.js')); this.queue.push({file, model, speaker, modelDir: <string>this.ai.options.path, token: <string>this.ai.options.hfToken,
const handleMessage = ({ text, warning, error }: any) => { resolve: (text: string | null) => !aborted && resolve(text),
worker.terminate(); reject: (err: Error) => !aborted && reject(err)
if(aborted) return;
if(error) reject(new Error(error));
else {
if(warning) console.warn(warning);
resolve(text);
}
};
const handleError = (err: Error) => {
worker.terminate();
if(!aborted) reject(err);
};
worker.on('message', handleMessage);
worker.on('error', handleError);
worker.on('exit', (code) => {
if(code !== 0 && !aborted) reject(new Error(`Worker exited with code ${code}`));
}); });
worker.postMessage({file, model, speaker, modelDir: this.ai.options.path, token: this.ai.options.hfToken}); this.processQueue();
}); });
// Name speakers using AI
if(options.speaker == 'id') { if(options.speaker == 'id') {
if(!this.ai.language.defaultModel) throw new Error('Configure an LLM for advanced ASR speaker detection'); if(!this.ai.language.defaultModel) throw new Error('Configure an LLM for advanced ASR speaker detection');
p = p.then(async transcript => { p = p.then(async transcript => {

View File

@@ -2,8 +2,26 @@ import {createWorker} from 'tesseract.js';
import {AbortablePromise, Ai} from './ai.ts'; import {AbortablePromise, Ai} from './ai.ts';
export class Vision { export class Vision {
private worker: any = null;
private queue: Array<{ path: string, resolve: any, reject: any }> = [];
private busy = false;
constructor(private ai: Ai) { } constructor(private ai: Ai) {}
/**
 * Run the next queued OCR job if no job is currently running.
 *
 * The tesseract worker is created lazily on the first job and reused for later
 * ones; jobs run one at a time in FIFO order. Worker creation sits inside the
 * `try` so that a failure rejects the dequeued job and releases `busy`, rather
 * than stalling the queue and surfacing as an unhandled rejection.
 */
private async processQueue() {
	if(this.busy || !this.queue.length) return;
	this.busy = true;
	const job = this.queue.shift()!;
	try {
		if(!this.worker) this.worker = await createWorker(this.ai.options.ocr || 'eng', 2, {cachePath: this.ai.options.path});
		const {data} = await this.worker.recognize(job.path);
		job.resolve(data.text.trim() || null);
	} catch(err) {
		job.reject(err);
	} finally {
		// Always release the queue, whichever step failed
		this.busy = false;
	}
	this.processQueue();
}
/** /**
* Convert image to text using Optical Character Recognition * Convert image to text using Optical Character Recognition
@@ -11,13 +29,16 @@ export class Vision {
* @returns {AbortablePromise<string | null>} Promise of extracted text with abort method * @returns {AbortablePromise<string | null>} Promise of extracted text with abort method
*/ */
ocr(path: string): AbortablePromise<string | null> { ocr(path: string): AbortablePromise<string | null> {
let worker: any; let aborted = false;
const p = new Promise<string | null>(async res => { const abort = () => { aborted = true; };
worker = await createWorker(this.ai.options.ocr || 'eng', 2, {cachePath: this.ai.options.path}); const p = new Promise<string | null>((resolve, reject) => {
const {data} = await worker.recognize(path); this.queue.push({
await worker.terminate(); path,
res(data.text.trim() || null); resolve: (text: string | null) => !aborted && resolve(text),
reject: (err: Error) => !aborted && reject(err)
}); });
return Object.assign(p, {abort: () => worker?.terminate()}); this.processQueue();
});
return Object.assign(p, {abort});
} }
} }