Compare commits

...

7 Commits
0.5.1 ... 0.6.1

Author SHA1 Message Date
46ae0f7913 expose diarization support checking function
All checks were successful
Publish Library / Build NPM Project (push) Successful in 25s
Publish Library / Tag Version (push) Successful in 5s
2026-02-12 11:55:29 -05:00
54730a2b9a Speaker diarization
All checks were successful
Publish Library / Build NPM Project (push) Successful in 31s
Publish Library / Tag Version (push) Successful in 5s
2026-02-12 11:26:11 -05:00
27506d20af Fix anthropic message history
All checks were successful
Publish Library / Build NPM Project (push) Successful in 30s
Publish Library / Tag Version (push) Successful in 5s
2026-02-11 22:45:30 -05:00
8c64129200 Removed log statement
All checks were successful
Publish Library / Build NPM Project (push) Successful in 27s
Publish Library / Tag Version (push) Successful in 5s
2026-02-11 21:58:39 -05:00
013aa942c0 Added save directory for embedder
All checks were successful
Publish Library / Build NPM Project (push) Successful in 33s
Publish Library / Tag Version (push) Successful in 4s
2026-02-11 21:45:54 -05:00
c8d5660b1a Enable quantized embedder for speed boost
All checks were successful
Publish Library / Build NPM Project (push) Successful in 23s
Publish Library / Tag Version (push) Successful in 5s
2026-02-11 20:28:14 -05:00
f2c66b0cb8 Updated default embedder
All checks were successful
Publish Library / Build NPM Project (push) Successful in 39s
Publish Library / Tag Version (push) Successful in 8s
2026-02-11 20:23:50 -05:00
8 changed files with 148 additions and 83 deletions

View File

@@ -1,6 +1,6 @@
{ {
"name": "@ztimson/ai-utils", "name": "@ztimson/ai-utils",
"version": "0.5.1", "version": "0.6.1",
"description": "AI Utility library", "description": "AI Utility library",
"author": "Zak Timson", "author": "Zak Timson",
"license": "MIT", "license": "MIT",

View File

@@ -10,22 +10,16 @@ export type AbortablePromise<T> = Promise<T> & {
export type AiOptions = { export type AiOptions = {
/** Path to models */ /** Path to models */
path?: string; path?: string;
/** ASR model: whisper-tiny, whisper-base */
asr?: string;
/** Embedding model: all-MiniLM-L6-v2, bge-small-en-v1.5, bge-large-en-v1.5 */
embedder?: string;
/** Large language models, first is default */ /** Large language models, first is default */
llm?: Omit<LLMRequest, 'model'> & { llm?: Omit<LLMRequest, 'model'> & {
models: {[model: string]: AnthropicConfig | OllamaConfig | OpenAiConfig}; models: {[model: string]: AnthropicConfig | OllamaConfig | OpenAiConfig};
} }
/** Tesseract OCR configuration */ /** OCR model: eng, eng_best, eng_fast */
tesseract?: { ocr?: string;
/** Model: eng, eng_best, eng_fast */
model?: string;
}
/** Whisper ASR configuration */
whisper?: {
/** Whisper binary location */
binary: string;
/** Model: `ggml-base.en.bin` */
model: string;
}
} }
export class Ai { export class Ai {

View File

@@ -13,25 +13,25 @@ export class Anthropic extends LLMProvider {
} }
private toStandard(history: any[]): LLMMessage[] { private toStandard(history: any[]): LLMMessage[] {
for(let i = 0; i < history.length; i++) { const timestamp = Date.now();
const orgI = i; const messages: LLMMessage[] = [];
if(typeof history[orgI].content != 'string') { for(let h of history) {
if(history[orgI].role == 'assistant') { if(typeof h.content == 'string') {
history[orgI].content.filter((c: any) => c.type =='tool_use').forEach((c: any) => { messages.push(<any>{timestamp, ...h});
history.splice(i + 1, 0, {role: 'tool', id: c.id, name: c.name, args: c.input, timestamp: Date.now()}); } else {
}); const textContent = h.content?.filter((c: any) => c.type == 'text').map((c: any) => c.text).join('\n\n');
} else if(history[orgI].role == 'user') { if(textContent) messages.push({timestamp, role: h.role, content: textContent});
history[orgI].content.filter((c: any) => c.type =='tool_result').forEach((c: any) => { h.content.forEach((c: any) => {
const h = history.find((h: any) => h.id == c.tool_use_id); if(c.type == 'tool_use') {
h[c.is_error ? 'error' : 'content'] = c.content; messages.push({timestamp, role: 'tool', id: c.id, name: c.name, args: c.input, content: undefined});
} else if(c.type == 'tool_result') {
const m: any = messages.findLast(m => (<any>m).id == c.tool_use_id);
if(m) m[c.is_error ? 'error' : 'content'] = c.content;
}
}); });
} }
history[orgI].content = history[orgI].content.filter((c: any) => c.type == 'text').map((c: any) => c.text).join('\n\n');
if(!history[orgI].content) history.splice(orgI, 1);
} }
if(!history[orgI].timestamp) history[orgI].timestamp = Date.now(); return messages;
}
return history.filter(h => !!h.content);
} }
private fromStandard(history: LLMMessage[]): any[] { private fromStandard(history: LLMMessage[]): any[] {
@@ -50,8 +50,8 @@ export class Anthropic extends LLMProvider {
ask(message: string, options: LLMRequest = {}): AbortablePromise<string> { ask(message: string, options: LLMRequest = {}): AbortablePromise<string> {
const controller = new AbortController(); const controller = new AbortController();
return Object.assign(new Promise<any>(async (res, rej) => { return Object.assign(new Promise<any>(async (res) => {
const history = this.fromStandard([...options.history || [], {role: 'user', content: message, timestamp: Date.now()}]); let history = this.fromStandard([...options.history || [], {role: 'user', content: message, timestamp: Date.now()}]);
const tools = options.tools || this.ai.options.llm?.tools || []; const tools = options.tools || this.ai.options.llm?.tools || [];
const requestParams: any = { const requestParams: any = {
model: options.model || this.model, model: options.model || this.model,
@@ -73,7 +73,6 @@ export class Anthropic extends LLMProvider {
}; };
let resp: any, isFirstMessage = true; let resp: any, isFirstMessage = true;
const assistantMessages: string[] = [];
do { do {
resp = await this.client.messages.create(requestParams).catch(err => { resp = await this.client.messages.create(requestParams).catch(err => {
err.message += `\n\nMessages:\n${JSON.stringify(history, null, 2)}`; err.message += `\n\nMessages:\n${JSON.stringify(history, null, 2)}`;
@@ -119,7 +118,6 @@ export class Anthropic extends LLMProvider {
if(options.stream) options.stream({tool: toolCall.name}); if(options.stream) options.stream({tool: toolCall.name});
if(!tool) return {tool_use_id: toolCall.id, is_error: true, content: 'Tool not found'}; if(!tool) return {tool_use_id: toolCall.id, is_error: true, content: 'Tool not found'};
try { try {
console.log(typeof tool.fn);
const result = await tool.fn(toolCall.input, options?.stream, this.ai); const result = await tool.fn(toolCall.input, options?.stream, this.ai);
return {type: 'tool_result', tool_use_id: toolCall.id, content: JSONSanitize(result)}; return {type: 'tool_result', tool_use_id: toolCall.id, content: JSONSanitize(result)};
} catch (err: any) { } catch (err: any) {
@@ -131,7 +129,7 @@ export class Anthropic extends LLMProvider {
} }
} while (!controller.signal.aborted && resp.content.some((c: any) => c.type === 'tool_use')); } while (!controller.signal.aborted && resp.content.some((c: any) => c.type === 'tool_use'));
history.push({role: 'assistant', content: resp.content.filter((c: any) => c.type == 'text').map((c: any) => c.text).join('\n\n')}); history.push({role: 'assistant', content: resp.content.filter((c: any) => c.type == 'text').map((c: any) => c.text).join('\n\n')});
this.toStandard(history); history = this.toStandard(history);
if(options.stream) options.stream({done: true}); if(options.stream) options.stream({done: true});
if(options.history) options.history.splice(0, options.history.length, ...history); if(options.history) options.history.splice(0, options.history.length, ...history);

View File

@@ -1,50 +1,115 @@
import { spawn } from 'node:child_process'; import { spawn } from 'node:child_process';
import fs from 'node:fs/promises'; import { pipeline } from '@xenova/transformers';
import Path from 'node:path';
import { AbortablePromise, Ai } from './ai.ts'; import { AbortablePromise, Ai } from './ai.ts';
export class Audio { export class Audio {
private downloads: {[key: string]: Promise<string>} = {}; private whisperPipeline: any;
private whisperModel!: string;
constructor(private ai: Ai) { constructor(private ai: Ai) {}
if(ai.options.whisper?.binary) {
this.whisperModel = ai.options.whisper?.model.endsWith('.bin') ? ai.options.whisper?.model : ai.options.whisper?.model + '.bin'; private combineSpeakerTranscript(chunks: any[], speakers: any[]): string {
this.downloadAsrModel(); const speakerMap = new Map();
let speakerCount = 0;
speakers.forEach((seg: any) => {
if(!speakerMap.has(seg.speaker)) speakerMap.set(seg.speaker, ++speakerCount);
});
const lines: string[] = [];
let currentSpeaker = -1;
let currentText = '';
chunks.forEach((chunk: any) => {
const time = chunk.timestamp[0];
const speaker = speakers.find((s: any) => time >= s.start && time <= s.end);
const speakerNum = speaker ? speakerMap.get(speaker.speaker) : 1;
if (speakerNum !== currentSpeaker) {
if(currentText) lines.push(`[speaker ${currentSpeaker}]: ${currentText.trim()}`);
currentSpeaker = speakerNum;
currentText = chunk.text;
} else {
currentText += chunk.text;
} }
});
if(currentText) lines.push(`[speaker ${currentSpeaker}]: ${currentText.trim()}`);
return lines.join('\n');
} }
asr(path: string, model: string = this.whisperModel): AbortablePromise<string | null> { async canDiarization(): Promise<boolean> {
if(!this.ai.options.whisper?.binary) throw new Error('Whisper not configured'); return new Promise((resolve) => {
let abort: any = () => {}; const proc = spawn('python3', ['-c', 'import pyannote.audio']);
const p = new Promise<string | null>(async (resolve, reject) => { proc.on('close', (code: number) => resolve(code === 0));
const m = await this.downloadAsrModel(model); proc.on('error', () => resolve(false));
});
}
private async runDiarization(audioPath: string): Promise<any[]> {
if(!await this.canDiarization()) throw new Error('Pyannote is not installed: pip install pyannote.audio');
const script = `
# Speaker diarization helper, run as: python3 -c <script> <audio path>
# Prints a JSON array of {"start", "end", "speaker"} segments to stdout.
import json
import os  # needed for the os.environ assignment below; without it the script dies with NameError
import sys

from pyannote.audio import Pipeline

# The value between the quotes is filled in by the enclosing JavaScript template literal
# before Python ever sees this script.
os.environ['TORCH_HOME'] = "${this.ai.options.path}"

pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1")
diarization = pipeline(sys.argv[1])  # sys.argv[1] is the audio file path passed after the script

# Flatten every speaker turn into a plain dict so the result is JSON-serializable.
segments = []
for turn, _, speaker in diarization.itertracks(yield_label=True):
    segments.append({
        "start": turn.start,
        "end": turn.end,
        "speaker": speaker
    })

# The caller parses stdout as JSON, so this must be the only thing printed to stdout.
print(json.dumps(segments))
`;
return new Promise((resolve, reject) => {
let output = ''; let output = '';
const proc = spawn(<string>this.ai.options.whisper?.binary, ['-nt', '-np', '-m', m, '-f', path], {stdio: ['ignore', 'pipe', 'ignore']}); const proc = spawn('python3', ['-c', script, audioPath]);
abort = () => proc.kill('SIGTERM');
proc.on('error', (err: Error) => reject(err));
proc.stdout.on('data', (data: Buffer) => output += data.toString()); proc.stdout.on('data', (data: Buffer) => output += data.toString());
proc.stderr.on('data', (data: Buffer) => console.error(data.toString()));
proc.on('close', (code: number) => { proc.on('close', (code: number) => {
if(code === 0) resolve(output.trim() || null); if(code === 0) {
else reject(new Error(`Exit code ${code}`)); try {
resolve(JSON.parse(output));
} catch (err) {
reject(new Error('Failed to parse diarization output'));
}
} else {
reject(new Error(`Python process exited with code ${code}`));
}
}); });
proc.on('error', reject);
}); });
}
asr(path: string, options: { model?: string; speaker?: boolean } = {}): AbortablePromise<string | null> {
const { model = this.ai.options.asr || 'whisper-base', speaker = false } = options;
let aborted = false;
const abort = () => { aborted = true; };
const p = new Promise<string | null>(async (resolve, reject) => {
try {
if(aborted) return resolve(null);
if(!this.whisperPipeline) this.whisperPipeline = await pipeline('automatic-speech-recognition', `Xenova/${model}`, { cache_dir: this.ai.options.path, quantized: true });
// Transcript
if(aborted) return resolve(null);
const transcriptResult = await this.whisperPipeline(path, {return_timestamps: speaker ? 'word' : false, chunk_length_s: 30,});
if(!speaker) return resolve(transcriptResult.text?.trim() || null);
// Speaker Diarization
if(aborted) return resolve(null);
const speakers = await this.runDiarization(path);
if(aborted) return resolve(null);
const combined = this.combineSpeakerTranscript(transcriptResult.chunks || [], speakers);
resolve(combined);
} catch (err) {
reject(err);
}
});
return Object.assign(p, { abort }); return Object.assign(p, { abort });
} }
async downloadAsrModel(model: string = this.whisperModel): Promise<string> {
if(!this.ai.options.whisper?.binary) throw new Error('Whisper not configured');
if(!model.endsWith('.bin')) model += '.bin';
const p = Path.join(<string>this.ai.options.path, model);
if(await fs.stat(p).then(() => true).catch(() => false)) return p;
if(!!this.downloads[model]) return this.downloads[model];
this.downloads[model] = fetch(`https://huggingface.co/ggerganov/whisper.cpp/resolve/main/${model}`)
.then(resp => resp.arrayBuffer())
.then(arr => Buffer.from(arr)).then(async buffer => {
await fs.writeFile(p, buffer);
delete this.downloads[model];
return p;
});
return this.downloads[model];
}
} }

View File

@@ -1,11 +1,14 @@
import { pipeline } from '@xenova/transformers'; import { pipeline } from '@xenova/transformers';
import { parentPort } from 'worker_threads'; import { parentPort } from 'worker_threads';
let model: any; let embedder: any;
parentPort?.on('message', async ({ id, text }) => { parentPort?.on('message', async ({ id, text, model, path }) => {
if(!model) model = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2'); if(!embedder) embedder = await pipeline('feature-extraction', 'Xenova/' + model, {
const output = await model(text, { pooling: 'mean', normalize: true }); quantized: true,
cache_dir: path,
});
const output = await embedder(text, { pooling: 'mean', normalize: true });
const embedding = Array.from(output.data); const embedding = Array.from(output.data);
parentPort?.postMessage({ id, embedding }); parentPort?.postMessage({ id, embedding });
}); });

View File

@@ -271,7 +271,12 @@ class LLM {
return new Promise((resolve, reject) => { return new Promise((resolve, reject) => {
const id = this.embedId++; const id = this.embedId++;
this.embedQueue.set(id, { resolve, reject }); this.embedQueue.set(id, { resolve, reject });
this.embedWorker?.postMessage({ id, text }); this.embedWorker?.postMessage({
id,
text,
model: this.ai.options?.embedder || 'bge-small-en-v1.5',
path: this.ai.options.path
});
}); });
}; };
const chunks = this.chunk(target, maxTokens, overlapTokens); const chunks = this.chunk(target, maxTokens, overlapTokens);

View File

@@ -68,7 +68,7 @@ export class OpenAi extends LLMProvider {
const controller = new AbortController(); const controller = new AbortController();
return Object.assign(new Promise<any>(async (res, rej) => { return Object.assign(new Promise<any>(async (res, rej) => {
if(options.system && options.history?.[0]?.role != 'system') options.history?.splice(0, 0, {role: 'system', content: options.system, timestamp: Date.now()}); if(options.system && options.history?.[0]?.role != 'system') options.history?.splice(0, 0, {role: 'system', content: options.system, timestamp: Date.now()});
const history = this.fromStandard([...options.history || [], {role: 'user', content: message, timestamp: Date.now()}]); let history = this.fromStandard([...options.history || [], {role: 'user', content: message, timestamp: Date.now()}]);
const tools = options.tools || this.ai.options.llm?.tools || []; const tools = options.tools || this.ai.options.llm?.tools || [];
const requestParams: any = { const requestParams: any = {
model: options.model || this.model, model: options.model || this.model,
@@ -133,7 +133,7 @@ export class OpenAi extends LLMProvider {
} }
} while (!controller.signal.aborted && resp.choices?.[0]?.message?.tool_calls?.length); } while (!controller.signal.aborted && resp.choices?.[0]?.message?.tool_calls?.length);
history.push({role: 'assistant', content: resp.choices[0].message.content || ''}); history.push({role: 'assistant', content: resp.choices[0].message.content || ''});
this.toStandard(history); history = this.toStandard(history);
if(options.stream) options.stream({done: true}); if(options.stream) options.stream({done: true});
if(options.history) options.history.splice(0, options.history.length, ...history); if(options.history) options.history.splice(0, options.history.length, ...history);

View File

@@ -13,7 +13,7 @@ export class Vision {
ocr(path: string): AbortablePromise<string | null> { ocr(path: string): AbortablePromise<string | null> {
let worker: any; let worker: any;
const p = new Promise<string | null>(async res => { const p = new Promise<string | null>(async res => {
worker = await createWorker(this.ai.options.tesseract?.model || 'eng', 2, {cachePath: this.ai.options.path}); worker = await createWorker(this.ai.options.ocr || 'eng', 2, {cachePath: this.ai.options.path});
const {data} = await worker.recognize(path); const {data} = await worker.recognize(path);
await worker.terminate(); await worker.terminate();
res(data.text.trim() || null); res(data.text.trim() || null);