Switching to processes and whisper.cpp to avoid transformers.js memory leaks
src/llm.ts
@@ -4,9 +4,9 @@ import {Anthropic} from './antrhopic.ts';
 import {OpenAi} from './open-ai.ts';
 import {LLMProvider} from './provider.ts';
 import {AiTool} from './tools.ts';
-import {Worker} from 'worker_threads';
 import {fileURLToPath} from 'url';
 import {dirname, join} from 'path';
+import { spawn } from 'node:child_process';
 
 export type AnthropicConfig = {proto: 'anthropic', token: string};
 export type OllamaConfig = {proto: 'ollama', host: string};
@@ -258,34 +258,54 @@ class LLM {
      * @param {maxTokens?: number, overlapTokens?: number} opts Options for embedding such as chunk sizes
      * @returns {Promise<Awaited<{index: number, embedding: number[], text: string, tokens: number}>[]>} Chunked embeddings
      */
-    async embedding(target: object | string, opts: {maxTokens?: number, overlapTokens?: number} = {}) {
+    embedding(target: object | string, opts: {maxTokens?: number, overlapTokens?: number} = {}): AbortablePromise<any[]> {
         let {maxTokens = 500, overlapTokens = 50} = opts;
+        let aborted = false;
+        const abort = () => { aborted = true; };
+
         const embed = (text: string): Promise<number[]> => {
             return new Promise((resolve, reject) => {
-                const worker = new Worker(join(dirname(fileURLToPath(import.meta.url)), 'embedder.js'));
-                const handleMessage = ({ embedding }: any) => {
-                    worker.terminate();
-                    resolve(embedding);
-                };
-                const handleError = (err: Error) => {
-                    worker.terminate();
-                    reject(err);
-                };
-                worker.on('message', handleMessage);
-                worker.on('error', handleError);
-                worker.on('exit', (code) => {
-                    if(code !== 0) reject(new Error(`Worker exited with code ${code}`));
+                if(aborted) return reject(new Error('Aborted'));
+
+                const args: string[] = [
+                    join(dirname(fileURLToPath(import.meta.url)), 'embedder.js'),
+                    <string>this.ai.options.path,
+                    this.ai.options?.embedder || 'bge-small-en-v1.5'
+                ];
+                const proc = spawn('node', args, {stdio: ['pipe', 'pipe', 'ignore']});
+                proc.stdin.write(text);
+                proc.stdin.end();
+
+                let output = '';
+                proc.stdout.on('data', (data: Buffer) => output += data.toString());
+                proc.on('close', (code: number) => {
+                    if(aborted) return reject(new Error('Aborted'));
+                    if(code === 0) {
+                        try {
+                            const result = JSON.parse(output);
+                            resolve(result.embedding);
+                        } catch(err) {
+                            reject(new Error('Failed to parse embedding output'));
+                        }
+                    } else {
+                        reject(new Error(`Embedder process exited with code ${code}`));
+                    }
                 });
-                worker.postMessage({text, model: this.ai.options?.embedder || 'bge-small-en-v1.5', modelDir: this.ai.options.path});
+                proc.on('error', reject);
             });
         };
-        const chunks = this.chunk(target, maxTokens, overlapTokens), results: any[] = [];
-        for(let i = 0; i < chunks.length; i++) {
-            const text= chunks[i];
-            const embedding = await embed(text);
-            results.push({index: i, embedding, text, tokens: this.estimateTokens(text)});
-        }
-        return results;
+
+        const p = (async () => {
+            const chunks = this.chunk(target, maxTokens, overlapTokens), results: any[] = [];
+            for(let i = 0; i < chunks.length; i++) {
+                if(aborted) break;
+                const text = chunks[i];
+                const embedding = await embed(text);
+                results.push({index: i, embedding, text, tokens: this.estimateTokens(text)});
+            }
+            return results;
+        })();
+        return Object.assign(p, { abort });
     }
 
     /**
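
The diff pins down the contract of the spawned embedder.js script without showing it: the model directory and model name arrive as argv[2] and argv[3], the text to embed arrives on stdin, and the script must print a JSON object with an `embedding` array to stdout. Below is a minimal sketch of such an entry point, assuming the embedder still runs transformers.js (here the @xenova/transformers package) inside the short-lived process, so any leaked memory is reclaimed when the process exits. This is illustrative only, not the repository's actual embedder:

// embedder.ts -- illustrative sketch; compiled to the embedder.js that llm.ts spawns.
// Contract implied by the diff: argv[2] = model directory, argv[3] = model name,
// stdin = text to embed, stdout = {"embedding": number[]} as JSON.
import { pipeline, env } from '@xenova/transformers';

const [, , modelDir, model] = process.argv;

async function readStdin(): Promise<string> {
    let data = '';
    for await (const chunk of process.stdin) data += chunk;
    return data;
}

(async () => {
    env.localModelPath = modelDir;  // resolve models from the configured path (assumption)
    const text = await readStdin();
    const extractor = await pipeline('feature-extraction', model);
    const output = await extractor(text, { pooling: 'mean', normalize: true });
    process.stdout.write(JSON.stringify({ embedding: Array.from(output.data) }));
    process.exit(0);  // the process dies here, taking any leaked memory with it
})().catch(() => process.exit(1));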
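
The new return type `AbortablePromise<any[]>` is referenced but not defined in this hunk. Given the `Object.assign(p, { abort })` on the last added line, it is presumably declared elsewhere in the codebase as a plain promise intersected with an abort method, along these lines:

// Presumed shape, inferred from Object.assign(p, { abort }); not shown in the diff.
type AbortablePromise<T> = Promise<T> & { abort: () => void };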
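
A hypothetical usage sketch of the new cancellation path (`llm` and `largeDocument` are placeholders). Note that abort() only flips a flag: the chunk loop stops before the next chunk, while an in-flight child process runs to completion and then rejects on 'close'.

const job = llm.embedding(largeDocument, { maxTokens: 500, overlapTokens: 50 });

// Give up after 30 seconds.
const timer = setTimeout(() => job.abort(), 30_000);

try {
    // Resolves with partial results if aborted between chunks,
    // rejects with 'Aborted' if a chunk was in flight.
    const results = await job;
    for(const { index, embedding, tokens } of results) {
        console.log(`chunk ${index}: ${tokens} tokens, ${embedding.length} dims`);
    }
} catch(err) {
    console.error('Embedding aborted or failed:', err);
} finally {
    clearTimeout(timer);
}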