Switching to processes and whisper.cpp to avoid transformers.js memory leaks
src/llm.ts
@@ -4,9 +4,9 @@ import {Anthropic} from './antrhopic.ts';
 import {OpenAi} from './open-ai.ts';
 import {LLMProvider} from './provider.ts';
 import {AiTool} from './tools.ts';
-import {Worker} from 'worker_threads';
 import {fileURLToPath} from 'url';
 import {dirname, join} from 'path';
+import { spawn } from 'node:child_process';
 
 export type AnthropicConfig = {proto: 'anthropic', token: string};
 export type OllamaConfig = {proto: 'ollama', host: string};
@@ -258,34 +258,54 @@ class LLM {
      * @param {maxTokens?: number, overlapTokens?: number} opts Options for embedding such as chunk sizes
      * @returns {Promise<Awaited<{index: number, embedding: number[], text: string, tokens: number}>[]>} Chunked embeddings
      */
-    async embedding(target: object | string, opts: {maxTokens?: number, overlapTokens?: number} = {}) {
+    embedding(target: object | string, opts: {maxTokens?: number, overlapTokens?: number} = {}): AbortablePromise<any[]> {
         let {maxTokens = 500, overlapTokens = 50} = opts;
+        let aborted = false;
+        const abort = () => { aborted = true; };
+
         const embed = (text: string): Promise<number[]> => {
             return new Promise((resolve, reject) => {
-                const worker = new Worker(join(dirname(fileURLToPath(import.meta.url)), 'embedder.js'));
-                const handleMessage = ({ embedding }: any) => {
-                    worker.terminate();
-                    resolve(embedding);
-                };
-                const handleError = (err: Error) => {
-                    worker.terminate();
-                    reject(err);
-                };
-                worker.on('message', handleMessage);
-                worker.on('error', handleError);
-                worker.on('exit', (code) => {
-                    if(code !== 0) reject(new Error(`Worker exited with code ${code}`));
+                if(aborted) return reject(new Error('Aborted'));
+
+                const args: string[] = [
+                    join(dirname(fileURLToPath(import.meta.url)), 'embedder.js'),
+                    <string>this.ai.options.path,
+                    this.ai.options?.embedder || 'bge-small-en-v1.5'
+                ];
+                const proc = spawn('node', args, {stdio: ['pipe', 'pipe', 'ignore']});
+                proc.stdin.write(text);
+                proc.stdin.end();
+
+                let output = '';
+                proc.stdout.on('data', (data: Buffer) => output += data.toString());
+                proc.on('close', (code: number) => {
+                    if(aborted) return reject(new Error('Aborted'));
+                    if(code === 0) {
+                        try {
+                            const result = JSON.parse(output);
+                            resolve(result.embedding);
+                        } catch(err) {
+                            reject(new Error('Failed to parse embedding output'));
+                        }
+                    } else {
+                        reject(new Error(`Embedder process exited with code ${code}`));
+                    }
                 });
-                worker.postMessage({text, model: this.ai.options?.embedder || 'bge-small-en-v1.5', modelDir: this.ai.options.path});
+                proc.on('error', reject);
             });
         };
-        const chunks = this.chunk(target, maxTokens, overlapTokens), results: any[] = [];
-        for(let i = 0; i < chunks.length; i++) {
-            const text= chunks[i];
-            const embedding = await embed(text);
-            results.push({index: i, embedding, text, tokens: this.estimateTokens(text)});
-        }
-        return results;
+
+        const p = (async () => {
+            const chunks = this.chunk(target, maxTokens, overlapTokens), results: any[] = [];
+            for(let i = 0; i < chunks.length; i++) {
+                if(aborted) break;
+                const text = chunks[i];
+                const embedding = await embed(text);
+                results.push({index: i, embedding, text, tokens: this.estimateTokens(text)});
+            }
+            return results;
+        })();
+        return Object.assign(p, { abort });
     }
 
     /**
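
The diff pins down the contract of the spawned embedder.js script without showing it: the model directory and model name arrive as argv[2] and argv[3], the text to embed arrives on stdin, and the script must print a JSON object with an `embedding` array to stdout. Below is a minimal sketch of such an entry point, assuming the embedder still runs transformers.js (here the @xenova/transformers package) inside the short-lived process, so any leaked memory is reclaimed when the process exits. This is illustrative only, not the repository's actual embedder:

// embedder.ts -- illustrative sketch; compiled to the embedder.js that llm.ts spawns.
// Contract implied by the diff: argv[2] = model directory, argv[3] = model name,
// stdin = text to embed, stdout = {"embedding": number[]} as JSON.
import { pipeline, env } from '@xenova/transformers';

const [, , modelDir, model] = process.argv;

async function readStdin(): Promise<string> {
    let data = '';
    for await (const chunk of process.stdin) data += chunk;
    return data;
}

(async () => {
    env.localModelPath = modelDir;  // resolve models from the configured path (assumption)
    const text = await readStdin();
    const extractor = await pipeline('feature-extraction', model);
    const output = await extractor(text, { pooling: 'mean', normalize: true });
    process.stdout.write(JSON.stringify({ embedding: Array.from(output.data) }));
    process.exit(0);  // the process dies here, taking any leaked memory with it
})().catch(() => process.exit(1));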
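
The new return type `AbortablePromise<any[]>` is referenced but not defined in this hunk. Given the `Object.assign(p, { abort })` on the last added line, it is presumably declared elsewhere in the codebase as a plain promise intersected with an abort method, along these lines:

// Presumed shape, inferred from Object.assign(p, { abort }); not shown in the diff.
type AbortablePromise<T> = Promise<T> & { abort: () => void };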
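
A hypothetical usage sketch of the new cancellation path (`llm` and `largeDocument` are placeholders). Note that abort() only flips a flag: the chunk loop stops before the next chunk, while an in-flight child process runs to completion and then rejects on 'close'.

const job = llm.embedding(largeDocument, { maxTokens: 500, overlapTokens: 50 });

// Give up after 30 seconds.
const timer = setTimeout(() => job.abort(), 30_000);

try {
    // Resolves with partial results if aborted between chunks,
    // rejects with 'Aborted' if a chunk was in flight.
    const results = await job;
    for(const { index, embedding, tokens } of results) {
        console.log(`chunk ${index}: ${tokens} tokens, ${embedding.length} dims`);
    }
} catch(err) {
    console.error('Embedding aborted or failed:', err);
} finally {
    clearTimeout(timer);
}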