import {JSONAttemptParse} from '@ztimson/utils';
|
|
import {AbortablePromise, Ai} from './ai.ts';
|
|
import {Anthropic} from './antrhopic.ts';
|
|
import {OpenAi} from './open-ai.ts';
|
|
import {LLMProvider} from './provider.ts';
|
|
import {AiTool} from './tools.ts';
|
|
import {fileURLToPath} from 'url';
|
|
import {dirname, join} from 'path';
|
|
import { spawn } from 'node:child_process';
|
|
|
|
/** Anthropic API configuration */
export type AnthropicConfig = {proto: 'anthropic', token: string};

/** Ollama configuration — served through the OpenAI-compatible endpoint, so no token is required */
export type OllamaConfig = {proto: 'ollama', host: string};

/** OpenAI API configuration; `host` may point at any OpenAI-compatible server */
export type OpenAiConfig = {proto: 'openai', host?: string, token: string};
|
|
|
|
/** One entry in a chat history: either a plain chat message or a tool invocation record */
export type LLMMessage = {
	/** Message originator */
	role: 'assistant' | 'system' | 'user';
	/** Message content (note: `string | any` collapses to `any` — kept for documentation intent) */
	content: string | any;
	/** Timestamp (ms since epoch, see Date.now() usage in compressHistory) */
	timestamp?: number;
} | {
	/** Tool call */
	role: 'tool';
	/** Unique ID for call */
	id: string;
	/** Tool that was run */
	name: string;
	/** Tool arguments */
	args: any;
	/** Tool result (undefined until the call has completed) */
	content: undefined | string;
	/** Tool error, when the call failed */
	error?: undefined | string;
	/** Timestamp (ms since epoch) */
	timestamp?: number;
}
|
|
|
|
/** Background information the AI will be fed */
export type LLMMemory = {
	/** What entity is this fact about */
	owner: string;
	/** The information that will be remembered */
	fact: string;
	/** Embedding vectors: [0] = owner/subject alone, [1] = the combined `"owner: fact"` text */
	embeddings: [number[], number[]];
}
|
|
|
|
/** Per-request options for LLM.ask and related helpers */
export type LLMRequest = {
	/** System prompt */
	system?: string;
	/** Message history; mutated in place by ask() (tool records appended, compression applied) */
	history?: LLMMessage[];
	/** Max tokens for request */
	max_tokens?: number;
	/** 0 = Rigid Logic, 1 = Balanced, 2 = Hyper Creative */
	temperature?: number;
	/** Available tools */
	tools?: AiTool[];
	/** LLM model; falls back to the first configured model when omitted */
	model?: string;
	/** Stream response chunks as they arrive */
	stream?: (chunk: {text?: string, tool?: string, done?: true}) => any;
	/** Compress old messages in the chat to free up context */
	compress?: {
		/** Trigger chat compression once context exceeds the token count */
		max: number;
		/** Compress chat until context size smaller than */
		min: number
	},
	/** Background information the AI will be fed (RAG memories; mutated in place by the `remember` tool) */
	memory?: LLMMemory[],
}
|
|
|
|
class LLM {
|
|
defaultModel!: string;
|
|
models: {[model: string]: LLMProvider} = {};
|
|
|
|
constructor(public readonly ai: Ai) {
|
|
if(!ai.options.llm?.models) return;
|
|
Object.entries(ai.options.llm.models).forEach(([model, config]) => {
|
|
if(!this.defaultModel) this.defaultModel = model;
|
|
if(config.proto == 'anthropic') this.models[model] = new Anthropic(this.ai, config.token, model);
|
|
else if(config.proto == 'ollama') this.models[model] = new OpenAi(this.ai, config.host, 'not-needed', model);
|
|
else if(config.proto == 'openai') this.models[model] = new OpenAi(this.ai, config.host || null, config.token, model);
|
|
});
|
|
}
|
|
|
|
/**
|
|
* Chat with LLM
|
|
* @param {string} message Question
|
|
* @param {LLMRequest} options Configuration options and chat history
|
|
* @returns {{abort: () => void, response: Promise<string>}} Function to abort response and chat history
|
|
*/
|
|
ask(message: string, options: LLMRequest = {}): AbortablePromise<string> {
|
|
options = <any>{
|
|
system: '',
|
|
temperature: 0.8,
|
|
...this.ai.options.llm,
|
|
models: undefined,
|
|
history: [],
|
|
...options,
|
|
}
|
|
const m = options.model || this.defaultModel;
|
|
if(!this.models[m]) throw new Error(`Model does not exist: ${m}`);
|
|
let abort = () => {};
|
|
return Object.assign(new Promise<string>(async res => {
|
|
if(!options.history) options.history = [];
|
|
// If memories were passed, find any relevant ones and add a tool for ADHOC lookups
|
|
if(options.memory) {
|
|
const search = async (query?: string | null, subject?: string | null, limit = 10) => {
|
|
const [o, q] = await Promise.all([
|
|
subject ? this.embedding(subject) : Promise.resolve(null),
|
|
query ? this.embedding(query) : Promise.resolve(null),
|
|
]);
|
|
return (options.memory || []).map(m => {
|
|
const score = (o ? this.cosineSimilarity(m.embeddings[0], o[0].embedding) : 0)
|
|
+ (q ? this.cosineSimilarity(m.embeddings[1], q[0].embedding) : 0);
|
|
return {...m, score};
|
|
}).toSorted((a: any, b: any) => a.score - b.score).slice(0, limit);
|
|
}
|
|
|
|
options.system += '\nYou have RAG memory and will be given the top_k closest memories regarding the users query. Save anything new you have learned worth remembering from the user message using the remember tool and feel free to recall memories manually.\n';
|
|
const relevant = await search(message);
|
|
if(relevant.length) options.history.push({role: 'tool', name: 'recall', id: 'auto_recall_' + Math.random().toString(), args: {}, content: 'Things I remembered:\n' + relevant.map(m => `${m.owner}: ${m.fact}`).join('\n')});
|
|
options.tools = [{
|
|
name: 'recall',
|
|
description: 'Recall the closest memories you have regarding a query using RAG',
|
|
args: {
|
|
subject: {type: 'string', description: 'Find information by a subject topic, can be used with or without query argument'},
|
|
query: {type: 'string', description: 'Search memory based on a query, can be used with or without subject argument'},
|
|
topK: {type: 'number', description: 'Result limit, default 5'},
|
|
},
|
|
fn: (args) => {
|
|
if(!args.subject && !args.query) throw new Error('Either a subject or query argument is required');
|
|
return search(args.query, args.subject, args.topK);
|
|
}
|
|
}, {
|
|
name: 'remember',
|
|
description: 'Store important facts user shares for future recall',
|
|
args: {
|
|
owner: {type: 'string', description: 'Subject/person this fact is about'},
|
|
fact: {type: 'string', description: 'The information to remember'}
|
|
},
|
|
fn: async (args) => {
|
|
if(!options.memory) return;
|
|
const e = await Promise.all([
|
|
this.embedding(args.owner),
|
|
this.embedding(`${args.owner}: ${args.fact}`)
|
|
]);
|
|
const newMem = {owner: args.owner, fact: args.fact, embeddings: <any>[e[0][0].embedding, e[1][0].embedding]};
|
|
options.memory.splice(0, options.memory.length, ...[
|
|
...options.memory.filter(m => {
|
|
return this.cosineSimilarity(newMem.embeddings[0], m.embeddings[0]) < 0.9 && this.cosineSimilarity(newMem.embeddings[1], m.embeddings[1]) < 0.8;
|
|
}),
|
|
newMem
|
|
]);
|
|
return 'Remembered!';
|
|
}
|
|
}, ...options.tools || []];
|
|
}
|
|
|
|
// Ask
|
|
const resp = await this.models[m].ask(message, options);
|
|
|
|
// Remove any memory calls from history
|
|
if(options.memory) options.history.splice(0, options.history.length, ...options.history.filter(h => h.role != 'tool' || (h.name != 'recall' && h.name != 'remember')));
|
|
|
|
// Compress message history
|
|
if(options.compress) {
|
|
const compressed = await this.ai.language.compressHistory(options.history, options.compress.max, options.compress.min, options);
|
|
options.history.splice(0, options.history.length, ...compressed);
|
|
}
|
|
|
|
return res(resp);
|
|
}), {abort});
|
|
}
|
|
|
|
async code(message: string, options?: LLMRequest): Promise<any> {
|
|
const resp = await this.ask(message, {...options, system: [
|
|
options?.system,
|
|
'Return your response in a code block'
|
|
].filter(t => !!t).join(('\n'))});
|
|
const codeBlock = /```(?:.+)?\s*([\s\S]*?)```/.exec(resp);
|
|
return codeBlock ? codeBlock[1].trim() : null;
|
|
}
|
|
|
|
/**
|
|
* Compress chat history to reduce context size
|
|
* @param {LLMMessage[]} history Chatlog that will be compressed
|
|
* @param max Trigger compression once context is larger than max
|
|
* @param min Leave messages less than the token minimum, summarize the rest
|
|
* @param {LLMRequest} options LLM options
|
|
* @returns {Promise<LLMMessage[]>} New chat history will summary at index 0
|
|
*/
|
|
async compressHistory(history: LLMMessage[], max: number, min: number, options?: LLMRequest): Promise<LLMMessage[]> {
|
|
if(this.estimateTokens(history) < max) return history;
|
|
let keep = 0, tokens = 0;
|
|
for(let m of history.toReversed()) {
|
|
tokens += this.estimateTokens(m.content);
|
|
if(tokens < min) keep++;
|
|
else break;
|
|
}
|
|
if(history.length <= keep) return history;
|
|
const system = history[0].role == 'system' ? history[0] : null,
|
|
recent = keep == 0 ? [] : history.slice(-keep),
|
|
process = (keep == 0 ? history : history.slice(0, -keep)).filter(h => h.role === 'assistant' || h.role === 'user');
|
|
|
|
const summary: any = await this.summarize(process.map(m => `[${m.role}]: ${m.content}`).join('\n\n'), 500, options);
|
|
const d = Date.now();
|
|
const h = [{role: <any>'tool', name: 'summary', id: `summary_` + d, args: {}, content: `Conversation Summary: ${summary?.summary}`, timestamp: d}, ...recent];
|
|
if(system) h.splice(0, 0, system);
|
|
return h;
|
|
}
|
|
|
|
/**
 * Compare the difference between embeddings (calculates the angle between two vectors)
 * @param {number[]} v1 First embedding / vector for comparison
 * @param {number[]} v2 Second embedding / vector for comparison
 * @returns {number} Cosine similarity in [-1, 1]: 1 = same direction, 0 = orthogonal (or a zero vector), -1 = opposite
 * @throws {Error} When the vectors differ in length
 */
cosineSimilarity(v1: number[], v2: number[]): number {
	if (v1.length !== v2.length) throw new Error('Vectors must be same length');
	let dotProduct = 0, normA = 0, normB = 0;
	for (let i = 0; i < v1.length; i++) {
		dotProduct += v1[i] * v2[i];
		normA += v1[i] * v1[i];
		normB += v2[i] * v2[i];
	}
	const denominator = Math.sqrt(normA) * Math.sqrt(normB);
	// Guard divide-by-zero when either vector is all zeros
	return denominator === 0 ? 0 : dotProduct / denominator;
}
|
|
|
|
/**
 * Chunk text into parts for AI digestion
 * @param {object | string} target Item that will be chunked (objects get converted)
 * @param {number} maxTokens Chunking size. More = better context, less = more specific (Search by paragraphs or lines)
 * @param {number} overlapTokens Includes previous X tokens to provide continuity to AI (In addition to max tokens)
 * @returns {string[]} Chunked strings
 */
chunk(target: object | string, maxTokens = 500, overlapTokens = 50): string[] {
	// Flatten a nested object into "path.to.key: value" lines so objects chunk like text
	const objString = (obj: any, path = ''): string[] => {
		if(!obj) return [];
		return Object.entries(obj).flatMap(([key, value]) => {
			// Numeric keys render as indices (parent[0]); string keys as parent.key
			const p = path ? `${path}${isNaN(+key) ? `.${key}` : `[${key}]`}` : key;
			if(typeof value === 'object' && !Array.isArray(value)) return objString(value, p);
			return `${p}: ${Array.isArray(value) ? value.join(', ') : value}`;
		});
	};
	const lines = typeof target === 'object' ? objString(target) : target.toString().split('\n');
	// Split into whitespace-delimited words, keeping '\n' markers so line boundaries survive
	const tokens = lines.flatMap(l => [...l.split(/\s+/).filter(Boolean), '\n']);
	const chunks: string[] = [];
	for(let i = 0; i < tokens.length;) {
		// Grow the chunk word-by-word until the estimated token budget would be exceeded
		let text = '', j = i;
		while(j < tokens.length) {
			const next = text + (text ? ' ' : '') + tokens[j];
			if(this.estimateTokens(next.replace(/\s*\n\s*/g, '\n')) > maxTokens && text) break;
			text = next;
			j++;
		}
		const clean = text.replace(/\s*\n\s*/g, '\n').trim();
		if(clean) chunks.push(clean);
		// Step back overlapTokens words for continuity; `i + 1` floor guarantees forward progress.
		// (Previously `Math.max(j - overlapTokens, j)` always picked `j`, so overlap was never applied.)
		i = Math.max(j - overlapTokens, i + 1);
	}
	return chunks;
}
|
|
|
|
/**
|
|
* Create a vector representation of a string
|
|
* @param {object | string} target Item that will be embedded (objects get converted)
|
|
* @param {maxTokens?: number, overlapTokens?: number} opts Options for embedding such as chunk sizes
|
|
* @returns {Promise<Awaited<{index: number, embedding: number[], text: string, tokens: number}>[]>} Chunked embeddings
|
|
*/
|
|
embedding(target: object | string, opts: {maxTokens?: number, overlapTokens?: number} = {}): AbortablePromise<any[]> {
|
|
let {maxTokens = 500, overlapTokens = 50} = opts;
|
|
let aborted = false;
|
|
const abort = () => { aborted = true; };
|
|
|
|
const embed = (text: string): Promise<number[]> => {
|
|
return new Promise((resolve, reject) => {
|
|
if(aborted) return reject(new Error('Aborted'));
|
|
|
|
const args: string[] = [
|
|
join(dirname(fileURLToPath(import.meta.url)), 'embedder.js'),
|
|
<string>this.ai.options.path,
|
|
this.ai.options?.embedder || 'bge-small-en-v1.5'
|
|
];
|
|
const proc = spawn('node', args, {stdio: ['pipe', 'pipe', 'ignore']});
|
|
proc.stdin.write(text);
|
|
proc.stdin.end();
|
|
|
|
let output = '';
|
|
proc.stdout.on('data', (data: Buffer) => output += data.toString());
|
|
proc.on('close', (code: number) => {
|
|
if(aborted) return reject(new Error('Aborted'));
|
|
if(code === 0) {
|
|
try {
|
|
const result = JSON.parse(output);
|
|
resolve(result.embedding);
|
|
} catch(err) {
|
|
reject(new Error('Failed to parse embedding output'));
|
|
}
|
|
} else {
|
|
reject(new Error(`Embedder process exited with code ${code}`));
|
|
}
|
|
});
|
|
proc.on('error', reject);
|
|
});
|
|
};
|
|
|
|
const p = (async () => {
|
|
const chunks = this.chunk(target, maxTokens, overlapTokens), results: any[] = [];
|
|
for(let i = 0; i < chunks.length; i++) {
|
|
if(aborted) break;
|
|
const text = chunks[i];
|
|
const embedding = await embed(text);
|
|
results.push({index: i, embedding, text, tokens: this.estimateTokens(text)});
|
|
}
|
|
return results;
|
|
})();
|
|
return Object.assign(p, { abort });
|
|
}
|
|
|
|
/**
 * Estimate variable as tokens
 * @param history Object to size
 * @returns {number} Rough token count (~4 chars per token plus a 20% safety margin); 0 for unserializable input
 */
estimateTokens(history: any): number {
	// JSON.stringify returns undefined for undefined/functions/symbols — treat those as zero tokens instead of crashing
	// (reachable: compressHistory sizes tool messages whose `content` may be undefined)
	const text = JSON.stringify(history) ?? '';
	return Math.ceil((text.length / 4) * 1.2);
}
|
|
|
|
/**
 * Compare the difference between two strings using tensor math
 * @param target Text that will be checked
 * @param {string} searchTerms Multiple search terms to check against target
 * @returns {{avg: number, max: number, similarities: number[]}} Similarity values 0-1: 0 = unique, 1 = identical
 * @throws {Error} When no search terms are supplied (target + at least one term = 2 strings to compare)
 */
fuzzyMatch(target: string, ...searchTerms: string[]) {
	// Target plus one term already gives two strings, so only an empty term list is invalid
	if(searchTerms.length < 1) throw new Error('Requires at least 2 strings to compare');
	// Cheap positional character hash projected into a fixed-size vector (not a real embedding)
	const vector = (text: string, dimensions: number = 10): number[] => {
		const v = text.toLowerCase().split('').map((char, index) =>
			(char.charCodeAt(0) * (index + 1)) % dimensions / dimensions).slice(0, dimensions);
		// Zero-pad short strings so every vector has `dimensions` entries — cosineSimilarity throws on length mismatch
		while(v.length < dimensions) v.push(0);
		return v;
	}
	const v = vector(target);
	const similarities = searchTerms.map(t => vector(t)).map(refVector => this.cosineSimilarity(v, refVector))
	return {avg: similarities.reduce((acc, s) => acc + s, 0) / similarities.length, max: Math.max(...similarities), similarities}
}
|
|
|
|
/**
|
|
* Ask a question with JSON response
|
|
* @param {string} text Text to process
|
|
* @param {string} schema JSON schema the AI should match
|
|
* @param {LLMRequest} options Configuration options and chat history
|
|
* @returns {Promise<{} | {} | RegExpExecArray | null>}
|
|
*/
|
|
async json(text: string, schema: string, options?: LLMRequest): Promise<any> {
|
|
const code = await this.code(text, {...options, system: [
|
|
options?.system,
|
|
`Only respond using JSON matching this schema:\n\`\`\`json\n${schema}\n\`\`\``
|
|
].filter(t => !!t).join('\n')});
|
|
return code ? JSONAttemptParse(code, {}) : null;
|
|
}
|
|
|
|
/**
|
|
* Create a summary of some text
|
|
* @param {string} text Text to summarize
|
|
* @param {number} tokens Max number of tokens
|
|
* @param options LLM request options
|
|
* @returns {Promise<string>} Summary
|
|
*/
|
|
summarize(text: string, tokens: number = 500, options?: LLMRequest): Promise<string | null> {
|
|
return this.ask(text, {system: `Generate the shortest summary possible <= ${tokens} tokens. Output nothing else`, temperature: 0.3, ...options});
|
|
}
|
|
}
|
|
|
|
export default LLM;
|