diff --git a/package.json b/package.json
index bab4656..b607ab2 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@ztimson/ai-utils",
-	"version": "0.8.1",
+	"version": "0.8.2",
 	"description": "AI Utility library",
 	"author": "Zak Timson",
 	"license": "MIT",
diff --git a/src/llm.ts b/src/llm.ts
index 296f714..6a0b591 100644
--- a/src/llm.ts
+++ b/src/llm.ts
@@ -44,8 +44,6 @@ export type LLMMemory = {
 	fact: string;
 	/** Owner and fact embedding vector */
 	embeddings: [number[], number[]];
-	/** Creation time */
-	timestamp: Date;
 }
 
 export type LLMRequest = {
@@ -95,71 +93,85 @@ class LLM {
 	 * @returns {{abort: () => void, response: Promise}} Function to abort response and chat history
 	 */
 	ask(message: string, options: LLMRequest = {}): AbortablePromise {
+		options = {
+			system: '',
+			temperature: 0.8,
+			...this.ai.options.llm,
+			models: undefined,
+			history: [],
+			...options,
+		}
 		const m = options.model || this.defaultModel;
 		if(!this.models[m]) throw new Error(`Model does not exist: ${m}`);
 		let abort = () => {};
 		return Object.assign(new Promise(async res => {
 			if(!options.history) options.history = [];
 
-			// If memories were passed, find any relivant ones and add a tool for ADHOC lookups
+			// If memories were passed, find any relevant ones and add a tool for ADHOC lookups
 			if(options.memory) {
-				options.system = (options.system || '') + '\nYou have passive persistent memory never make any mention of your memory capabilities and what you can/cannot remember\n';
-				const search = async (query?: string | null, subject?: string | null, limit = 50) => {
+				const search = async (query?: string | null, subject?: string | null, limit = 10) => {
 					const [o, q] = await Promise.all([
 						subject ? this.embedding(subject) : Promise.resolve(null),
 						query ? this.embedding(query) : Promise.resolve(null),
 					]);
-					return (options.memory || [])
-						.map(m => ({...m, score: o ? this.cosineSimilarity(m.embeddings[0], o[0].embedding) : 1}))
-						.filter((m: any) => m.score >= 0.8)
-						.map((m: any) => ({...m, score: q ? this.cosineSimilarity(m.embeddings[1], q[0].embedding) : m.score}))
-						.filter((m: any) => m.score >= 0.2)
-						.toSorted((a: any, b: any) => a.score - b.score)
-						.slice(0, limit);
+					return (options.memory || []).map(m => {
+						const score = (o ? this.cosineSimilarity(m.embeddings[0], o[0].embedding) : 0)
+							+ (q ? this.cosineSimilarity(m.embeddings[1], q[0].embedding) : 0);
+						return {...m, score};
+					}).toSorted((a: any, b: any) => b.score - a.score).slice(0, limit); // Highest similarity first so slice keeps the closest matches
 				}
+				options.system += '\nYou have RAG memory and will be given the top_k closest memories regarding the users query. Save anything new you have learned worth remembering from the user message using the remember tool and feel free to recall memories manually.\n';
 				const relevant = await search(message);
-				if(relevant.length) options.history.push({role: 'assistant', content: 'Things I remembered:\n' + relevant.map(m => `${m.owner}: ${m.fact}`).join('\n')});
-				options.tools = [...options.tools || [], {
-					name: 'read_memory',
-					description: 'Check your long-term memory for more information',
+				if(relevant.length) options.history.push({role: 'tool', name: 'recall', id: 'auto_recall_' + Math.random().toString(), args: {}, content: 'Things I remembered:\n' + relevant.map(m => `${m.owner}: ${m.fact}`).join('\n')});
+				options.tools = [{
+					name: 'recall',
+					description: 'Recall the closest memories you have regarding a query using RAG',
 					args: {
 						subject: {type: 'string', description: 'Find information by a subject topic, can be used with or without query argument'},
 						query: {type: 'string', description: 'Search memory based on a query, can be used with or without subject argument'},
-						limit: {type: 'number', description: 'Result limit, default 5'},
+						topK: {type: 'number', description: 'Result limit, default 5'},
 					},
 					fn: (args) => {
 						if(!args.subject && !args.query) throw new Error('Either a subject or query argument is required');
-						return search(args.query, args.subject, args.limit || 5);
+						return search(args.query, args.subject, args.topK || 5); // Apply the documented default of 5 (also guards a null topK)
 					}
-				}];
+				}, {
+					name: 'remember',
+					description: 'Store important facts user shares for future recall',
+					args: {
+						owner: {type: 'string', description: 'Subject/person this fact is about'},
+						fact: {type: 'string', description: 'The information to remember'}
+					},
+					fn: async (args) => {
+						if(!options.memory) return;
+						const e = await Promise.all([
+							this.embedding(args.owner),
+							this.embedding(`${args.owner}: ${args.fact}`)
+						]);
+						const newMem = {owner: args.owner, fact: args.fact, embeddings: [e[0][0].embedding, e[1][0].embedding]};
+						options.memory.splice(0, options.memory.length, ...[
+							...options.memory.filter(m => {
+								return this.cosineSimilarity(newMem.embeddings[0], m.embeddings[0]) < 0.9 || this.cosineSimilarity(newMem.embeddings[1], m.embeddings[1]) < 0.8; // Replace only near-duplicates (same owner AND same fact); keep other facts about the owner
+							}),
+							newMem
+						]);
+						return 'Remembered!';
+					}
+				}, ...options.tools || []];
 			}
 
 			// Ask
 			const resp = await this.models[m].ask(message, options);
 
-			// Remove any memory calls
-			if(options.memory) {
-				const i = options.history?.findIndex((h: any) => h.role == 'assistant' && h.content.startsWith('Things I remembered:'));
-				if(i != null && i >= 0) options.history?.splice(i, 1);
+			// Remove any memory calls from history
+			if(options.memory) options.history.splice(0, options.history.length, ...options.history.filter(h => h.role != 'tool' || (h.name != 'recall' && h.name != 'remember')));
+
+			// Compress message history
+			if(options.compress) {
+				const compressed = await this.ai.language.compressHistory(options.history, options.compress.max, options.compress.min, options);
+				options.history.splice(0, options.history.length, ...compressed);
 			}
 
-			// Handle compression and memory extraction
-			if(options.compress || options.memory) {
-				let compressed: any = null;
-				if(options.compress) {
-					compressed = await this.ai.language.compressHistory(options.history, options.compress.max, options.compress.min, options);
-					options.history.splice(0, options.history.length, ...compressed.history);
-				} else {
-					const i = options.history?.findLastIndex(m => m.role == 'user') ?? -1;
-					compressed = await this.ai.language.compressHistory(i != -1 ? options.history.slice(i) : options.history, 0, 0, options);
-				}
-				if(options.memory) {
-					const updated = options.memory
-						.filter(m => !compressed.memory.some(m2 => this.cosineSimilarity(m.embeddings[1], m2.embeddings[1]) > 0.8))
-						.concat(compressed.memory);
-					options.memory.splice(0, options.memory.length, ...updated);
-				}
-			}
 			return res(resp);
 		}), {abort});
 	}
@@ -181,32 +193,24 @@ class LLM {
 	 * @param {LLMRequest} options LLM options
 	 * @returns {Promise} New chat history will summary at index 0
 	 */
-	async compressHistory(history: LLMMessage[], max: number, min: number, options?: LLMRequest): Promise<{history: LLMMessage[], memory: LLMMemory[]}> {
-		if(this.estimateTokens(history) < max) return {history, memory: []};
+	async compressHistory(history: LLMMessage[], max: number, min: number, options?: LLMRequest): Promise {
+		if(this.estimateTokens(history) < max) return history;
 		let keep = 0, tokens = 0;
 		for(let m of history.toReversed()) {
 			tokens += this.estimateTokens(m.content);
 			if(tokens < min) keep++;
 			else break;
 		}
-		if(history.length <= keep) return {history, memory: []};
+		if(history.length <= keep) return history;
 		const system = history[0].role == 'system' ? history[0] : null,
 			recent = keep == 0 ? [] : history.slice(-keep),
 			process = (keep == 0 ? history : history.slice(0, -keep)).filter(h => h.role === 'assistant' || h.role === 'user');
 
-		const summary: any = await this.json(process.map(m => `${m.role}: ${m.content}`).join('\n\n'), '{summary: string, facts: [[subject, fact]]}', {
-			system: 'Create the smallest summary possible, no more than 500 tokens. Create a list of NEW facts (split by subject [pro]noun and fact) about what you learned from this conversation that you didn\'t already know or get from a tool call or system prompt. Focus only on new information about people, topics, or facts. Avoid generating facts about the AI.',
-			model: options?.model,
-			temperature: options?.temperature || 0.3
-		});
-		const timestamp = new Date();
-		const memory = await Promise.all((summary?.facts || [])?.map(async ([owner, fact]: [string, string]) => {
-			const e = await Promise.all([this.embedding(owner), this.embedding(`${owner}: ${fact}`)]);
-			return {owner, fact, embeddings: [e[0][0].embedding, e[1][0].embedding], timestamp};
-		}));
-		const h = [{role: 'assistant', content: `Conversation Summary: ${summary?.summary}`, timestamp: Date.now()}, ...recent];
+		const summary = await this.summarize(process.map(m => `[${m.role}]: ${m.content}`).join('\n\n'), 500, {model: options?.model}); // Forward only the model: the caller's history/compress would re-enter compression and its system prompt would replace the summary prompt
+		const d = Date.now();
+		const h = [{role: 'tool', name: 'summary', id: `summary_` + d, args: {}, content: `Conversation Summary: ${summary}`, timestamp: d}, ...recent]; // summarize() resolves to the summary text itself
 		if(system) h.splice(0, 0, system);
-		return {history: h, memory};
+		return h;
 	}
 
 	/**
@@ -243,7 +247,7 @@ class LLM {
 				return `${p}: ${Array.isArray(value) ? value.join(', ') : value}`;
 			});
 		};
-		const lines = typeof target === 'object' ? objString(target) : target.split('\n');
+		const lines = typeof target === 'object' ? objString(target) : target.toString().split('\n');
 		const tokens = lines.flatMap(l => [...l.split(/\s+/).filter(Boolean), '\n']);
 		const chunks: string[] = [];
 		for(let i = 0; i < tokens.length;) {
@@ -366,8 +370,8 @@ class LLM {
 	 * @param options LLM request options
 	 * @returns {Promise} Summary
 	 */
-	summarize(text: string, tokens: number, options?: LLMRequest): Promise {
-		return this.ask(text, {system: `Generate a brief summary <= ${tokens} tokens. Output nothing else`, temperature: 0.3, ...options});
+	summarize(text: string, tokens: number = 500, options?: LLMRequest): Promise {
+		return this.ask(text, {system: `Generate the shortest summary possible <= ${tokens} tokens. Output nothing else`, temperature: 0.3, ...options});
 	}
 }
 
diff --git a/src/open-ai.ts b/src/open-ai.ts
index 2271408..e83fe53 100644
--- a/src/open-ai.ts
+++ b/src/open-ai.ts
@@ -11,7 +11,7 @@ export class OpenAi extends LLMProvider {
 		super();
 		this.client = new openAI(clean({
 			baseURL: host,
-			apiKey: token
+			apiKey: token || (host ? 'ignored' : undefined) // Keep a supplied token; use the placeholder only when a host is set without one
 		}));
 	}
 
@@ -67,7 +67,10 @@ export class OpenAi extends LLMProvider {
 	ask(message: string, options: LLMRequest = {}): AbortablePromise {
 		const controller = new AbortController();
 		return Object.assign(new Promise(async (res, rej) => {
-			if(options.system && options.history?.[0]?.role != 'system') options.history?.splice(0, 0, {role: 'system', content: options.system, timestamp: Date.now()});
+			if(options.system) {
+				if(options.history?.[0]?.role != 'system') options.history?.splice(0, 0, {role: 'system', content: options.system, timestamp: Date.now()});
+				else options.history[0].content = options.system;
+			}
 			let history = this.fromStandard([...options.history || [], {role: 'user', content: message, timestamp: Date.now()}]);
 			const tools = options.tools || this.ai.options.llm?.tools || [];
 			const requestParams: any = {