Compare commits
9 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 596e99daa7 | |||
| eda4eed87d | |||
| 7f88c2d1d0 | |||
| 5eae84f6cf | |||
| 52a3e73484 | |||
| ccb1bdf043 | |||
| b814ea8b28 | |||
| 06dda88dbc | |||
| 5d34652d46 |
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@ztimson/ai-utils",
|
"name": "@ztimson/ai-utils",
|
||||||
"version": "0.8.6",
|
"version": "0.8.15",
|
||||||
"description": "AI Utility library",
|
"description": "AI Utility library",
|
||||||
"author": "Zak Timson",
|
"author": "Zak Timson",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
|
|||||||
64
src/llm.ts
64
src/llm.ts
@@ -1,3 +1,4 @@
|
|||||||
|
import {sum} from '@tensorflow/tfjs';
|
||||||
import {JSONAttemptParse} from '@ztimson/utils';
|
import {JSONAttemptParse} from '@ztimson/utils';
|
||||||
import {AbortablePromise, Ai} from './ai.ts';
|
import {AbortablePromise, Ai} from './ai.ts';
|
||||||
import {Anthropic} from './antrhopic.ts';
|
import {Anthropic} from './antrhopic.ts';
|
||||||
@@ -117,12 +118,13 @@ class LLM {
|
|||||||
const score = (o ? this.cosineSimilarity(m.embeddings[0], o[0].embedding) : 0)
|
const score = (o ? this.cosineSimilarity(m.embeddings[0], o[0].embedding) : 0)
|
||||||
+ (q ? this.cosineSimilarity(m.embeddings[1], q[0].embedding) : 0);
|
+ (q ? this.cosineSimilarity(m.embeddings[1], q[0].embedding) : 0);
|
||||||
return {...m, score};
|
return {...m, score};
|
||||||
}).toSorted((a: any, b: any) => a.score - b.score).slice(0, limit);
|
}).toSorted((a: any, b: any) => a.score - b.score).slice(0, limit)
|
||||||
|
.map(m => `- ${m.owner}: ${m.fact}`).join('\n');
|
||||||
}
|
}
|
||||||
|
|
||||||
options.system += '\nYou have RAG memory and will be given the top_k closest memories regarding the users query. Save anything new you have learned worth remembering from the user message using the remember tool and feel free to recall memories manually.\n';
|
options.system += '\nYou have RAG memory and will be given the top_k closest memories regarding the users query. Save anything new you have learned worth remembering from the user message using the remember tool and feel free to recall memories manually.\n';
|
||||||
const relevant = await search(message);
|
const relevant = await search(message);
|
||||||
if(relevant.length) options.history.push({role: 'tool', name: 'recall', id: 'auto_recall_' + Math.random().toString(), args: {}, content: 'Things I remembered:\n' + relevant.map(m => `${m.owner}: ${m.fact}`).join('\n')});
|
if(relevant.length) options.history.push({role: 'tool', name: 'recall', id: 'auto_recall_' + Math.random().toString(), args: {}, content: `Things I remembered:\n${relevant}`});
|
||||||
options.tools = [{
|
options.tools = [{
|
||||||
name: 'recall',
|
name: 'recall',
|
||||||
description: 'Recall the closest memories you have regarding a query using RAG',
|
description: 'Recall the closest memories you have regarding a query using RAG',
|
||||||
@@ -356,22 +358,64 @@ class LLM {
|
|||||||
* @returns {Promise<{} | {} | RegExpExecArray | null>}
|
* @returns {Promise<{} | {} | RegExpExecArray | null>}
|
||||||
*/
|
*/
|
||||||
async json(text: string, schema: string, options?: LLMRequest): Promise<any> {
|
async json(text: string, schema: string, options?: LLMRequest): Promise<any> {
|
||||||
const code = await this.code(text, {...options, system: [
|
let system = `Your job is to convert input to JSON using tool calls. Call the \`submit\` tool at least once with JSON matching this schema:\n\`\`\`json\n${schema}\n\`\`\`\n\nResponses are ignored`;
|
||||||
options?.system,
|
if(options?.system) system += '\n\n' + options.system;
|
||||||
`Only respond using JSON matching this schema:\n\`\`\`json\n${schema}\n\`\`\``
|
return new Promise(async (resolve, reject) => {
|
||||||
].filter(t => !!t).join('\n')});
|
let done = false;
|
||||||
return code ? JSONAttemptParse(code, {}) : null;
|
const resp = await this.ask(text, {
|
||||||
|
temperature: 0.3,
|
||||||
|
...options,
|
||||||
|
system,
|
||||||
|
tools: [{
|
||||||
|
name: 'submit',
|
||||||
|
description: 'Submit JSON',
|
||||||
|
args: {json: {type: 'string', description: 'Javascript parsable JSON string', required: true}},
|
||||||
|
fn: (args) => {
|
||||||
|
try {
|
||||||
|
const json = JSON.parse(args.json);
|
||||||
|
resolve(json);
|
||||||
|
done = true;
|
||||||
|
} catch { return 'Invalid JSON'; }
|
||||||
|
return 'Saved';
|
||||||
|
}
|
||||||
|
}, ...(options?.tools || [])],
|
||||||
|
});
|
||||||
|
if(!done) reject(`AI failed to create JSON:\n${resp}`);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a summary of some text
|
* Create a summary of some text
|
||||||
* @param {string} text Text to summarize
|
* @param {string} text Text to summarize
|
||||||
* @param {number} tokens Max number of tokens
|
* @param {number} length Max number of words
|
||||||
* @param options LLM request options
|
* @param options LLM request options
|
||||||
* @returns {Promise<string>} Summary
|
* @returns {Promise<string>} Summary
|
||||||
*/
|
*/
|
||||||
summarize(text: string, tokens: number = 500, options?: LLMRequest): Promise<string | null> {
|
async summarize(text: string, length: number = 500, options?: LLMRequest): Promise<string | null> {
|
||||||
return this.ask(text, {system: `Generate the shortest summary possible <= ${tokens} tokens. Output nothing else`, temperature: 0.3, ...options});
|
let system = `Your job is to summarize the users message using tool calls. Call the \`submit\` tool at least once with the shortest summary possible that's <= ${length} words. The tool call will respond with the token count. Responses are ignored`;
|
||||||
|
if(options?.system) system += '\n\n' + options.system;
|
||||||
|
return new Promise(async (resolve, reject) => {
|
||||||
|
let done = false;
|
||||||
|
const resp = await this.ask(text, {
|
||||||
|
temperature: 0.3,
|
||||||
|
...options,
|
||||||
|
system,
|
||||||
|
tools: [{
|
||||||
|
name: 'submit',
|
||||||
|
description: 'Submit summary',
|
||||||
|
args: {summary: {type: 'string', description: 'Text summarization', required: true}},
|
||||||
|
fn: (args) => {
|
||||||
|
if(!args.summary) return 'No summary provided';
|
||||||
|
const count = args.summary.split(' ').length;
|
||||||
|
if(count > length) return `Too long: ${length} words`;
|
||||||
|
done = true;
|
||||||
|
resolve(args.summary || null);
|
||||||
|
return `Saved: ${length} words`;
|
||||||
|
}
|
||||||
|
}, ...(options?.tools || [])],
|
||||||
|
});
|
||||||
|
if(!done) reject(`AI failed to create summary:\n${resp}`);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
124
src/tools.ts
124
src/tools.ts
@@ -51,6 +51,13 @@ export const CliTool: AiTool = {
|
|||||||
|
|
||||||
export const DateTimeTool: AiTool = {
|
export const DateTimeTool: AiTool = {
|
||||||
name: 'get_datetime',
|
name: 'get_datetime',
|
||||||
|
description: 'Get local date / time',
|
||||||
|
args: {},
|
||||||
|
fn: async () => new Date().toString()
|
||||||
|
}
|
||||||
|
|
||||||
|
export const DateTimeUTCTool: AiTool = {
|
||||||
|
name: 'get_datetime_utc',
|
||||||
description: 'Get current UTC date / time',
|
description: 'Get current UTC date / time',
|
||||||
args: {},
|
args: {},
|
||||||
fn: async () => new Date().toUTCString()
|
fn: async () => new Date().toUTCString()
|
||||||
@@ -65,14 +72,15 @@ export const ExecTool: AiTool = {
|
|||||||
},
|
},
|
||||||
fn: async (args, stream, ai) => {
|
fn: async (args, stream, ai) => {
|
||||||
try {
|
try {
|
||||||
switch(args.type) {
|
switch(args.language) {
|
||||||
case 'cli':
|
case 'cli':
|
||||||
return await CliTool.fn({command: args.code}, stream, ai);
|
return await CliTool.fn({command: args.code}, stream, ai);
|
||||||
case 'node':
|
case 'node':
|
||||||
return await JSTool.fn({code: args.code}, stream, ai);
|
return await JSTool.fn({code: args.code}, stream, ai);
|
||||||
case 'python': {
|
case 'python':
|
||||||
return await PythonTool.fn({code: args.code}, stream, ai);
|
return await PythonTool.fn({code: args.code}, stream, ai);
|
||||||
}
|
default:
|
||||||
|
throw new Error(`Unsupported language: ${args.language}`);
|
||||||
}
|
}
|
||||||
} catch(err: any) {
|
} catch(err: any) {
|
||||||
return {error: err?.message || err.toString()};
|
return {error: err?.message || err.toString()};
|
||||||
@@ -104,9 +112,9 @@ export const JSTool: AiTool = {
|
|||||||
code: {type: 'string', description: 'CommonJS javascript', required: true}
|
code: {type: 'string', description: 'CommonJS javascript', required: true}
|
||||||
},
|
},
|
||||||
fn: async (args: {code: string}) => {
|
fn: async (args: {code: string}) => {
|
||||||
const console = consoleInterceptor(null);
|
const c = consoleInterceptor(null);
|
||||||
const resp = await Fn<any>({console}, args.code, true).catch((err: any) => console.output.error.push(err));
|
const resp = await Fn<any>({console: c}, args.code, true).catch((err: any) => c.output.error.push(err));
|
||||||
return {...console.output, return: resp, stdout: undefined, stderr: undefined};
|
return {...c.output, return: resp, stdout: undefined, stderr: undefined};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -121,24 +129,82 @@ export const PythonTool: AiTool = {
|
|||||||
|
|
||||||
export const ReadWebpageTool: AiTool = {
|
export const ReadWebpageTool: AiTool = {
|
||||||
name: 'read_webpage',
|
name: 'read_webpage',
|
||||||
description: 'Extract clean, structured content from a webpage. Use after web_search to read specific URLs',
|
description: 'Extract clean, structured content from a webpage or convert media/documents to accessible formats',
|
||||||
args: {
|
args: {
|
||||||
url: {type: 'string', description: 'URL to extract content from', required: true},
|
url: {type: 'string', description: 'URL to extract content from', required: true},
|
||||||
focus: {type: 'string', description: 'Optional: What aspect to focus on (e.g., "pricing", "features", "contact info")'}
|
mimeRegex: {type: 'string', description: 'Optional: Regex pattern to filter MIME types (e.g., "^image/", "text/", "application/pdf")'},
|
||||||
|
maxSize: {type: 'number', description: 'Optional: Max file size in bytes for binary content (default: 10MB)'}
|
||||||
},
|
},
|
||||||
fn: async (args: {url: string; focus?: string}) => {
|
fn: async (args: {url: string; mimeRegex?: string;}) => {
|
||||||
const html = await fetch(args.url, {headers: {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}})
|
const maxSize = 10 * 1024 * 1024; // 10 MB
|
||||||
.then(r => r.text()).catch(err => {throw new Error(`Failed to fetch: ${err.message}`)});
|
|
||||||
|
|
||||||
|
const response = await fetch(args.url, {
|
||||||
|
headers: {
|
||||||
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
|
||||||
|
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
|
||||||
|
"Accept-Language": "en-US,en;q=0.5"
|
||||||
|
},
|
||||||
|
redirect: 'follow'
|
||||||
|
}).catch(err => {throw new Error(`Failed to fetch: ${err.message}`)});
|
||||||
|
|
||||||
|
const contentType = response.headers.get('content-type') || '';
|
||||||
|
const mimeType = contentType.split(';')[0].trim().toLowerCase();
|
||||||
|
const charset = contentType.match(/charset=([^;]+)/)?.[1] || 'utf-8';
|
||||||
|
|
||||||
|
// Filter by MIME type if specified
|
||||||
|
if (args.mimeRegex) {
|
||||||
|
const regex = new RegExp(args.mimeRegex, 'i');
|
||||||
|
if (!regex.test(mimeType)) {
|
||||||
|
return {url: args.url, error: 'MIME type rejected', mimeType, filter: args.mimeRegex};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle images, audio, video -> data URL
|
||||||
|
if (mimeType.startsWith('image/') || mimeType.startsWith('audio/') || mimeType.startsWith('video/')) {
|
||||||
|
const buffer = await response.arrayBuffer();
|
||||||
|
if (buffer.byteLength > maxSize) {
|
||||||
|
return {url: args.url, type: 'media', mimeType, error: 'File too large', size: buffer.byteLength, maxSize};
|
||||||
|
}
|
||||||
|
const base64 = Buffer.from(buffer).toString('base64');
|
||||||
|
return {url: args.url, type: 'media', mimeType, dataUrl: `data:${mimeType};base64,${base64}`, size: buffer.byteLength};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle plain text, json, xml, csv
|
||||||
|
if (mimeType.match(/^(text\/(plain|csv|xml)|application\/(json|xml|csv|x-yaml))/) ||
|
||||||
|
args.url.match(/\.(txt|json|xml|csv|yaml|yml|md)$/i)) {
|
||||||
|
const text = await response.text();
|
||||||
|
return {url: args.url, type: 'text', mimeType, content: text.slice(0, 100000)};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle PDFs and other binaries -> data URL
|
||||||
|
if (mimeType === 'application/pdf' || mimeType.startsWith('application/') && !mimeType.includes('html')) {
|
||||||
|
const buffer = await response.arrayBuffer();
|
||||||
|
if (buffer.byteLength > maxSize) {
|
||||||
|
return {url: args.url, type: 'binary', mimeType, error: 'File too large', size: buffer.byteLength, maxSize};
|
||||||
|
}
|
||||||
|
const base64 = Buffer.from(buffer).toString('base64');
|
||||||
|
return {url: args.url, type: 'binary', mimeType, dataUrl: `data:${mimeType};base64,${base64}`, size: buffer.byteLength};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Default HTML handling
|
||||||
|
const html = await response.text();
|
||||||
const $ = cheerio.load(html);
|
const $ = cheerio.load(html);
|
||||||
$('script, style, nav, footer, header, aside, iframe, noscript, [role="navigation"], [role="banner"], .ad, .ads, .cookie, .popup').remove();
|
|
||||||
|
// Remove noise
|
||||||
|
$('script, style, nav, footer, header, aside, iframe, noscript, svg, [role="navigation"], [role="banner"], [role="complementary"], .ad, .ads, .advertisement, .cookie, .popup, .modal, .sidebar, .related, .comments, .social-share').remove();
|
||||||
|
|
||||||
|
// Extract metadata
|
||||||
const metadata = {
|
const metadata = {
|
||||||
title: $('meta[property="og:title"]').attr('content') || $('title').text() || '',
|
title: $('meta[property="og:title"]').attr('content') || $('title').text() || '',
|
||||||
description: $('meta[name="description"]').attr('content') || $('meta[property="og:description"]').attr('content') || '',
|
description: $('meta[name="description"]').attr('content') || $('meta[property="og:description"]').attr('content') || '',
|
||||||
|
author: $('meta[name="author"]').attr('content') || '',
|
||||||
|
published: $('meta[property="article:published_time"]').attr('content') || $('time').attr('datetime') || '',
|
||||||
|
image: $('meta[property="og:image"]').attr('content') || ''
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Extract structured content
|
||||||
let content = '';
|
let content = '';
|
||||||
const contentSelectors = ['article', 'main', '[role="main"]', '.content', '.post', '.entry', 'body'];
|
const contentSelectors = ['article', 'main', '[role="main"]', '.content', '.post-content', '.entry-content', '.article-content', 'body'];
|
||||||
for (const selector of contentSelectors) {
|
for (const selector of contentSelectors) {
|
||||||
const el = $(selector).first();
|
const el = $(selector).first();
|
||||||
if (el.length && el.text().trim().length > 200) {
|
if (el.length && el.text().trim().length > 200) {
|
||||||
@@ -147,9 +213,37 @@ export const ReadWebpageTool: AiTool = {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!content) content = $('body').text();
|
if (!content) content = $('body').text();
|
||||||
content = content.replace(/\s+/g, ' ').trim().slice(0, 8000);
|
|
||||||
|
|
||||||
return {url: args.url, title: metadata.title.trim(), description: metadata.description.trim(), content, focus: args.focus};
|
// Clean whitespace but preserve structure
|
||||||
|
content = content
|
||||||
|
.replace(/\n\s*\n\s*\n/g, '\n\n')
|
||||||
|
.replace(/[ \t]+/g, ' ')
|
||||||
|
.trim()
|
||||||
|
.slice(0, 50000);
|
||||||
|
|
||||||
|
// Extract links if minimal content
|
||||||
|
let links: any[] = [];
|
||||||
|
if (content.length < 500) {
|
||||||
|
$('a[href]').each((_, el) => {
|
||||||
|
const href = $(el).attr('href');
|
||||||
|
const text = $(el).text().trim();
|
||||||
|
if (href && text && !href.startsWith('#')) {
|
||||||
|
links.push({text, href});
|
||||||
|
}
|
||||||
|
});
|
||||||
|
links = links.slice(0, 50);
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
url: args.url,
|
||||||
|
type: 'html',
|
||||||
|
title: metadata.title.trim(),
|
||||||
|
description: metadata.description.trim(),
|
||||||
|
author: metadata.author.trim(),
|
||||||
|
published: metadata.published,
|
||||||
|
content,
|
||||||
|
links: links.length ? links : undefined,
|
||||||
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user