import * as cheerio from 'cheerio';
import {$, $Sync} from '@ztimson/node-utils';
import {ASet, consoleInterceptor, Http, fn as Fn} from '@ztimson/utils';
import {Ai} from './ai.ts';
import {LLMRequest} from './llm.ts';

/** Schema describing the arguments a tool accepts, keyed by argument name */
export type AiToolArg = {[key: string]: {
  /** Argument type */
  type: 'array' | 'boolean' | 'number' | 'object' | 'string',
  /** Argument description */
  description: string,
  /** Required argument */
  required?: boolean;
  /** Default value */
  default?: any,
  /** Options */
  enum?: string[],
  /** Minimum value or length */
  min?: number,
  /** Maximum value or length */
  max?: number,
  /** Match pattern */
  pattern?: string,
  /** Child arguments */
  items?: {[key: string]: AiToolArg}
}}

/** A callable tool: a name, a description, an argument schema and the callback that runs it */
export type AiTool = {
  /** Tool ID / Name - Must be snake_case */
  name: string,
  /** Tool description / prompt */
  description: string,
  /** Tool arguments */
  args?: AiToolArg,
  /** Callback function - receives the parsed arguments, the request's stream handler and the Ai instance */
  fn: (args: any, stream: LLMRequest['stream'], ai: Ai) => any | Promise<any>,
};

/** Runs a command through the `$` helper from `@ztimson/node-utils` and returns whatever it yields */
export const CliTool: AiTool = {
  name: 'cli',
  description: 'Use the command line interface, returns any output',
  args: {command: {type: 'string', description: 'Command to run', required: true}},
  fn: (args: {command: string}) => $`${args.command}`
}

/** Returns the current date & time as a UTC string */
export const DateTimeTool: AiTool = {
  name: 'get_datetime',
  description: 'Get current UTC date / time',
  args: {},
  fn: async () => new Date().toUTCString()
}

/**
 * Dispatches a code snippet to the tool matching the requested language.
 *
 * Returns the delegated tool's result, or `{error}` when the language is
 * unsupported or the delegated tool throws.
 */
export const ExecTool: AiTool = {
  name: 'exec',
  description: 'Run code/scripts',
  args: {
    language: {type: 'string', description: 'Execution language', enum: ['cli', 'node', 'python'], required: true},
    code: {type: 'string', description: 'Code to execute', required: true}
  },
  fn: async (args, stream, ai) => {
    // The schema names this argument `language`; `type` is still honoured for callers of the old key
    const language = args.language ?? args.type;
    try {
      switch(language) {
        case 'cli':
        case 'bash': // legacy alias, the schema advertises `cli`
          return await CliTool.fn({command: args.code}, stream, ai);
        case 'node':
          return await JSTool.fn({code: args.code}, stream, ai);
        case 'python':
          return await PythonTool.fn({code: args.code}, stream, ai);
        default:
          return {error: `Unsupported language: ${language}`};
      }
    } catch(err: unknown) {
      return {error: err instanceof Error ? err.message : String(err)};
    }
  }
}

/** Performs an HTTP request through the `Http` client from `@ztimson/utils` */
export const FetchTool: AiTool = {
  name: 'fetch',
  description: 'Make HTTP request to URL',
  args: {
    url: {type: 'string', description: 'URL to fetch', required: true},
    method: {type: 'string', description: 'HTTP method to use', enum: ['GET', 'POST', 'PUT', 'DELETE'], default: 'GET'},
    headers: {type: 'object', description: 'HTTP headers to send', default: {}},
    body: {type: 'object', description: 'HTTP body to send'},
  },
  fn: (args: {
    url: string;
    method: 'GET' | 'POST' | 'PUT' | 'DELETE';
    headers: {[key: string]: string};
    body: any;
  }) => new Http({url: args.url, headers: args.headers}).request({method: args.method || 'GET', body: args.body})
}

/**
 * Executes JavaScript through `Fn` from `@ztimson/utils` with an intercepted console.
 *
 * Returns the intercepted console output plus the code's return value under
 * `return`. A thrown error is appended to the intercepted `error` output and
 * `return` is left undefined.
 */
export const JSTool: AiTool = {
  name: 'exec_javascript',
  description: 'Execute commonjs javascript',
  args: {
    code: {type: 'string', description: 'CommonJS javascript', required: true}
  },
  fn: async (args: {code: string}) => {
    const console = consoleInterceptor(null);
    let resp: any;
    try {
      resp = await Fn({console}, args.code, true);
    } catch(err: any) {
      // Record the failure; `resp` stays undefined rather than becoming the result of `push`
      console.output.error.push(err);
    }
    return {...console.output, return: resp, stdout: undefined, stderr: undefined};
  }
}

/**
 * Executes Python code via `python -c` using the synchronous `$Sync` helper.
 *
 * NOTE(review): the code is interpolated between double quotes; whether `$Sync`
 * escapes interpolated values is not visible here, so code containing `"` may
 * break the command - confirm against `@ztimson/node-utils`.
 */
export const PythonTool: AiTool = {
  name: 'exec_python',
  description: 'Execute python code',
  args: {
    code: {type: 'string', description: 'Python code', required: true}
  },
  fn: async (args: {code: string}) => ({result: $Sync`python -c "${args.code}"`})
}

/**
 * Downloads a webpage and reduces it to its title, description and main text.
 *
 * Scripts, styles, navigation and similar chrome are removed, then the first
 * candidate container holding more than 200 characters of text is used
 * (falling back to the whole body). Whitespace is collapsed and the text is
 * capped at 8000 characters. `focus` is passed through to the result untouched.
 */
export const ReadWebpageTool: AiTool = {
  name: 'read_webpage',
  description: 'Extract clean, structured content from a webpage. Use after web_search to read specific URLs',
  args: {
    url: {type: 'string', description: 'URL to extract content from', required: true},
    focus: {type: 'string', description: 'Optional: What aspect to focus on (e.g., "pricing", "features", "contact info")'}
  },
  fn: async (args: {url: string; focus?: string}) => {
    const html = await fetch(args.url, {headers: {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}})
      .then(r => r.text())
      .catch(err => {throw new Error(`Failed to fetch: ${err.message}`)});

    // Local cheerio handle; intentionally shadows the imported `$` shell helper inside this function
    const $ = cheerio.load(html);
    $('script, style, nav, footer, header, aside, iframe, noscript, [role="navigation"], [role="banner"], .ad, .ads, .cookie, .popup').remove();

    const metadata = {
      title: $('meta[property="og:title"]').attr('content') || $('title').text() || '',
      description: $('meta[name="description"]').attr('content') || $('meta[property="og:description"]').attr('content') || '',
    };

    // Ordered from most to least specific; the first container with enough text wins
    let content = '';
    const contentSelectors = ['article', 'main', '[role="main"]', '.content', '.post', '.entry', 'body'];
    for (const selector of contentSelectors) {
      const el = $(selector).first();
      if (el.length && el.text().trim().length > 200) {
        content = el.text();
        break;
      }
    }
    if (!content) content = $('body').text();
    content = content.replace(/\s+/g, ' ').trim().slice(0, 8000);

    return {url: args.url, title: metadata.title.trim(), description: metadata.description.trim(), content, focus: args.focus};
  }
}

/**
 * Captures the raw `href` value of each anchor tag.
 *
 * NOTE(review): the original pattern was missing from the source (an empty
 * `//g`); this is a reconstruction - confirm against the search page's markup.
 */
const ANCHOR_HREF = /<a\s[^>]*?href="([^"]+)"/g;

/** Captures the still-encoded `uddg` query parameter of a href, stopping at the next `&` */
const UDDG_PARAM = /[?&]uddg=([^&"]+)/;

/** Collects up to `limit` unique URLs carried in the `uddg` parameter of anchor hrefs found in `html` */
function extractSearchResultUrls(html: string, limit: number): string[] {
  const urls = new Set<string>();
  for (const match of html.matchAll(ANCHOR_HREF)) {
    const encoded = UDDG_PARAM.exec(match[1] ?? '')?.[1];
    if (!encoded) continue;
    try {
      // Decode exactly once: the parameter was isolated before decoding, so `&` inside the target URL survives
      urls.add(decodeURIComponent(encoded));
    } catch {
      continue; // malformed percent-escape, skip this link
    }
    if (urls.size >= limit) break;
  }
  return [...urls];
}

/**
 * Searches DuckDuckGo's HTML endpoint and returns the unique result URLs.
 *
 * `length` caps the number of URLs returned and falls back to 5 when it is
 * missing or not a usable number.
 */
export const WebSearchTool: AiTool = {
  name: 'web_search',
  description: 'Use duckduckgo (anonymous) to find find relevant online resources. Returns a list of URLs that works great with the `read_webpage` tool',
  args: {
    query: {type: 'string', description: 'Search string', required: true},
    length: {type: 'number', description: 'Number of results to return', default: 5},
  },
  fn: async (args: {
    query: string;
    length: number;
  }) => {
    const html = await fetch(`https://html.duckduckgo.com/html/?q=${encodeURIComponent(args.query)}`, {
      headers: {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)", "Accept-Language": "en-US,en;q=0.9"}
    }).then(resp => resp.text());

    // Callers may still pass the count as a string (the schema previously declared it so)
    const limit = Number(args.length) || 5;
    const results = new ASet<string>();
    for (const url of extractSearchResultUrls(html, limit)) results.add(url);
    return results;
  }
}