import * as cheerio from 'cheerio';
import {$Sync} from '@ztimson/node-utils';
import {ASet, consoleInterceptor, Http, fn as Fn, decodeHtml} from '@ztimson/utils';
import * as os from 'node:os';
import {Ai} from './ai.ts';
import {LLMRequest} from './llm.ts';

/** User agent sent with the DuckDuckGo and Wikipedia requests */
const UA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)';

/** Largest media/binary download (in bytes) that `read_webpage` will summarize */
const MAX_DOWNLOAD_BYTES = 10 * 1024 * 1024;

/** Longest text/structured payload (in characters) that `read_webpage` returns */
const MAX_TEXT_CHARS = 50000;

/**
 * Name of the shell used by the CLI tool.
 *
 * @returns `cmd` on Windows, otherwise the last path segment of `$SHELL` (falls back to `bash`)
 */
const getShell = () => {
	if(os.platform() === 'win32') return 'cmd';
	return $Sync`echo $SHELL`?.split('/').pop() || 'bash';
}

export type AiToolArg = {[key: string]: {
	/** Argument type */
	type: 'array' | 'boolean' | 'number' | 'object' | 'string',
	/** Argument description */
	description: string,
	/** Required argument */
	required?: boolean;
	/** Default value */
	default?: any,
	/** Options */
	enum?: string[],
	/** Minimum value or length */
	min?: number,
	/** Maximum value or length */
	max?: number,
	/** Match pattern */
	pattern?: string,
	/** Child arguments */
	items?: {[key: string]: AiToolArg}
}}

export type AiTool = {
	/** Tool ID / Name - Must be snake_case */
	name: string,
	/** Tool description / prompt */
	description: string,
	/** Tool arguments */
	args?: AiToolArg,
	/** Callback function */
	fn: (args: any, stream: LLMRequest['stream'], ai: Ai) => any | Promise<any>,
};

/** Runs a single command through `$Sync` and returns whatever it outputs */
export const CliTool: AiTool = {
	name: 'cli',
	description: 'Use the command line interface, returns any output',
	args: {command: {type: 'string', description: 'Command to run', required: true}},
	fn: (args: {command: string}) => $Sync`${args.command}`
}

/** Reports the current date/time formatted in the host's local timezone */
export const DateTimeTool: AiTool = {
	name: 'get_datetime',
	description: 'Get local date / time',
	args: {},
	fn: async () => new Date().toString()
}

/** Reports the current date/time formatted as UTC */
export const DateTimeUTCTool: AiTool = {
	name: 'get_datetime_utc',
	description: 'Get current UTC date / time',
	args: {},
	fn: async () => new Date().toUTCString()
}

/**
 * Dispatches code to the CLI, JavaScript or Python tool based on `language`.
 * Failures are returned as `{error}` instead of being thrown.
 */
export const ExecTool: AiTool = {
	name: 'exec',
	description: 'Run code/scripts',
	args: {
		language: {type: 'string', description: `Execution language (CLI: ${getShell()})`, enum: ['cli', 'node', 'python'], required: true},
		code: {type: 'string', description: 'Code to execute', required: true}
	},
	fn: async (args, stream, ai) => {
		try {
			switch(args.language) {
				case 'cli':
					return await CliTool.fn({command: args.code}, stream, ai);
				case 'node':
					return await JSTool.fn({code: args.code}, stream, ai);
				case 'python':
					return await PythonTool.fn({code: args.code}, stream, ai);
				default:
					throw new Error(`Unsupported language: ${args.language}`);
			}
		} catch(err: any) {
			// String() copes with null/undefined rejections, where `.toString()` would itself throw
			return {error: err?.message || String(err)};
		}
	}
}

/** Performs an HTTP request through the `Http` client and returns its response */
export const FetchTool: AiTool = {
	name: 'fetch',
	description: 'Make HTTP request to URL',
	args: {
		url: {type: 'string', description: 'URL to fetch', required: true},
		method: {type: 'string', description: 'HTTP method to use', enum: ['GET', 'POST', 'PUT', 'DELETE'], default: 'GET'},
		headers: {type: 'object', description: 'HTTP headers to send', default: {}},
		body: {type: 'object', description: 'HTTP body to send'},
	},
	fn: (args: {
		url: string;
		method: 'GET' | 'POST' | 'PUT' | 'DELETE';
		headers: {[key: string]: string};
		body: any;
	}) => new Http({url: args.url, headers: args.headers}).request({method: args.method || 'GET', body: args.body})
}

/**
 * Executes JavaScript with an intercepted console.
 * Returns the captured console output together with the script's return value.
 */
export const JSTool: AiTool = {
	name: 'exec_javascript',
	description: 'Execute commonjs javascript',
	args: {
		code: {type: 'string', description: 'CommonJS javascript', required: true}
	},
	fn: async (args: {code: string}) => {
		const c = consoleInterceptor(null);
		let resp: any;
		try {
			resp = await Fn({console: c}, args.code, true);
		} catch(err: any) {
			// Record the failure with the console errors; `return` stays undefined
			// (previously the result of Array.push - a number - leaked out as the return value)
			c.output.error.push(err);
		}
		return {...c.output, return: resp, stdout: undefined, stderr: undefined};
	}
}

/**
 * Executes Python through `python -c`.
 *
 * NOTE(review): the code is wrapped in double quotes on the command line; whether `$Sync`
 * escapes interpolated values is not visible here - confirm scripts containing `"` survive.
 */
export const PythonTool: AiTool = {
	name: 'exec_python',
	description: 'Execute python',
	args: {
		code: {type: 'string', description: 'Python code', required: true}
	},
	fn: async (args: {code: string}) => ({result: $Sync`python -c "${args.code}"`})
}

/** Caps text at `MAX_TEXT_CHARS` characters */
function clipText(text: string): string {
	return text.length > MAX_TEXT_CHARS ? text.slice(0, MAX_TEXT_CHARS) : text;
}

/** Builds the markdown summary (or size rejection) for a downloaded media/binary payload */
function describeDownload(heading: string, mimeType: string, buffer: ArrayBuffer): string {
	if(buffer.byteLength > MAX_DOWNLOAD_BYTES)
		return `❌ File too large: ${(buffer.byteLength / 1024 / 1024).toFixed(1)}MB (max 10MB)\nType: ${mimeType}`;
	// Only a 100 character preview is shown; 75 bytes encode to exactly 100 base64 characters
	const preview = Buffer.from(buffer, 0, Math.min(buffer.byteLength, 75)).toString('base64');
	return `## ${heading}\n**Type:** ${mimeType}\n**Size:** ${(buffer.byteLength / 1024).toFixed(1)}KB\n**Data URL:** \`data:${mimeType};base64,${preview.slice(0, 100)}...\``;
}

/**
 * Downloads a URL and converts it to markdown-like text depending on its MIME type:
 * media and other binaries become a short summary, text and structured data are returned
 * (truncated), and HTML is reduced to its title, metadata and main paragraphs.
 */
export const ReadWebpageTool: AiTool = {
	name: 'read_webpage',
	description: 'Extract clean content from webpages, or convert media/documents to accessible formats',
	args: {
		url: {type: 'string', description: 'URL to read', required: true},
		mimeRegex: {type: 'string', description: 'Optional regex to filter MIME types (e.g., "^image/", "text/")'}
	},
	fn: async (args: {url: string; mimeRegex?: string}) => {
		const ua = 'AiTools-Webpage/1.0';
		const response = await fetch(args.url, {
			headers: {
				'User-Agent': ua,
				'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
				'Accept-Language': 'en-US,en;q=0.5'
			},
			redirect: 'follow'
		}).catch(err => {throw new Error(`Failed to fetch: ${err.message}`)});

		// Drop parameters such as `; charset=utf-8` so only the bare MIME type is compared
		const contentType = response.headers.get('content-type') || '';
		const mimeType = contentType.split(';')[0].trim().toLowerCase();

		if(args.mimeRegex && !new RegExp(args.mimeRegex, 'i').test(mimeType)) {
			return `❌ MIME type rejected: ${mimeType} (filter: ${args.mimeRegex})`;
		}

		if(mimeType.match(/^(image|audio|video)\//)) {
			return describeDownload('Media File', mimeType, await response.arrayBuffer());
		}

		if(mimeType.match(/^text\/(plain|csv|xml)/) || args.url.match(/\.(txt|csv|xml|md|yaml|yml)$/i)) {
			const text = clipText(await response.text());
			return `## Text File\n**Type:** ${mimeType}\n**URL:** ${args.url}\n\n${text}`;
		}

		if(mimeType.match(/application\/(json|xml|csv)/)) {
			const text = clipText(await response.text());
			return `## Structured Data\n**Type:** ${mimeType}\n**URL:** ${args.url}\n\n\`\`\`\n${text}\n\`\`\``;
		}

		if(mimeType === 'application/pdf' || (mimeType.startsWith('application/') && !mimeType.includes('html'))) {
			return describeDownload('Binary File', mimeType, await response.arrayBuffer());
		}

		// Everything else is treated as HTML: strip page chrome before extracting text
		const html = await response.text();
		const $ = cheerio.load(html);
		$('script, style, nav, footer, header, aside, iframe, noscript, svg').remove();
		$('[role="navigation"], [role="banner"], [role="complementary"]').remove();
		$('[aria-hidden="true"], [hidden], .visually-hidden, .sr-only, .screen-reader-text').remove();
		$('.ad, .ads, .advertisement, .cookie, .popup, .modal, .sidebar, .related, .comments, .social-share').remove();
		$('button, [class*="share"], [class*="follow"], [class*="social"]').remove();

		const title = $('meta[property="og:title"]').attr('content') || $('title').text().trim() || '';
		const description = $('meta[name="description"]').attr('content') || $('meta[property="og:description"]').attr('content') || '';
		const author = $('meta[name="author"]').attr('content') || '';

		// Prefer the first content container holding more than two substantial paragraphs
		let content = '';
		const selectors = ['article', 'main', '[role="main"]', '.content', '.post-content', '.entry-content', '.article-content'];
		for(const sel of selectors) {
			const el = $(sel).first();
			if(!el.length || el.text().trim().length <= 200) continue;
			const paragraphs = el.find('p').toArray()
				.map(p => $(p).text().trim())
				.filter(text => text.length > 80);
			if(paragraphs.length > 2) {
				content = paragraphs.join('\n\n');
				break;
			}
		}

		// Fallback: the first 30 substantial paragraphs anywhere in the body
		if(!content) {
			content = $('body p').toArray()
				.map(p => $(p).text().trim())
				.filter(text => text.length > 80)
				.slice(0, 30)
				.join('\n\n');
		}

		// Assemble the document, collapse runs of blank lines and decode HTML entities
		const parts = [`## ${title || 'Webpage'}`];
		if(description) parts.push(`_${description}_`);
		if(author) parts.push(`👤 ${author}`);
		parts.push(`🔗 ${args.url}\n`);
		parts.push(content);
		return decodeHtml(parts.join('\n\n').replaceAll(/\n{3,}/g, '\n\n'));
	}
};

/**
 * Extracts the destination URL from a DuckDuckGo redirect link.
 *
 * @param href Raw (still percent-encoded) `href` attribute value
 * @returns Decoded value of the `uddg` parameter, or null when absent or malformed
 */
function resolveDuckDuckGoLink(href: string): string | null {
	// Read the parameter before decoding so the trailing parameters (e.g. `&amp;rut=...`)
	// are not swept into the result URL
	const encoded = /uddg=([^&]+)/.exec(href)?.[1];
	if(!encoded) return null;
	try {
		return decodeURIComponent(encoded);
	} catch {
		// Malformed percent-encoding: skip this link rather than fail the whole search
		return null;
	}
}

/** Searches DuckDuckGo's HTML endpoint and returns the unique result URLs */
export const WebSearchTool: AiTool = {
	name: 'web_search',
	description: 'Use duckduckgo (anonymous) to find find relevant online resources. Returns a list of URLs that works great with the `read_webpage` tool',
	args: {
		query: {type: 'string', description: 'Search string', required: true},
		length: {type: 'number', description: 'Number of results to return', default: 5},
	},
	fn: async (args: {
		query: string;
		length: number;
	}) => {
		const html = await fetch(`https://html.duckduckgo.com/html/?q=${encodeURIComponent(args.query)}`, {
			headers: {"User-Agent": UA, "Accept-Language": "en-US,en;q=0.9"}
		}).then(resp => resp.text());

		// Number() tolerates callers that still pass the count as a string
		const limit = Number(args.length) || 5;
		const results = new ASet<string>();
		// Visit the href of every anchor; only links carrying a `uddg` parameter yield a result
		for(const match of html.matchAll(/<a .*?href="(.+?)".+?<\/a>/g)) {
			const url = resolveDuckDuckGoLink(match[1]);
			if(url) results.add(url);
			if(results.size >= limit) break;
		}
		return results;
	}
}

/** Minimal client for the English Wikipedia MediaWiki API */
class WikipediaClient {
	/** Fetch a URL and parse the body as JSON, failing with the HTTP status on a non-2xx response */
	private async get(url: string): Promise<any> {
		const resp = await fetch(url, {headers: {'User-Agent': UA}});
		if(!resp.ok) throw new Error(`Wikipedia request failed: ${resp.status} ${resp.statusText}`);
		return resp.json();
	}

	/** Call `api.php` with the given parameters, always requesting UTF-8 JSON */
	private api(params: Record<string, any>): Promise<any> {
		const qs = new URLSearchParams({...params, format: 'json', utf8: '1'}).toString();
		return this.get(`https://en.wikipedia.org/w/api.php?${qs}`);
	}

	/** Collapse excess blank lines & spaces and remove `[n]` citation markers */
	private clean(text: string): string {
		return text
			.replace(/\n{3,}/g, '\n\n')
			.replace(/ {2,}/g, ' ')
			.replace(/\[\d+\]/g, '')
			.trim();
	}

	/** Cut text to `max` characters, backing up to a paragraph break when one falls in the last 30% */
	private truncate(text: string, max: number): string {
		if(text.length <= max) return text;
		const cut = text.slice(0, max);
		const lastPara = cut.lastIndexOf('\n\n');
		return lastPara > max * 0.7 ? cut.slice(0, lastPara) : cut;
	}

	/** Search for articles, returning the raw search hits (title + snippet) */
	private async searchTitles(query: string, limit = 6): Promise<any[]> {
		const data = await this.api({action: 'query', list: 'search', srsearch: query, srlimit: limit, srprop: 'snippet'});
		return data.query?.search || [];
	}

	/** Fetch the plain-text extract of an article (intro section only when `intro` is set) */
	private async fetchExtract(title: string, intro = false): Promise<string> {
		const params: Record<string, any> = {action: 'query', prop: 'extracts', titles: title, explaintext: 1, redirects: 1};
		if(intro) params.exintro = 1;
		const data = await this.api(params);
		const page = Object.values(data.query?.pages || {})[0] as any;
		return this.clean(page?.extract || '');
	}

	/** Build the article URL for a title */
	private pageUrl(title: string): string {
		return `https://en.wikipedia.org/wiki/${encodeURIComponent(title.replace(/ /g, '_'))}`;
	}

	/** Remove HTML tags (search snippets contain highlight markup) */
	private stripHtml(text: string): string {
		return text.replace(/<[^>]+>/g, '');
	}

	/**
	 * Find the best matching article and return its content as markdown.
	 *
	 * @param query Topic or article title
	 * @param detail `intro` returns up to 2000 characters of the intro, `full` up to 8000 of the article
	 */
	async lookup(query: string, detail: 'intro' | 'full' = 'intro'): Promise<string> {
		const results = await this.searchTitles(query, 6);
		if(!results.length) return `❌ No Wikipedia articles found for "${query}"`;

		const title = results[0].title;
		const url = this.pageUrl(title);
		const content = await this.fetchExtract(title, detail === 'intro');
		const text = this.truncate(content, detail === 'intro' ? 2000 : 8000);
		return `## ${title}\n🔗 ${url}\n\n${text}`;
	}

	/** List up to 8 matching articles with a short snippet and link for each */
	async search(query: string): Promise<string> {
		const results = await this.searchTitles(query, 8);
		if(!results.length) return `❌ No results for "${query}"`;

		const lines = [`### Search results for "${query}"\n`];
		results.forEach((r, i) => {
			const snippet = this.truncate(this.stripHtml(r.snippet || ''), 150);
			lines.push(`**${i + 1}. ${r.title}**\n${snippet}\n${this.pageUrl(r.title)}`);
		});
		return lines.join('\n\n');
	}
}

/** Returns the content of the Wikipedia article that best matches the query */
export const WikipediaLookupTool: AiTool = {
	name: 'wikipedia_lookup',
	description: 'Get Wikipedia article content',
	args: {
		query: {type: 'string', description: 'Topic or article title', required: true},
		detail: {type: 'string', description: 'Content level: "intro" (summary, default) or "full" (complete article)', enum: ['intro', 'full'], default: 'intro'}
	},
	fn: async (args: {query: string; detail?: 'intro' | 'full'}) => {
		const wiki = new WikipediaClient();
		return wiki.lookup(args.query, args.detail || 'intro');
	}
};

/** Lists Wikipedia articles that match the search terms */
export const WikipediaSearchTool: AiTool = {
	name: 'wikipedia_search',
	description: 'Search Wikipedia for matching articles',
	args: {
		query: {type: 'string', description: 'Search terms', required: true}
	},
	fn: async (args: {query: string}) => {
		const wiki = new WikipediaClient();
		return wiki.search(args.query);
	}
};