175 lines
6.0 KiB
TypeScript
175 lines
6.0 KiB
TypeScript
import * as cheerio from 'cheerio';
|
|
import {$, $Sync} from '@ztimson/node-utils';
|
|
import {ASet, consoleInterceptor, Http, fn as Fn} from '@ztimson/utils';
|
|
import {Ai} from './ai.ts';
|
|
import {LLMRequest} from './llm.ts';
|
|
|
|
/**
 * Schema describing the arguments a tool accepts.
 * Each key is an argument name mapped to its definition.
 */
export type AiToolArg = {
	[key: string]: {
		/** Data type of the argument */
		type: 'array' | 'boolean' | 'number' | 'object' | 'string';
		/** Explanation of what the argument is for */
		description: string;
		/** Whether the argument must be supplied */
		required?: boolean;
		/** Value to fall back on when the argument is omitted */
		default?: any;
		/** Allowed values */
		enum?: string[];
		/** Lower bound (value or length) */
		min?: number;
		/** Upper bound (value or length) */
		max?: number;
		/** Pattern the value should match */
		pattern?: string;
		/** Nested argument definitions */
		items?: {[key: string]: AiToolArg};
	};
};
|
|
|
|
/**
 * Definition of a tool: its identity, argument schema and the callback that performs the work.
 */
export type AiTool = {
	/** Tool ID / name - must be snake_case */
	name: string;
	/** Description / prompt explaining what the tool does */
	description: string;
	/** Schema of the arguments the callback accepts */
	args?: AiToolArg;
	/** Callback invoked with the arguments, the request's stream handler and the Ai instance */
	fn: (args: any, stream: LLMRequest['stream'], ai: Ai) => any | Promise<any>;
};
|
|
|
|
/**
 * Command line tool: hands the supplied command to the `$` template tag and returns its result.
 */
export const CliTool: AiTool = {
	name: 'cli',
	description: 'Use the command line interface, returns any output',
	args: {
		command: {type: 'string', description: 'Command to run', required: true},
	},
	fn: (args: {command: string}) => {
		const {command} = args;
		return $`${command}`;
	},
};
|
|
|
|
/**
 * Clock tool: reports the current moment as a UTC date string.
 */
export const DateTimeTool: AiTool = {
	name: 'get_datetime',
	description: 'Get current UTC date / time',
	args: {},
	fn: async () => {
		const now = new Date();
		return now.toUTCString();
	},
};
|
|
|
|
/**
 * Dispatcher tool: runs a snippet with the CLI, JavaScript or Python tool depending on `language`.
 *
 * Returns whatever the delegated tool returns. Failures, including an unsupported language,
 * are reported as `{error: string}` instead of being thrown.
 */
export const ExecTool: AiTool = {
	name: 'exec',
	description: 'Run code/scripts',
	args: {
		language: {type: 'string', description: 'Execution language', enum: ['cli', 'node', 'python'], required: true},
		code: {type: 'string', description: 'Code to execute', required: true}
	},
	fn: async (args, stream, ai) => {
		try {
			// `language` is the argument declared in the schema above; `type` is still read as a
			// fallback so callers that relied on the previous property name keep working
			const language = args.language ?? args.type;
			switch(language) {
				case 'cli':
				case 'bash': // previously accepted alias for the command line
					return await CliTool.fn({command: args.code}, stream, ai);
				case 'node':
					return await JSTool.fn({code: args.code}, stream, ai);
				case 'python':
					return await PythonTool.fn({code: args.code}, stream, ai);
				default:
					// Report the problem rather than silently resolving to undefined
					return {error: `Unsupported language: ${language}`};
			}
		} catch(err: any) {
			// String() also copes with null / undefined rejections, unlike err.toString()
			return {error: err?.message || String(err)};
		}
	}
}
|
|
|
|
/**
 * HTTP tool: builds an `Http` client for the URL and headers, then issues the request.
 */
export const FetchTool: AiTool = {
	name: 'fetch',
	description: 'Make HTTP request to URL',
	args: {
		url: {type: 'string', description: 'URL to fetch', required: true},
		method: {type: 'string', description: 'HTTP method to use', enum: ['GET', 'POST', 'PUT', 'DELETE'], default: 'GET'},
		headers: {type: 'object', description: 'HTTP headers to send', default: {}},
		body: {type: 'object', description: 'HTTP body to send'},
	},
	fn: (args: {
		url: string;
		method: 'GET' | 'POST' | 'PUT' | 'DELETE';
		headers: {[key: string]: string};
		body: any;
	}) => {
		const {url, headers, body} = args;
		// An omitted method means GET
		const verb = args.method || 'GET';
		const client = new Http({url, headers});
		return client.request({method: verb, body});
	},
};
|
|
|
|
/**
 * JavaScript tool: evaluates the supplied code through `Fn`, passing an intercepted console
 * as its `console`, and returns the captured console output together with the result.
 */
export const JSTool: AiTool = {
	name: 'exec_javascript',
	description: 'Execute commonjs javascript',
	args: {
		code: {type: 'string', description: 'CommonJS javascript', required: true},
	},
	fn: async (args: {code: string}) => {
		const interceptor = consoleInterceptor(null);
		const pending = Fn<any>({console: interceptor}, args.code, true);
		// On rejection the error is appended to the captured error output; the awaited value is
		// then whatever `push` returns
		const outcome = await pending.catch((err: any) => interceptor.output.error.push(err));
		return {
			...interceptor.output,
			return: outcome,
			stdout: undefined,
			stderr: undefined,
		};
	},
};
|
|
|
|
/**
 * Python tool: runs the supplied code with `python -c` through the `$Sync` template tag and
 * returns its result as `{result}`.
 */
export const PythonTool: AiTool = {
	// Must differ from JSTool's `exec_javascript`, otherwise the two tools share one name
	name: 'exec_python',
	description: 'Execute python code',
	args: {
		code: {type: 'string', description: 'Python code', required: true}
	},
	// NOTE(review): the code is interpolated between double quotes; confirm how `$Sync` escapes
	// interpolated values, snippets containing `"` may otherwise break the command
	fn: async (args: {code: string}) => ({result: $Sync`python -c "${args.code}"`})
}
|
|
|
|
/**
 * Webpage reader: downloads a page, strips scripts, navigation and similar elements, then
 * returns the title, description and up to 8000 characters of whitespace-collapsed text.
 */
export const ReadWebpageTool: AiTool = {
	name: 'read_webpage',
	description: 'Extract clean, structured content from a webpage. Use after web_search to read specific URLs',
	args: {
		url: {type: 'string', description: 'URL to extract content from', required: true},
		focus: {type: 'string', description: 'Optional: What aspect to focus on (e.g., "pricing", "features", "contact info")'}
	},
	fn: async (args: {url: string; focus?: string}) => {
		// Download the raw markup; any failure is rethrown with a "Failed to fetch" prefix
		let markup: string;
		try {
			const response = await fetch(args.url, {headers: {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}});
			markup = await response.text();
		} catch (err: any) {
			throw new Error(`Failed to fetch: ${err.message}`);
		}

		// Drop elements that are not part of the readable content
		const page = cheerio.load(markup);
		page('script, style, nav, footer, header, aside, iframe, noscript, [role="navigation"], [role="banner"], .ad, .ads, .cookie, .popup').remove();

		const title = page('meta[property="og:title"]').attr('content') || page('title').text() || '';
		const description = page('meta[name="description"]').attr('content') || page('meta[property="og:description"]').attr('content') || '';

		// First candidate container holding more than 200 characters of text wins;
		// if none qualifies the whole body is used
		const candidates = ['article', 'main', '[role="main"]', '.content', '.post', '.entry', 'body'];
		const winner = candidates.find(selector => {
			const node = page(selector).first();
			return node.length > 0 && node.text().trim().length > 200;
		});
		const rawText = winner !== undefined ? page(winner).first().text() : page('body').text();

		// Collapse whitespace and cap the size of the returned text
		const content = rawText.replace(/\s+/g, ' ').trim().slice(0, 8000);

		return {
			url: args.url,
			title: title.trim(),
			description: description.trim(),
			content,
			focus: args.focus,
		};
	},
};
|
|
|
|
/**
 * Web search tool: queries the DuckDuckGo HTML endpoint and returns the target URLs of the
 * result links, collected in an `ASet`.
 *
 * Only anchors whose `href` carries a `uddg` query parameter are considered; the parameter's
 * value is URL-decoded and returned. Collection stops once `length` URLs (default 5) were found.
 */
export const WebSearchTool: AiTool = {
	name: 'web_search',
	description: 'Use duckduckgo (anonymous) to find find relevant online resources. Returns a list of URLs that works great with the `read_webpage` tool',
	args: {
		query: {type: 'string', description: 'Search string', required: true},
		// Numeric argument: matches the callback's `length: number` and the numeric default
		length: {type: 'number', description: 'Number of results to return', default: 5},
	},
	fn: async (args: {
		query: string;
		length: number;
	}) => {
		const html = await fetch(`https://html.duckduckgo.com/html/?q=${encodeURIComponent(args.query)}`, {
			headers: {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)", "Accept-Language": "en-US,en;q=0.9"}
		}).then(resp => resp.text());

		// Number() tolerates callers that still send the count as a string
		const limit = Number(args.length) || 5;
		const anchors = /<a .*?href="(.+?)".+?<\/a>/g;
		const results = new ASet<string>();
		let match: RegExpExecArray | null;
		while((match = anchors.exec(html)) !== null) {
			// Read the parameter from the raw href and stop at the next `&`, so sibling
			// parameters (e.g. `&amp;rut=...`) are not glued onto the URL
			const encoded = /\buddg=([^&]+)/.exec(match[1])?.[1];
			if(!encoded) continue;
			try {
				// Decode exactly once; a second pass would corrupt or reject URLs containing `%`
				results.add(decodeURIComponent(encoded));
			} catch {
				continue; // Malformed escape sequence, skip this link
			}
			if(results.size >= limit) break;
		}
		return results;
	}
}
|