Updated LLM config and added read_webpage
This commit is contained in:
41
src/tools.ts
41
src/tools.ts
@@ -1,3 +1,4 @@
|
||||
import * as cheerio from 'cheerio';
|
||||
import {$, $Sync} from '@ztimson/node-utils';
|
||||
import {ASet, consoleInterceptor, Http, fn as Fn} from '@ztimson/utils';
|
||||
import {Ai} from './ai.ts';
|
||||
@@ -111,9 +112,43 @@ export const PythonTool: AiTool = {
|
||||
fn: async (args: {code: string}) => ({result: $Sync`python -c "${args.code}"`})
|
||||
}
|
||||
|
||||
export const SearchTool: AiTool = {
|
||||
name: 'search',
|
||||
description: 'Use a search engine to find relevant URLs, should be changed with fetch to scrape sources',
|
||||
/**
 * Tool that downloads a webpage and reduces it to plain readable text.
 *
 * The page is fetched with a browser-like User-Agent, non-content elements
 * (scripts, styles, navigation, ads, popups, ...) are removed, and the text of the
 * first content container holding more than 200 characters is returned with
 * whitespace collapsed and the result capped at 8000 characters.
 */
export const ReadWebpageTool: AiTool = {
	name: 'read_webpage',
	description: 'Extract clean, structured content from a webpage. Use after web_search to read specific URLs',
	args: {
		url: {type: 'string', description: 'URL to extract content from', required: true},
		focus: {type: 'string', description: 'Optional: What aspect to focus on (e.g., "pricing", "features", "contact info")'}
	},
	/**
	 * Fetch the page and extract its metadata and main text.
	 *
	 * @param args.url Page to download
	 * @param args.focus Optional hint; it is echoed back in the result unchanged and does not affect extraction
	 * @returns The requested URL, trimmed title & description, extracted text and the focus hint
	 * @throws Error when the request fails or the server responds with a non-2xx status
	 */
	fn: async (args: {url: string; focus?: string}) => {
		let html: string;
		try {
			const resp = await fetch(args.url, {headers: {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}});
			// fetch only rejects on network failures; without this check an HTTP error page would be scraped as content
			if(!resp.ok) throw new Error(`HTTP ${resp.status} ${resp.statusText}`.trim());
			html = await resp.text();
		} catch(err: unknown) {
			// A rejection value is not guaranteed to be an Error, so narrow before reading `.message`
			throw new Error(`Failed to fetch: ${err instanceof Error ? err.message : String(err)}`);
		}

		const $ = cheerio.load(html);
		// Drop page chrome & noise so it does not leak into the extracted text
		$('script, style, nav, footer, header, aside, iframe, noscript, [role="navigation"], [role="banner"], .ad, .ads, .cookie, .popup').remove();

		// `||` (not `??`) is intentional: an empty attribute should fall through to the next source
		const metadata = {
			title: $('meta[property="og:title"]').attr('content') || $('title').text() || '',
			description: $('meta[name="description"]').attr('content') || $('meta[property="og:description"]').attr('content') || '',
		};

		// Candidates are ordered from most to least specific; the first one with substantial text wins
		let content = '';
		const contentSelectors = ['article', 'main', '[role="main"]', '.content', '.post', '.entry', 'body'];
		for(const selector of contentSelectors) {
			const el = $(selector).first();
			if(!el.length) continue;
			const text = el.text();
			if(text.trim().length > 200) {
				content = text;
				break;
			}
		}
		// Short pages never pass the 200 character threshold, fall back to whatever the body holds
		if(!content) content = $('body').text();
		// Collapse whitespace runs and cap the size of the returned text
		content = content.replace(/\s+/g, ' ').trim().slice(0, 8000);

		return {url: args.url, title: metadata.title.trim(), description: metadata.description.trim(), content, focus: args.focus};
	}
}
|
||||
|
||||
export const WebSearchTool: AiTool = {
|
||||
name: 'web_search',
|
||||
description: 'Use duckduckgo (anonymous) to find find relevant online resources. Returns a list of URLs that works great with the `read_webpage` tool',
|
||||
args: {
|
||||
query: {type: 'string', description: 'Search string', required: true},
|
||||
length: {type: 'string', description: 'Number of results to return', default: 5},
|
||||
|
||||
Reference in New Issue
Block a user