Compare commits

..

6 Commits

Author SHA1 Message Date
ee7b85301b * Fixed llm response object (double encoding)
All checks were successful
Publish Library / Build NPM Project (push) Successful in 25s
Publish Library / Tag Version (push) Successful in 12s
+ added wikitools
+ Improved webpage reading tool
2026-03-29 23:00:40 -04:00
d2e711fbf2 Added wikipedia tools
All checks were successful
Publish Library / Build NPM Project (push) Successful in 1m5s
Publish Library / Tag Version (push) Successful in 11s
2026-03-29 21:50:26 -04:00
596e99daa7 Use word count for summary (more predictable)
All checks were successful
Publish Library / Build NPM Project (push) Successful in 55s
Publish Library / Tag Version (push) Successful in 33s
2026-03-26 13:10:46 -04:00
eda4eed87d Added JSON / Summary LLM safeguard
All checks were successful
Publish Library / Build NPM Project (push) Successful in 41s
Publish Library / Tag Version (push) Successful in 21s
2026-03-26 12:50:52 -04:00
7f88c2d1d0 Added JSON / Summary LLM safeguard
All checks were successful
Publish Library / Build NPM Project (push) Successful in 1m17s
Publish Library / Tag Version (push) Successful in 13s
2026-03-26 12:33:50 -04:00
5eae84f6cf Added JSON / Summary LLM safeguard
All checks were successful
Publish Library / Build NPM Project (push) Successful in 1m1s
Publish Library / Tag Version (push) Successful in 14s
2026-03-26 12:24:20 -04:00
6 changed files with 1206 additions and 358 deletions

1256
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
{
"name": "@ztimson/ai-utils",
"version": "0.8.11",
"version": "0.8.16",
"description": "AI Utility library",
"author": "Zak Timson",
"license": "MIT",
@@ -29,7 +29,7 @@
"@tensorflow/tfjs": "^4.22.0",
"@xenova/transformers": "^2.17.2",
"@ztimson/node-utils": "^1.0.7",
"@ztimson/utils": "^0.28.13",
"@ztimson/utils": "^0.28.16",
"cheerio": "^1.2.0",
"openai": "^6.22.0",
"tesseract.js": "^7.0.0"

View File

@@ -119,7 +119,7 @@ export class Anthropic extends LLMProvider {
if(!tool) return {tool_use_id: toolCall.id, is_error: true, content: 'Tool not found'};
try {
const result = await tool.fn(toolCall.input, options?.stream, this.ai);
return {type: 'tool_result', tool_use_id: toolCall.id, content: JSONSanitize(result)};
return {type: 'tool_result', tool_use_id: toolCall.id, content: typeof result == 'object' ? JSONSanitize(result) : result};
} catch (err: any) {
return {type: 'tool_result', tool_use_id: toolCall.id, is_error: true, content: err?.message || err?.toString() || 'Unknown'};
}

View File

@@ -1,3 +1,4 @@
import {sum} from '@tensorflow/tfjs';
import {JSONAttemptParse} from '@ztimson/utils';
import {AbortablePromise, Ai} from './ai.ts';
import {Anthropic} from './antrhopic.ts';
@@ -357,22 +358,64 @@ class LLM {
* @returns {Promise<{} | {} | RegExpExecArray | null>}
*/
async json(text: string, schema: string, options?: LLMRequest): Promise<any> {
const code = await this.code(text, {...options, system: [
options?.system,
`Only respond using JSON matching this schema:\n\`\`\`json\n${schema}\n\`\`\``
].filter(t => !!t).join('\n')});
return code ? JSONAttemptParse(code, {}) : null;
let system = `Your job is to convert input to JSON using tool calls. Call the \`submit\` tool at least once with JSON matching this schema:\n\`\`\`json\n${schema}\n\`\`\`\n\nResponses are ignored`;
if(options?.system) system += '\n\n' + options.system;
return new Promise(async (resolve, reject) => {
let done = false;
const resp = await this.ask(text, {
temperature: 0.3,
...options,
system,
tools: [{
name: 'submit',
description: 'Submit JSON',
args: {json: {type: 'string', description: 'Javascript parsable JSON string', required: true}},
fn: (args) => {
try {
const json = JSON.parse(args.json);
resolve(json);
done = true;
} catch { return 'Invalid JSON'; }
return 'Saved';
}
}, ...(options?.tools || [])],
});
if(!done) reject(`AI failed to create JSON:\n${resp}`);
});
}
/**
* Create a summary of some text
* @param {string} text Text to summarize
* @param {number} tokens Max number of tokens
* @param {number} length Max number of words
* @param options LLM request options
* @returns {Promise<string>} Summary
*/
summarize(text: string, tokens: number = 500, options?: LLMRequest): Promise<string | null> {
return this.ask(text, {system: `Generate the shortest summary possible <= ${tokens} tokens. Output nothing else`, temperature: 0.3, ...options});
async summarize(text: string, length: number = 500, options?: LLMRequest): Promise<string | null> {
let system = `Your job is to summarize the users message using tool calls. Call the \`submit\` tool at least once with the shortest summary possible that's <= ${length} words. The tool call will respond with the token count. Responses are ignored`;
if(options?.system) system += '\n\n' + options.system;
return new Promise(async (resolve, reject) => {
let done = false;
const resp = await this.ask(text, {
temperature: 0.3,
...options,
system,
tools: [{
name: 'submit',
description: 'Submit summary',
args: {summary: {type: 'string', description: 'Text summarization', required: true}},
fn: (args) => {
if(!args.summary) return 'No summary provided';
const count = args.summary.split(' ').length;
if(count > length) return `Too long: ${length} words`;
done = true;
resolve(args.summary || null);
return `Saved: ${length} words`;
}
}, ...(options?.tools || [])],
});
if(!done) reject(`AI failed to create summary:\n${resp}`);
});
}
}

View File

@@ -148,7 +148,7 @@ export class OpenAi extends LLMProvider {
try {
const args = JSONAttemptParse(toolCall.function.arguments, {});
const result = await tool.fn(args, options.stream, this.ai);
return {role: 'tool', tool_call_id: toolCall.id, content: JSONSanitize(result)};
return {role: 'tool', tool_call_id: toolCall.id, content: typeof result == 'object' ? JSONSanitize(result) : result};
} catch (err: any) {
return {role: 'tool', tool_call_id: toolCall.id, content: JSONSanitize({error: err?.message || err?.toString() || 'Unknown'})};
}

View File

@@ -1,10 +1,12 @@
import * as cheerio from 'cheerio';
import {$Sync} from '@ztimson/node-utils';
import {ASet, consoleInterceptor, Http, fn as Fn} from '@ztimson/utils';
import {ASet, consoleInterceptor, Http, fn as Fn, decodeHtml} from '@ztimson/utils';
import * as os from 'node:os';
import {Ai} from './ai.ts';
import {LLMRequest} from './llm.ts';
const UA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)';
const getShell = () => {
if(os.platform() == 'win32') return 'cmd';
return $Sync`echo $SHELL`?.split('/').pop() || 'bash';
@@ -129,123 +131,107 @@ export const PythonTool: AiTool = {
export const ReadWebpageTool: AiTool = {
name: 'read_webpage',
description: 'Extract clean, structured content from a webpage or convert media/documents to accessible formats',
description: 'Extract clean content from webpages, or convert media/documents to accessible formats',
args: {
url: {type: 'string', description: 'URL to extract content from', required: true},
mimeRegex: {type: 'string', description: 'Optional: Regex pattern to filter MIME types (e.g., "^image/", "text/", "application/pdf")'},
maxSize: {type: 'number', description: 'Optional: Max file size in bytes for binary content (default: 10MB)'}
url: {type: 'string', description: 'URL to read', required: true},
mimeRegex: {type: 'string', description: 'Optional regex to filter MIME types (e.g., "^image/", "text/")'}
},
fn: async (args: {url: string; mimeRegex?: string;}) => {
const maxSize = 10 * 1024 * 1024; // 10 MB
fn: async (args: {url: string; mimeRegex?: string}) => {
const ua = 'AiTools-Webpage/1.0';
const maxSize = 10 * 1024 * 1024;
const response = await fetch(args.url, {
headers: {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.5"
'User-Agent': ua,
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Language': 'en-US,en;q=0.5'
},
redirect: 'follow'
}).catch(err => {throw new Error(`Failed to fetch: ${err.message}`)});
const contentType = response.headers.get('content-type') || '';
const mimeType = contentType.split(';')[0].trim().toLowerCase();
const charset = contentType.match(/charset=([^;]+)/)?.[1] || 'utf-8';
// Filter by MIME type if specified
if (args.mimeRegex) {
const regex = new RegExp(args.mimeRegex, 'i');
if (!regex.test(mimeType)) {
return {url: args.url, error: 'MIME type rejected', mimeType, filter: args.mimeRegex};
}
if(args.mimeRegex && !new RegExp(args.mimeRegex, 'i').test(mimeType)) {
return `❌ MIME type rejected: ${mimeType} (filter: ${args.mimeRegex})`;
}
// Handle images, audio, video -> data URL
if (mimeType.startsWith('image/') || mimeType.startsWith('audio/') || mimeType.startsWith('video/')) {
if(mimeType.match(/^(image|audio|video)\//)) {
const buffer = await response.arrayBuffer();
if (buffer.byteLength > maxSize) {
return {url: args.url, type: 'media', mimeType, error: 'File too large', size: buffer.byteLength, maxSize};
if(buffer.byteLength > maxSize) {
return `❌ File too large: ${(buffer.byteLength / 1024 / 1024).toFixed(1)}MB (max 10MB)\nType: ${mimeType}`;
}
const base64 = Buffer.from(buffer).toString('base64');
return {url: args.url, type: 'media', mimeType, dataUrl: `data:${mimeType};base64,${base64}`, size: buffer.byteLength};
return `## Media File\n**Type:** ${mimeType}\n**Size:** ${(buffer.byteLength / 1024).toFixed(1)}KB\n**Data URL:** \`data:${mimeType};base64,${base64.slice(0, 100)}...\``;
}
// Handle plain text, json, xml, csv
if (mimeType.match(/^(text\/(plain|csv|xml)|application\/(json|xml|csv|x-yaml))/) ||
args.url.match(/\.(txt|json|xml|csv|yaml|yml|md)$/i)) {
if(mimeType.match(/^text\/(plain|csv|xml)/) || args.url.match(/\.(txt|csv|xml|md|yaml|yml)$/i)) {
const text = await response.text();
return {url: args.url, type: 'text', mimeType, content: text.slice(0, 100000)};
const truncated = text.length > 50000 ? text.slice(0, 50000) : text;
return `## Text File\n**Type:** ${mimeType}\n**URL:** ${args.url}\n\n${truncated}`;
}
// Handle PDFs and other binaries -> data URL
if (mimeType === 'application/pdf' || mimeType.startsWith('application/') && !mimeType.includes('html')) {
if(mimeType.match(/application\/(json|xml|csv)/)) {
const text = await response.text();
const truncated = text.length > 50000 ? text.slice(0, 50000) : text;
return `## Structured Data\n**Type:** ${mimeType}\n**URL:** ${args.url}\n\n\`\`\`\n${truncated}\n\`\`\``;
}
if(mimeType === 'application/pdf' || (mimeType.startsWith('application/') && !mimeType.includes('html'))) {
const buffer = await response.arrayBuffer();
if (buffer.byteLength > maxSize) {
return {url: args.url, type: 'binary', mimeType, error: 'File too large', size: buffer.byteLength, maxSize};
if(buffer.byteLength > maxSize) {
return `❌ File too large: ${(buffer.byteLength / 1024 / 1024).toFixed(1)}MB (max 10MB)\nType: ${mimeType}`;
}
const base64 = Buffer.from(buffer).toString('base64');
return {url: args.url, type: 'binary', mimeType, dataUrl: `data:${mimeType};base64,${base64}`, size: buffer.byteLength};
return `## Binary File\n**Type:** ${mimeType}\n**Size:** ${(buffer.byteLength / 1024).toFixed(1)}KB\n**Data URL:** \`data:${mimeType};base64,${base64.slice(0, 100)}...\``;
}
// Default HTML handling
// HTML
const html = await response.text();
const $ = cheerio.load(html);
// Remove noise
$('script, style, nav, footer, header, aside, iframe, noscript, svg, [role="navigation"], [role="banner"], [role="complementary"], .ad, .ads, .advertisement, .cookie, .popup, .modal, .sidebar, .related, .comments, .social-share').remove();
// Extract metadata
const metadata = {
title: $('meta[property="og:title"]').attr('content') || $('title').text() || '',
description: $('meta[name="description"]').attr('content') || $('meta[property="og:description"]').attr('content') || '',
author: $('meta[name="author"]').attr('content') || '',
published: $('meta[property="article:published_time"]').attr('content') || $('time').attr('datetime') || '',
image: $('meta[property="og:image"]').attr('content') || ''
};
// Extract structured content
$('script, style, nav, footer, header, aside, iframe, noscript, svg').remove();
$('[role="navigation"], [role="banner"], [role="complementary"]').remove();
$('[aria-hidden="true"], [hidden], .visually-hidden, .sr-only, .screen-reader-text').remove();
$('.ad, .ads, .advertisement, .cookie, .popup, .modal, .sidebar, .related, .comments, .social-share').remove();
$('button, [class*="share"], [class*="follow"], [class*="social"]').remove();
const title = $('meta[property="og:title"]').attr('content') || $('title').text().trim() || '';
const description = $('meta[name="description"]').attr('content') || $('meta[property="og:description"]').attr('content') || '';
const author = $('meta[name="author"]').attr('content') || '';
let content = '';
const contentSelectors = ['article', 'main', '[role="main"]', '.content', '.post-content', '.entry-content', '.article-content', 'body'];
for (const selector of contentSelectors) {
const el = $(selector).first();
if (el.length && el.text().trim().length > 200) {
content = el.text();
break;
const selectors = ['article', 'main', '[role="main"]', '.content', '.post-content', '.entry-content', '.article-content'];
for(const sel of selectors) {
const el = $(sel).first();
if(el.length && el.text().trim().length > 200) {
const paragraphs: string[] = [];
el.find('p').each((_, p) => {
const text = $(p).text().trim();
if(text.length > 80) paragraphs.push(text);
});
if(paragraphs.length > 2) {
content = paragraphs.join('\n\n');
break;
}
}
}
if (!content) content = $('body').text();
// Clean whitespace but preserve structure
content = content
.replace(/\n\s*\n\s*\n/g, '\n\n')
.replace(/[ \t]+/g, ' ')
.trim()
.slice(0, 50000);
// Extract links if minimal content
let links: any[] = [];
if (content.length < 500) {
$('a[href]').each((_, el) => {
const href = $(el).attr('href');
const text = $(el).text().trim();
if (href && text && !href.startsWith('#')) {
links.push({text, href});
}
if(!content) {
const paragraphs: string[] = [];
$('body p').each((_, p) => {
const text = $(p).text().trim();
if(text.length > 80) paragraphs.push(text);
});
links = links.slice(0, 50);
content = paragraphs.slice(0, 30).join('\n\n');
}
return {
url: args.url,
type: 'html',
title: metadata.title.trim(),
description: metadata.description.trim(),
author: metadata.author.trim(),
published: metadata.published,
content,
links: links.length ? links : undefined,
};
// Decode escaped newlines and clean
const parts = [`## ${title || 'Webpage'}`];
if(description) parts.push(`_${description}_`);
if(author) parts.push(`👤 ${author}`);
parts.push(`🔗 ${args.url}\n`);
parts.push(content);
return decodeHtml(parts.join('\n\n').replaceAll(/\n{3,}/g, '\n\n'));
}
}
};
export const WebSearchTool: AiTool = {
name: 'web_search',
@@ -259,7 +245,7 @@ export const WebSearchTool: AiTool = {
length: number;
}) => {
const html = await fetch(`https://html.duckduckgo.com/html/?q=${encodeURIComponent(args.query)}`, {
headers: {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)", "Accept-Language": "en-US,en;q=0.9"}
headers: {"User-Agent": UA, "Accept-Language": "en-US,en;q=0.9"}
}).then(resp => resp.text());
let match, regex = /<a .*?href="(.+?)".+?<\/a>/g;
const results = new ASet<string>();
@@ -272,3 +258,94 @@ export const WebSearchTool: AiTool = {
return results;
}
}
/**
 * Lightweight client for the English Wikipedia Action API.
 * Returns markdown-formatted strings suitable for LLM tool output.
 */
class WikipediaClient {
  /** GET a URL and parse the body as JSON; throws on HTTP error statuses. */
  private async get(url: string): Promise<any> {
    const resp = await fetch(url, {headers: {'User-Agent': UA}});
    // Fail fast on HTTP errors instead of surfacing a cryptic JSON parse error
    if(!resp.ok) throw new Error(`Wikipedia request failed: ${resp.status} ${resp.statusText}`);
    return resp.json();
  }

  /** Call the MediaWiki Action API with the given parameters (JSON format). */
  private api(params: Record<string, any>): Promise<any> {
    const qs = new URLSearchParams({...params, format: 'json', utf8: '1'}).toString();
    return this.get(`https://en.wikipedia.org/w/api.php?${qs}`);
  }

  /** Collapse excess whitespace and strip citation markers like "[12]". */
  private clean(text: string): string {
    return text.replace(/\n{3,}/g, '\n\n').replace(/ {2,}/g, ' ').replace(/\[\d+\]/g, '').trim();
  }

  /** Cut text to `max` chars, preferring a paragraph boundary near the end. */
  private truncate(text: string, max: number): string {
    if(text.length <= max) return text;
    const cut = text.slice(0, max);
    const lastPara = cut.lastIndexOf('\n\n');
    // Only back up to the paragraph break if it keeps >= 70% of the budget
    return lastPara > max * 0.7 ? cut.slice(0, lastPara) : cut;
  }

  /** Full-text search returning raw API result objects (title + snippet). */
  private async searchTitles(query: string, limit = 6): Promise<any[]> {
    const data = await this.api({action: 'query', list: 'search', srsearch: query, srlimit: limit, srprop: 'snippet'});
    return data.query?.search || [];
  }

  /** Fetch an article's plaintext extract; `intro` limits it to the lead section. */
  private async fetchExtract(title: string, intro = false): Promise<string> {
    const params: any = {action: 'query', prop: 'extracts', titles: title, explaintext: 1, redirects: 1};
    if(intro) params.exintro = 1;
    const data = await this.api(params);
    const page = Object.values(data.query?.pages || {})[0] as any;
    return this.clean(page?.extract || '');
  }

  /** Canonical article URL for a title (spaces become underscores). */
  private pageUrl(title: string): string {
    return `https://en.wikipedia.org/wiki/${encodeURIComponent(title.replace(/ /g, '_'))}`;
  }

  /** Remove HTML tags from API search snippets. */
  private stripHtml(text: string): string {
    return text.replace(/<[^>]+>/g, '');
  }

  /** Fetch the best-matching article's intro (~2k chars) or full text (~8k chars). */
  async lookup(query: string, detail: 'intro' | 'full' = 'intro'): Promise<string> {
    const results = await this.searchTitles(query, 6);
    if(!results.length) return `❌ No Wikipedia articles found for "${query}"`;
    const title = results[0].title;
    const url = this.pageUrl(title);
    const content = await this.fetchExtract(title, detail === 'intro');
    const text = this.truncate(content, detail === 'intro' ? 2000 : 8000);
    return `## ${title}\n🔗 ${url}\n\n${text}`;
  }

  /** List up to 8 matching articles with snippets and links. */
  async search(query: string): Promise<string> {
    const results = await this.searchTitles(query, 8);
    if(!results.length) return `❌ No results for "${query}"`;
    const lines = [`### Search results for "${query}"\n`];
    for(let i = 0; i < results.length; i++) {
      const r = results[i];
      const snippet = this.truncate(this.stripHtml(r.snippet || ''), 150);
      lines.push(`**${i + 1}. ${r.title}**\n${snippet}\n${this.pageUrl(r.title)}`);
    }
    return lines.join('\n\n');
  }
}
/** Ai tool: fetch the intro or full text of the best-matching Wikipedia article. */
export const WikipediaLookupTool: AiTool = {
  name: 'wikipedia_lookup',
  description: 'Get Wikipedia article content',
  args: {
    query: {type: 'string', description: 'Topic or article title', required: true},
    detail: {type: 'string', description: 'Content level: "intro" (summary, default) or "full" (complete article)', enum: ['intro', 'full'], default: 'intro'}
  },
  fn: ({query, detail}: {query: string; detail?: 'intro' | 'full'}) =>
    new WikipediaClient().lookup(query, detail || 'intro')
};
/** Ai tool: list Wikipedia articles matching a search query. */
export const WikipediaSearchTool: AiTool = {
  name: 'wikipedia_search',
  description: 'Search Wikipedia for matching articles',
  args: {
    query: {type: 'string', description: 'Search terms', required: true}
  },
  fn: ({query}: {query: string}) => new WikipediaClient().search(query)
};