/** * URL Fetch tool - fetch and extract content from web pages */ import { Tool } from 'ollama'; import { ToolHandler, ToolResult } from './types'; export const urlFetchTool: Tool = { type: 'function', function: { name: 'fetch_url', description: 'Fetch content from a URL and extract the main text. Useful for reading articles, documentation, or any web page content.', parameters: { type: 'object', properties: { url: { type: 'string', description: 'The URL to fetch content from', }, max_length: { type: 'number', description: 'Maximum number of characters to return (default: 5000). Longer content will be truncated.', }, }, required: ['url'], }, }, }; // Simple HTML to text conversion function htmlToText(html: string): string { return ( html // Remove scripts and styles .replace(/]*>[\s\S]*?<\/script>/gi, '') .replace(/]*>[\s\S]*?<\/style>/gi, '') // Remove HTML comments .replace(//g, '') // Replace common block elements with newlines .replace(/<\/(p|div|h[1-6]|li|tr|br)[^>]*>/gi, '\n') .replace(/<(br|hr)[^>]*\/?>/gi, '\n') // Remove remaining HTML tags .replace(/<[^>]+>/g, ' ') // Decode common HTML entities .replace(/ /g, ' ') .replace(/&/g, '&') .replace(/</g, '<') .replace(/>/g, '>') .replace(/"/g, '"') .replace(/'/g, "'") // Clean up whitespace .replace(/[ \t]+/g, ' ') .replace(/\n\s*\n/g, '\n\n') .trim() ); } export const urlFetchHandler: ToolHandler = async (args): Promise => { const url = args.url as string; const maxLength = (args.max_length as number) || 5000; if (!url) { return { success: false, error: 'No URL provided', }; } // Validate URL try { new URL(url); } catch { return { success: false, error: 'Invalid URL format', }; } try { const response = await fetch(url, { headers: { 'User-Agent': 'Mozilla/5.0 (compatible; ChatGPZ/1.0; +https://github.com/your-repo)', Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', }, signal: AbortSignal.timeout(10000), // 10 second timeout }); if (!response.ok) { return { success: false, error: `HTTP ${response.status}: ${response.statusText}`, }; } const contentType = response.headers.get('content-type') || ''; const html = await response.text(); let text: string; if (contentType.includes('text/plain')) { text = html; } else { text = htmlToText(html); } // Truncate if needed if (text.length > maxLength) { text = `${text.substring(0, maxLength)}\n\n[Content truncated...]`; } return { success: true, result: `Content from ${url}:\n\n${text}`, }; } catch (error) { return { success: false, error: error instanceof Error ? error.message : 'Failed to fetch URL', }; } };