diff --git a/app/api/chat/route.ts b/app/api/chat/route.ts index 3b8ba7c..bc0b8e9 100644 --- a/app/api/chat/route.ts +++ b/app/api/chat/route.ts @@ -6,6 +6,9 @@ import { allTools, executeTool, getRegisteredTools } from '@/lib/tools'; // Maximum number of tool call iterations to prevent infinite loops const MAX_TOOL_ITERATIONS = 10; +// Maximum tokens for model responses (effectively unlimited for long reasoning) +const MAX_RESPONSE_TOKENS = 32768; + /** * Parse text-based tool calls from model output * Supports formats like: @@ -100,6 +103,9 @@ export async function POST(request: NextRequest) { messages: workingMessages, tools: enableTools ? allTools : undefined, stream: true, + options: { + num_predict: MAX_RESPONSE_TOKENS, + }, }); let fullContent = ''; diff --git a/app/api/chat/title/route.ts b/app/api/chat/title/route.ts index b900d27..b03558f 100644 --- a/app/api/chat/title/route.ts +++ b/app/api/chat/title/route.ts @@ -60,6 +60,7 @@ Good examples: "Weather in Three Cities", "Python Debugging Help", "Chocolate Ca temperature: 0.3, // Lower temperature for more focused output num_predict: 20, // Short response - just the title }, + keep_alive: 0, // Immediately unload model after generation }); // eslint-disable-next-line no-console diff --git a/components/Chat/MarkdownMessage.tsx b/components/Chat/MarkdownMessage.tsx index 6f1a1c3..625800b 100644 --- a/components/Chat/MarkdownMessage.tsx +++ b/components/Chat/MarkdownMessage.tsx @@ -18,6 +18,7 @@ import { Text, Title, } from '@mantine/core'; +import { ThinkingBlock } from './ThinkingBlock'; import { ToolCall, ToolCallGroup } from './ToolCallDisplay'; import classes from './MarkdownMessage.module.css'; // Import KaTeX CSS for LaTeX rendering @@ -35,23 +36,27 @@ interface ParsedToolCall { } interface ContentSegment { - type: 'text' | 'tool'; + type: 'text' | 'tool' | 'thinking'; content?: string; toolCall?: ParsedToolCall; } /** - * Parse content to extract tool calls and regular text segments + * Parse content to extract thinking blocks, tool calls, and regular text segments */ function parseContentWithToolCalls(content: string): ContentSegment[] { const segments: ContentSegment[] = []; - const toolPattern = /([\s\S]*?)/g; + + // Combined pattern for both thinking blocks and tool calls + // This ensures we parse them in the order they appear + const combinedPattern = + /(([\s\S]*?)<\/think>)|([\s\S]*?)/g; let lastIndex = 0; let match; - while ((match = toolPattern.exec(content)) !== null) { - // Add text before this tool call + while ((match = combinedPattern.exec(content)) !== null) { + // Add text before this match if (match.index > lastIndex) { const textBefore = content.slice(lastIndex, match.index).trim(); if (textBefore) { @@ -59,25 +64,33 @@ function parseContentWithToolCalls(content: string): ContentSegment[] { } } - // Parse the tool call - const toolName = match[1]; - let args: Record = {}; - try { - args = JSON.parse(match[2]); - } catch { - // Invalid JSON, use empty args - } - const result = match[3].trim(); + if (match[1]) { + // This is a thinking block + const thinkingContent = match[2].trim(); + if (thinkingContent) { + segments.push({ type: 'thinking', content: thinkingContent }); + } + } else if (match[3]) { + // This is a tool call + const toolName = match[3]; + let args: Record = {}; + try { + args = JSON.parse(match[4]); + } catch { + // Invalid JSON, use empty args + } + const result = match[5].trim(); - segments.push({ - type: 'tool', - toolCall: { toolName, args, result }, - }); + segments.push({ + type: 'tool', + toolCall: { toolName, args, result }, + }); + } lastIndex = match.index + match[0].length; } - // Add remaining text after last tool call + // Add remaining text after last match if (lastIndex < content.length) { const remainingText = content.slice(lastIndex).trim(); if (remainingText) { @@ -85,7 +98,7 @@ function parseContentWithToolCalls(content: string): ContentSegment[] { } } - // If no tool calls found, return the whole content as text + // If no special blocks found, return the whole content as text if (segments.length === 0 && content.trim()) { segments.push({ type: 'text', content }); } @@ -94,16 +107,16 @@ function parseContentWithToolCalls(content: string): ContentSegment[] { } /** - * Grouped segment type - either text or a group of consecutive tool calls + * Grouped segment type - text, thinking block, or group of consecutive tool calls */ interface GroupedSegment { - type: 'text' | 'toolGroup'; + type: 'text' | 'toolGroup' | 'thinking'; content?: string; toolCalls?: ToolCall[]; } /** - * Group consecutive tool call segments together + * Group consecutive tool call segments together, keep thinking blocks separate */ function groupConsecutiveToolCalls(segments: ContentSegment[]): GroupedSegment[] { const grouped: GroupedSegment[] = []; @@ -114,13 +127,17 @@ function groupConsecutiveToolCalls(segments: ContentSegment[]): GroupedSegment[] // Add to current tool group currentToolGroup.push(segment.toolCall); } else { - // Flush any pending tool group before adding text + // Flush any pending tool group before adding other segment if (currentToolGroup.length > 0) { grouped.push({ type: 'toolGroup', toolCalls: currentToolGroup }); currentToolGroup = []; } - // Add text segment - if (segment.content) { + + if (segment.type === 'thinking' && segment.content) { + // Add thinking block as-is + grouped.push({ type: 'thinking', content: segment.content }); + } else if (segment.content) { + // Add text segment grouped.push({ type: 'text', content: segment.content }); } } @@ -198,10 +215,13 @@ function escapeHtml(text: string): string { } /** - * Strip tool call markers from content for streaming display + * Strip tool call and thinking markers from content for streaming display */ function stripToolMarkers(content: string): string { - return content.replace(//g, '').replace(//g, ''); + return content + .replace(//g, '') + .replace(//g, '') + .replace(/<\/?think>/g, ''); // Remove think tags but keep content visible during streaming } function MarkdownContent({ content }: { content: string }) { @@ -382,6 +402,9 @@ export function MarkdownMessage({ content, isStreaming = false }: MarkdownMessag return (
{groupedSegments.map((segment, index) => { + if (segment.type === 'thinking' && segment.content) { + return ; + } if (segment.type === 'toolGroup' && segment.toolCalls) { return ; } diff --git a/components/Chat/ThinkingBlock.module.css b/components/Chat/ThinkingBlock.module.css new file mode 100644 index 0000000..ed4a063 --- /dev/null +++ b/components/Chat/ThinkingBlock.module.css @@ -0,0 +1,20 @@ +.container { + background-color: var(--mantine-color-default-hover); + overflow: hidden; +} + +.header { + transition: background-color 0.15s ease; +} + +.header:hover { + background-color: var(--mantine-color-default-hover); +} + +.content { + padding: var(--mantine-spacing-xs) var(--mantine-spacing-sm); + border-top: 1px solid var(--mantine-color-default-border); + max-height: 300px; + overflow-y: auto; + background-color: var(--mantine-color-body); +} diff --git a/components/Chat/ThinkingBlock.tsx b/components/Chat/ThinkingBlock.tsx new file mode 100644 index 0000000..e9e7608 --- /dev/null +++ b/components/Chat/ThinkingBlock.tsx @@ -0,0 +1,64 @@ +'use client'; + +import { useState } from 'react'; +import { IconBrain, IconChevronDown, IconChevronRight } from '@tabler/icons-react'; +import { ActionIcon, Collapse, Group, Paper, Text, useMantineTheme } from '@mantine/core'; +import { useThemeContext } from '@/components/DynamicThemeProvider'; +import classes from './ThinkingBlock.module.css'; + +interface ThinkingBlockProps { + content: string; + isStreaming?: boolean; +} + +/** + * Collapsible block for displaying model reasoning/thinking content + */ +export function ThinkingBlock({ content, isStreaming = false }: ThinkingBlockProps) { + const [opened, setOpened] = useState(false); + const { primaryColor } = useThemeContext(); + const theme = useMantineTheme(); + + // Don't render if no content + if (!content.trim()) { + return null; + } + + // Count approximate "thoughts" or lines for summary + const lines = content.trim().split('\n').filter(Boolean); + const wordCount = content.trim().split(/\s+/).length; + + return ( + + setOpened(!opened)} + gap="xs" + wrap="nowrap" + p="xs" + style={{ cursor: 'pointer' }} + > + + {opened ? : } + + + + {isStreaming ? 'Thinking...' : 'Reasoning'} + + {!opened && ( + + {isStreaming ? 'Model is reasoning...' : `${wordCount} words, ${lines.length} steps`} + + )} + + + +
+ + {content} + +
+
+
+ ); +}