diff --git a/app/api/chat/route.ts b/app/api/chat/route.ts index 3bce8e1..cc9a0a9 100644 --- a/app/api/chat/route.ts +++ b/app/api/chat/route.ts @@ -121,24 +121,41 @@ export async function POST(request: NextRequest) { iterations++; // Call Ollama with tools if enabled + // Enable thinking mode for models that support it (like deepseek-r1) const response = await ollama.chat({ model, messages: workingMessages, tools: enableTools ? allTools : undefined, stream: true, + think: true, // Enable thinking mode - the loop below reads thinking from chunk.message.thinking, separately from chunk.message.content. NOTE(review): the flag is sent unconditionally for every model - confirm models without thinking support accept it options: { num_predict: MAX_RESPONSE_TOKENS, }, }); let fullContent = ''; + let fullThinking = ''; + let thinkingSent = false; let toolCalls: Array<{ name: string; arguments: Record }> = []; // Process the streaming response for await (const chunk of response) { - // Collect content + // Check for thinking content (separate from main content) + const thinking = chunk.message?.thinking || ''; + if (thinking) { + fullThinking += thinking; + // Thinking is not streamed chunk by chunk - it is buffered here and enqueued as one block ahead of the first content chunk + } + + // Collect main content const text = chunk.message?.content || ''; if (text) { + // If we have accumulated thinking and haven't sent it yet, send it first. NOTE(review): thinking chunks that arrive after this flush are still appended to fullThinking but never enqueued, because both thinking enqueue sites require !thinkingSent - confirm that is intended + if (fullThinking && !thinkingSent) { + const thinkingMarker = `${fullThinking}\n\n`; + controller.enqueue(encoder.encode(thinkingMarker)); + thinkingSent = true; + } fullContent += text; controller.enqueue(encoder.encode(text)); } @@ -152,6 +169,12 @@ export async function POST(request: NextRequest) { } } + // If we have thinking that wasn't sent yet (no content followed it), send it now. NOTE(review): as shown the marker is only the thinking text plus a blank line, with no delimiter around it - confirm wrapper tags were not lost + if (fullThinking && !thinkingSent) { + const thinkingMarker = `${fullThinking}\n\n`; + controller.enqueue(encoder.encode(thinkingMarker)); + } + // If no native tool calls, try to parse text-based tool calls if (toolCalls.length === 0 && enableTools) { toolCalls = parseTextToolCalls(fullContent); diff --git a/components/Chat/MarkdownMessage.tsx b/components/Chat/MarkdownMessage.tsx index 
625800b..95ee36f 100644 --- a/components/Chat/MarkdownMessage.tsx +++ b/components/Chat/MarkdownMessage.tsx @@ -41,6 +41,19 @@ interface ContentSegment { toolCall?: ParsedToolCall; } +/** + * Clean text content by removing text-based tool call patterns + * These are tool calls the model outputs as text (not our structured markers); afterwards runs of three or more newlines are collapsed to two and the result is trimmed, so the return value may be an empty string + */ +function cleanTextContent(text: string): string { + return text + .replace(/\w+\[ARGS\]\{[^}]*\}/g, '') // Remove tool_name[ARGS]{...} patterns ([^}]* stops at the first closing brace, so nested argument objects are only partly removed) + .replace(/[\s\S]*?<\/tool_call>/g, '') // Remove <tool_call>...</tool_call> blocks. NOTE(review): as written the pattern has no opening tag, so it deletes everything from the start of the text through each </tool_call> - confirm the opening tag was not lost + .replace(/\{[\s\S]*?"(?:tool|function)"[\s\S]*?\}/g, '') // Remove JSON tool objects. NOTE(review): the lazy match starts at any opening brace and ends at the first closing brace after the keyword, so it can swallow ordinary text in between and leave a stray brace for nested objects - confirm + .replace(/\n{3,}/g, '\n\n') // Collapse multiple newlines + .trim(); +} + /** * Parse content to extract thinking blocks, tool calls, and regular text segments */ @@ -58,7 +71,7 @@ function parseContentWithToolCalls(content: string): ContentSegment[] { while ((match = combinedPattern.exec(content)) !== null) { // Add text before this match if (match.index > lastIndex) { - const textBefore = content.slice(lastIndex, match.index).trim(); + const textBefore = cleanTextContent(content.slice(lastIndex, match.index)); if (textBefore) { segments.push({ type: 'text', content: textBefore }); } @@ -92,15 +105,18 @@ function parseContentWithToolCalls(content: string): ContentSegment[] { // Add remaining text after last match if (lastIndex < content.length) { - const remainingText = content.slice(lastIndex).trim(); + const remainingText = cleanTextContent(content.slice(lastIndex)); if (remainingText) { segments.push({ type: 'text', content: remainingText }); } } - // If no special blocks found, return the whole content as text + // If no special blocks found, return the whole content as text (cleaned); nothing is pushed when cleaning leaves an empty string if (segments.length === 0 && content.trim()) { - segments.push({ type: 'text', content }); + const cleanedContent = cleanTextContent(content); + if (cleanedContent) { + segments.push({ type: 'text', content: cleanedContent }); + } } return segments; @@ -221,7 
+237,9 @@ function stripToolMarkers(content: string): string { return content .replace(//g, '') .replace(//g, '') - .replace(/<\/?think>/g, ''); // Remove think tags but keep content visible during streaming + .replace(/<\/?think>/g, '') // Remove think tags but keep content visible during streaming + .replace(/\w+\[ARGS\]\{[^}]*\}/g, '') // Remove text-based tool calls like get_weather[ARGS]{...} ([^}]* stops at the first closing brace, so nested argument objects are only partly removed) + .replace(/[\s\S]*?<\/tool_call>/g, ''); // Remove XML-style tool calls. NOTE(review): as written the pattern has no opening tag, so it removes everything from the start of the content through each </tool_call> - confirm the opening tag was not lost } function MarkdownContent({ content }: { content: string }) {