diff --git a/app/api/chat/route.ts b/app/api/chat/route.ts
index 3bce8e1..cc9a0a9 100644
--- a/app/api/chat/route.ts
+++ b/app/api/chat/route.ts
@@ -121,24 +121,41 @@ export async function POST(request: NextRequest) {
             iterations++;
 
             // Call Ollama with tools if enabled
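+            // Request thinking output. NOTE(review): `think: true` is sent for every model, not only thinking-capable ones (e.g. deepseek-r1) - confirm unsupported models are handled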
             const response = await ollama.chat({
               model,
               messages: workingMessages,
               tools: enableTools ? allTools : undefined,
               stream: true,
+              think: true, // Enable thinking mode - model will separate thinking from response
               options: {
                 num_predict: MAX_RESPONSE_TOKENS,
               },
             });
 
             let fullContent = '';
+            let fullThinking = '';
+            let thinkingSent = false;
             let toolCalls: Array<{ name: string; arguments: Record<string, unknown> }> = [];
 
             // Process the streaming response
             for await (const chunk of response) {
-              // Collect content
+              // Check for thinking content (separate from main content)
+              const thinking = chunk.message?.thinking || '';
+              if (thinking) {
+                fullThinking += thinking;
+                // Don't stream thinking - it is buffered and sent as one <think> block just before the first content chunk
+              }
+
+              // Collect main content
               const text = chunk.message?.content || '';
               if (text) {
+                // If we have accumulated thinking and haven't sent it yet, send it first
+                if (fullThinking && !thinkingSent) {
+                  const thinkingMarker = `<think>${fullThinking}</think>\n\n`;
+                  controller.enqueue(encoder.encode(thinkingMarker));
+                  thinkingSent = true;
+                }
                 fullContent += text;
                 controller.enqueue(encoder.encode(text));
               }
@@ -152,6 +169,12 @@ export async function POST(request: NextRequest) {
               }
             }
 
+            // If we have thinking that wasn't sent yet (no content followed it), send it now
+            if (fullThinking && !thinkingSent) {
+              const thinkingMarker = `<think>${fullThinking}</think>\n\n`;
+              controller.enqueue(encoder.encode(thinkingMarker));
+            }
+
             // If no native tool calls, try to parse text-based tool calls
             if (toolCalls.length === 0 && enableTools) {
               toolCalls = parseTextToolCalls(fullContent);
diff --git a/components/Chat/MarkdownMessage.tsx b/components/Chat/MarkdownMessage.tsx
index 625800b..95ee36f 100644
--- a/components/Chat/MarkdownMessage.tsx
+++ b/components/Chat/MarkdownMessage.tsx
@@ -41,6 +41,19 @@ interface ContentSegment {
   toolCall?: ParsedToolCall;
 }
 
+/**
+ * Removes tool calls the model wrote out as plain text (not our structured markers),
+ * then collapses runs of 3+ newlines to one blank line and trims; returns the cleaned string.
+ */
+function cleanTextContent(text: string): string {
+  return text
+    .replace(/\w+\[ARGS\]\{[^}]*\}/g, '') // tool_name[ARGS]{...}; match ends at the first closing brace, so nested-brace args leave a tail
+    .replace(/<tool_call>[\s\S]*?<\/tool_call>/g, '') // <tool_call>...</tool_call>, including multi-line bodies
+    .replace(/\{[\s\S]*?"(?:tool|function)"[\s\S]*?\}/g, '') // JSON tool objects. NOTE(review): starts at the earliest opening brace before the key, so unrelated text in between is removed too - confirm this is acceptable
+    .replace(/\n{3,}/g, '\n\n') // Collapse 3+ consecutive newlines into a single blank line
+    .trim();
+}
+
 /**
  * Parse content to extract thinking blocks, tool calls, and regular text segments
  */
@@ -58,7 +71,7 @@ function parseContentWithToolCalls(content: string): ContentSegment[] {
   while ((match = combinedPattern.exec(content)) !== null) {
     // Add text before this match
     if (match.index > lastIndex) {
-      const textBefore = content.slice(lastIndex, match.index).trim();
+      const textBefore = cleanTextContent(content.slice(lastIndex, match.index));
       if (textBefore) {
         segments.push({ type: 'text', content: textBefore });
       }
@@ -92,15 +105,18 @@ function parseContentWithToolCalls(content: string): ContentSegment[] {
 
   // Add remaining text after last match
   if (lastIndex < content.length) {
-    const remainingText = content.slice(lastIndex).trim();
+    const remainingText = cleanTextContent(content.slice(lastIndex));
     if (remainingText) {
       segments.push({ type: 'text', content: remainingText });
     }
   }
 
-  // If no special blocks found, return the whole content as text
+  // If no special blocks found, return the whole content as text (cleaned)
   if (segments.length === 0 && content.trim()) {
-    segments.push({ type: 'text', content });
+    const cleanedContent = cleanTextContent(content);
+    if (cleanedContent) {
+      segments.push({ type: 'text', content: cleanedContent });
+    }
   }
 
   return segments;
@@ -221,7 +237,9 @@ function stripToolMarkers(content: string): string {
   return content
     .replace(/<!--TOOL_START:\w+:\{.*?\}-->/g, '')
     .replace(/<!--TOOL_END-->/g, '')
-    .replace(/<\/?think>/g, ''); // Remove think tags but keep content visible during streaming
+    .replace(/<\/?think>/g, '') // Remove think tags but keep content visible during streaming
+    .replace(/\w+\[ARGS\]\{[^}]*\}/g, '') // Remove text-based tool calls like get_weather[ARGS]{...}
+    .replace(/<tool_call>[\s\S]*?<\/tool_call>/g, ''); // Remove XML-style tool calls
 }
 
 function MarkdownContent({ content }: { content: string }) {