diff --git a/app/api/chat/route.ts b/app/api/chat/route.ts
index 3b8ba7c..bc0b8e9 100644
--- a/app/api/chat/route.ts
+++ b/app/api/chat/route.ts
@@ -6,6 +6,9 @@ import { allTools, executeTool, getRegisteredTools } from '@/lib/tools';
 // Maximum number of tool call iterations to prevent infinite loops
 const MAX_TOOL_ITERATIONS = 10;
 
+// Cap on tokens per model response; set high (not unlimited) to leave room for long reasoning
+const MAX_RESPONSE_TOKENS = 32768;
+
 /**
  * Parse text-based tool calls from model output
  * Supports formats like:
@@ -100,6 +103,9 @@ export async function POST(request: NextRequest) {
               messages: workingMessages,
               tools: enableTools ? allTools : undefined,
               stream: true,
+              options: {
+                num_predict: MAX_RESPONSE_TOKENS,
+              },
             });
 
             let fullContent = '';
diff --git a/app/api/chat/title/route.ts b/app/api/chat/title/route.ts
index b900d27..b03558f 100644
--- a/app/api/chat/title/route.ts
+++ b/app/api/chat/title/route.ts
@@ -60,6 +60,7 @@ Good examples: "Weather in Three Cities", "Python Debugging Help", "Chocolate Ca
         temperature: 0.3, // Lower temperature for more focused output
         num_predict: 20, // Short response - just the title
       },
+      keep_alive: 0, // Immediately unload model after generation
     });
 
     // eslint-disable-next-line no-console
diff --git a/components/Chat/MarkdownMessage.tsx b/components/Chat/MarkdownMessage.tsx
index 6f1a1c3..625800b 100644
--- a/components/Chat/MarkdownMessage.tsx
+++ b/components/Chat/MarkdownMessage.tsx
@@ -18,6 +18,7 @@ import {
   Text,
   Title,
 } from '@mantine/core';
+import { ThinkingBlock } from './ThinkingBlock';
 import { ToolCall, ToolCallGroup } from './ToolCallDisplay';
 import classes from './MarkdownMessage.module.css';
 // Import KaTeX CSS for LaTeX rendering
@@ -35,23 +36,27 @@ interface ParsedToolCall {
 }
 
 interface ContentSegment {
-  type: 'text' | 'tool';
+  type: 'text' | 'tool' | 'thinking';
   content?: string;
   toolCall?: ParsedToolCall;
 }
 
 /**
- * Parse content to extract tool calls and regular text segments
+ * Parse content to extract thinking blocks, tool calls, and regular text segments
  */
 function parseContentWithToolCalls(content: string): ContentSegment[] {
   const segments: ContentSegment[] = [];
-  const toolPattern = /<!--TOOL_START:(\w+):(\{.*?\})-->([\s\S]*?)<!--TOOL_END-->/g;
+
+  // Combined pattern for both thinking blocks and tool calls
+  // This ensures we parse them in the order they appear
+  const combinedPattern =
+    /(<think>([\s\S]*?)<\/think>)|<!--TOOL_START:(\w+):(\{.*?\})-->([\s\S]*?)<!--TOOL_END-->/g;
 
   let lastIndex = 0;
   let match;
 
-  while ((match = toolPattern.exec(content)) !== null) {
-    // Add text before this tool call
+  while ((match = combinedPattern.exec(content)) !== null) {
+    // Add text before this match
     if (match.index > lastIndex) {
       const textBefore = content.slice(lastIndex, match.index).trim();
       if (textBefore) {
@@ -59,25 +64,33 @@ function parseContentWithToolCalls(content: string): ContentSegment[] {
       }
     }
 
-    // Parse the tool call
-    const toolName = match[1];
-    let args: Record<string, unknown> = {};
-    try {
-      args = JSON.parse(match[2]);
-    } catch {
-      // Invalid JSON, use empty args
-    }
-    const result = match[3].trim();
+    if (match[1]) {
+      // This is a thinking block
+      const thinkingContent = match[2].trim();
+      if (thinkingContent) {
+        segments.push({ type: 'thinking', content: thinkingContent });
+      }
+    } else if (match[3]) {
+      // This is a tool call
+      const toolName = match[3];
+      let args: Record<string, unknown> = {};
+      try {
+        args = JSON.parse(match[4]);
+      } catch {
+        // Invalid JSON, use empty args
+      }
+      const result = match[5].trim();
 
-    segments.push({
-      type: 'tool',
-      toolCall: { toolName, args, result },
-    });
+      segments.push({
+        type: 'tool',
+        toolCall: { toolName, args, result },
+      });
+    }
 
     lastIndex = match.index + match[0].length;
   }
 
-  // Add remaining text after last tool call
+  // Add remaining text after last match
   if (lastIndex < content.length) {
     const remainingText = content.slice(lastIndex).trim();
     if (remainingText) {
@@ -85,7 +98,7 @@ function parseContentWithToolCalls(content: string): ContentSegment[] {
     }
   }
 
-  // If no tool calls found, return the whole content as text
+  // If no special blocks found, return the whole content as text
   if (segments.length === 0 && content.trim()) {
     segments.push({ type: 'text', content });
   }
@@ -94,16 +107,16 @@ function parseContentWithToolCalls(content: string): ContentSegment[] {
 }
 
 /**
- * Grouped segment type - either text or a group of consecutive tool calls
+ * Grouped segment type - text, thinking block, or group of consecutive tool calls
  */
 interface GroupedSegment {
-  type: 'text' | 'toolGroup';
+  type: 'text' | 'toolGroup' | 'thinking';
   content?: string;
   toolCalls?: ToolCall[];
 }
 
 /**
- * Group consecutive tool call segments together
+ * Group consecutive tool call segments together, keep thinking blocks separate
  */
 function groupConsecutiveToolCalls(segments: ContentSegment[]): GroupedSegment[] {
   const grouped: GroupedSegment[] = [];
@@ -114,13 +127,17 @@ function groupConsecutiveToolCalls(segments: ContentSegment[]): GroupedSegment[]
       // Add to current tool group
       currentToolGroup.push(segment.toolCall);
     } else {
-      // Flush any pending tool group before adding text
+      // Flush any pending tool group before adding other segment
       if (currentToolGroup.length > 0) {
         grouped.push({ type: 'toolGroup', toolCalls: currentToolGroup });
         currentToolGroup = [];
       }
-      // Add text segment
-      if (segment.content) {
+
+      if (segment.type === 'thinking' && segment.content) {
+        // Add thinking block as-is
+        grouped.push({ type: 'thinking', content: segment.content });
+      } else if (segment.content) {
+        // Add text segment
         grouped.push({ type: 'text', content: segment.content });
       }
     }
@@ -198,10 +215,13 @@ function escapeHtml(text: string): string {
 }
 
 /**
- * Strip tool call markers from content for streaming display
+ * Strip tool call markers and <think> tags (not the text between them) for streaming display
  */
 function stripToolMarkers(content: string): string {
-  return content.replace(/<!--TOOL_START:\w+:\{.*?\}-->/g, '').replace(/<!--TOOL_END-->/g, '');
+  return content
+    .replace(/<!--TOOL_START:\w+:\{.*?\}-->/g, '')
+    .replace(/<!--TOOL_END-->/g, '')
+    .replace(/<\/?think>/g, ''); // Drop only the opening/closing think tags; the reasoning text stays in the output
 }
 
 function MarkdownContent({ content }: { content: string }) {
@@ -382,6 +402,9 @@ export function MarkdownMessage({ content, isStreaming = false }: MarkdownMessag
   return (
     <div className={classes.markdown}>
       {groupedSegments.map((segment, index) => {
+        if (segment.type === 'thinking' && segment.content) {
+          return <ThinkingBlock key={`thinking-${index}`} content={segment.content} />;
+        }
         if (segment.type === 'toolGroup' && segment.toolCalls) {
           return <ToolCallGroup key={`toolgroup-${index}`} toolCalls={segment.toolCalls} />;
         }
diff --git a/components/Chat/ThinkingBlock.module.css b/components/Chat/ThinkingBlock.module.css
new file mode 100644
index 0000000..ed4a063
--- /dev/null
+++ b/components/Chat/ThinkingBlock.module.css
@@ -0,0 +1,20 @@
+.container { /* Applied to the outer Paper of ThinkingBlock */
+  background-color: var(--mantine-color-default-hover); /* resting tint behind the header row */
+  overflow: hidden;
+}
+
+.header { /* Clickable header row that toggles the block */
+  transition: background-color 0.15s ease; /* animates the :hover background change below */
+}
+
+.header:hover {
+  background-color: var(--mantine-color-default-hover); /* NOTE(review): same value as .container's background, so hover likely shows no visible change - confirm intent */
+}
+
+.content { /* Wrapper around the reasoning text inside the Collapse */
+  padding: var(--mantine-spacing-xs) var(--mantine-spacing-sm);
+  border-top: 1px solid var(--mantine-color-default-border); /* separates the body from the header row */
+  max-height: 300px; /* taller content scrolls within the block via overflow-y below */
+  overflow-y: auto;
+  background-color: var(--mantine-color-body);
+}
diff --git a/components/Chat/ThinkingBlock.tsx b/components/Chat/ThinkingBlock.tsx
new file mode 100644
index 0000000..e9e7608
--- /dev/null
+++ b/components/Chat/ThinkingBlock.tsx
@@ -0,0 +1,88 @@
+'use client';
+
+import { useState } from 'react';
+import { IconBrain, IconChevronDown, IconChevronRight } from '@tabler/icons-react';
+import { ActionIcon, Collapse, Group, Paper, Text, useMantineTheme } from '@mantine/core';
+import { useThemeContext } from '@/components/DynamicThemeProvider';
+import classes from './ThinkingBlock.module.css';
+
+interface ThinkingBlockProps {
+  /** Reasoning text to display; whitespace-only content renders nothing. */
+  content: string;
+  /** When true, the header shows in-progress labels instead of the summary. */
+  isStreaming?: boolean;
+}
+
+/**
+ * Collapsible block for displaying model reasoning/thinking content.
+ *
+ * Starts collapsed; clicking the header row toggles the body. While collapsed,
+ * the header shows a word count and a count of non-empty lines (labelled "steps").
+ *
+ * @param content - Reasoning text, rendered verbatim with whitespace preserved.
+ * @param isStreaming - Switches the header labels to their in-progress wording.
+ * @returns The rendered block, or `null` when `content` is empty or whitespace-only.
+ */
+export function ThinkingBlock({ content, isStreaming = false }: ThinkingBlockProps) {
+  const [opened, setOpened] = useState(false);
+  const { primaryColor } = useThemeContext();
+  const theme = useMantineTheme();
+
+  // Don't render if no content
+  const trimmed = content.trim();
+  if (!trimmed) {
+    return null;
+  }
+
+  // Summary figures for the collapsed header
+  const lines = trimmed.split('\n').filter(Boolean);
+  const wordCount = trimmed.split(/\s+/).length;
+
+  // Guarded lookup: an unguarded `theme.colors[primaryColor][6]` throws during render
+  // if primaryColor is not a key of theme.colors. `undefined` leaves the icon color unset.
+  const iconColor = theme.colors[primaryColor]?.[6];
+
+  // Functional update so the toggle never depends on a stale `opened` value
+  const toggle = () => setOpened((prev) => !prev);
+
+  return (
+    <Paper className={classes.container} withBorder radius="sm" p={0} my="xs">
+      <Group
+        className={classes.header}
+        onClick={toggle}
+        gap="xs"
+        wrap="nowrap"
+        p="xs"
+        style={{ cursor: 'pointer' }}
+      >
+        {/* Clicks on this button bubble to the Group's onClick, which performs the toggle */}
+        <ActionIcon
+          variant="subtle"
+          color={primaryColor}
+          size="xs"
+          aria-expanded={opened}
+          aria-label={opened ? 'Collapse reasoning' : 'Expand reasoning'}
+        >
+          {opened ? <IconChevronDown size={14} /> : <IconChevronRight size={14} />}
+        </ActionIcon>
+        <IconBrain size={16} color={iconColor} />
+        <Text size="sm" fw={500} c={primaryColor}>
+          {isStreaming ? 'Thinking...' : 'Reasoning'}
+        </Text>
+        {!opened && (
+          <Text size="xs" c="dimmed" truncate style={{ flex: 1 }}>
+            {isStreaming ? 'Model is reasoning...' : `${wordCount} words, ${lines.length} steps`}
+          </Text>
+        )}
+      </Group>
+
+      <Collapse in={opened}>
+        <div className={classes.content}>
+          <Text size="xs" c="dimmed" style={{ whiteSpace: 'pre-wrap', lineHeight: 1.5 }}>
+            {content}
+          </Text>
+        </div>
+      </Collapse>
+    </Paper>
+  );
+}