changes

2026-01-14 23:37:00 +01:00
parent fae63deda4
commit 9853ae8fe4
5 changed files with 142 additions and 28 deletions
@@ -6,6 +6,9 @@ import { allTools, executeTool, getRegisteredTools } from '@/lib/tools';
 // Maximum number of tool call iterations to prevent infinite loops
 const MAX_TOOL_ITERATIONS = 10;

+// Upper bound on tokens generated per model response; set high so long reasoning output is not cut off
+const MAX_RESPONSE_TOKENS = 32768;
+
 /**
 * Parse text-based tool calls from model output
 * Supports formats like:
@@ -100,6 +103,9 @@ export async function POST(request: NextRequest) {
              messages: workingMessages,
              tools: enableTools ? allTools : undefined,
              stream: true,
+              options: {
+                num_predict: MAX_RESPONSE_TOKENS,
+              },
            });

            let fullContent = '';
@@ -60,6 +60,7 @@ Good examples: "Weather in Three Cities", "Python Debugging Help", "Chocolate Ca
        temperature: 0.3, // Lower temperature for more focused output
        num_predict: 20, // Short response - just the title
      },
+      keep_alive: 0, // Immediately unload model after generation
    });

    // eslint-disable-next-line no-console