changes
This commit is contained in:
+24
-1
@@ -121,24 +121,41 @@ export async function POST(request: NextRequest) {
|
|||||||
iterations++;
|
iterations++;
|
||||||
|
|
||||||
// Call Ollama with tools if enabled
|
// Call Ollama with tools if enabled
|
||||||
|
// Enable thinking mode for models that support it (like deepseek-r1)
|
||||||
const response = await ollama.chat({
|
const response = await ollama.chat({
|
||||||
model,
|
model,
|
||||||
messages: workingMessages,
|
messages: workingMessages,
|
||||||
tools: enableTools ? allTools : undefined,
|
tools: enableTools ? allTools : undefined,
|
||||||
stream: true,
|
stream: true,
|
||||||
|
think: true, // Enable thinking mode - model will separate thinking from response
|
||||||
options: {
|
options: {
|
||||||
num_predict: MAX_RESPONSE_TOKENS,
|
num_predict: MAX_RESPONSE_TOKENS,
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
let fullContent = '';
|
let fullContent = '';
|
||||||
|
let fullThinking = '';
|
||||||
|
let thinkingSent = false;
|
||||||
let toolCalls: Array<{ name: string; arguments: Record<string, unknown> }> = [];
|
let toolCalls: Array<{ name: string; arguments: Record<string, unknown> }> = [];
|
||||||
|
|
||||||
// Process the streaming response
|
// Process the streaming response
|
||||||
for await (const chunk of response) {
|
for await (const chunk of response) {
|
||||||
// Collect content
|
// Check for thinking content (separate from main content)
|
||||||
|
const thinking = chunk.message?.thinking || '';
|
||||||
|
if (thinking) {
|
||||||
|
fullThinking += thinking;
|
||||||
|
// Don't stream thinking - we'll wrap it at the end
|
||||||
|
}
|
||||||
|
|
||||||
|
// Collect main content
|
||||||
const text = chunk.message?.content || '';
|
const text = chunk.message?.content || '';
|
||||||
if (text) {
|
if (text) {
|
||||||
|
// If we have accumulated thinking and haven't sent it yet, send it first
|
||||||
|
if (fullThinking && !thinkingSent) {
|
||||||
|
const thinkingMarker = `<think>${fullThinking}</think>\n\n`;
|
||||||
|
controller.enqueue(encoder.encode(thinkingMarker));
|
||||||
|
thinkingSent = true;
|
||||||
|
}
|
||||||
fullContent += text;
|
fullContent += text;
|
||||||
controller.enqueue(encoder.encode(text));
|
controller.enqueue(encoder.encode(text));
|
||||||
}
|
}
|
||||||
@@ -152,6 +169,12 @@ export async function POST(request: NextRequest) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If we have thinking that wasn't sent yet (no content followed it), send it now
|
||||||
|
if (fullThinking && !thinkingSent) {
|
||||||
|
const thinkingMarker = `<think>${fullThinking}</think>\n\n`;
|
||||||
|
controller.enqueue(encoder.encode(thinkingMarker));
|
||||||
|
}
|
||||||
|
|
||||||
// If no native tool calls, try to parse text-based tool calls
|
// If no native tool calls, try to parse text-based tool calls
|
||||||
if (toolCalls.length === 0 && enableTools) {
|
if (toolCalls.length === 0 && enableTools) {
|
||||||
toolCalls = parseTextToolCalls(fullContent);
|
toolCalls = parseTextToolCalls(fullContent);
|
||||||
|
|||||||
@@ -41,6 +41,19 @@ interface ContentSegment {
|
|||||||
toolCall?: ParsedToolCall;
|
toolCall?: ParsedToolCall;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Locate the end of the brace-delimited span that opens at `start`.
 *
 * Braces that appear inside double-quoted string literals (with backslash
 * escapes) are ignored, so a value such as "a}b" does not close the span early.
 *
 * @param text - The text being scanned.
 * @param start - Index of the opening `{`.
 * @returns The index just past the matching `}`, or -1 if the span never closes.
 */
function findBalancedBraceEnd(text: string, start: number): number {
  let depth = 0;
  let inString = false;
  for (let i = start; i < text.length; i++) {
    const ch = text[i];
    if (inString) {
      if (ch === '\\') {
        i++; // skip the escaped character so \" does not end the string
      } else if (ch === '"') {
        inString = false;
      }
    } else if (ch === '"') {
      inString = true;
    } else if (ch === '{') {
      depth++;
    } else if (ch === '}') {
      depth--;
      if (depth === 0) return i + 1;
    }
  }
  return -1;
}

/** Remove `tool_name[ARGS]{...}` calls, including arguments with nested objects. */
function stripArgsToolCalls(text: string): string {
  // The lookahead guarantees that lastIndex points at the opening brace.
  const marker = /\w+\[ARGS\](?=\{)/g;
  let result = '';
  let cursor = 0;
  let match: RegExpExecArray | null;
  while ((match = marker.exec(text)) !== null) {
    const end = findBalancedBraceEnd(text, marker.lastIndex);
    if (end === -1) continue; // unterminated arguments: leave the text untouched
    result += text.slice(cursor, match.index);
    cursor = end;
    marker.lastIndex = end; // resume scanning after the removed call
  }
  return result + text.slice(cursor);
}

/** Remove balanced `{...}` spans that mention a `"tool"` or `"function"` token. */
function stripJsonToolObjects(text: string): string {
  const toolToken = /"(?:tool|function)"/;
  let result = '';
  let cursor = 0;
  let open = text.indexOf('{');
  while (open !== -1) {
    const end = findBalancedBraceEnd(text, open);
    if (end === -1) {
      // This brace never closes; an inner one still might, so step past it.
      open = text.indexOf('{', open + 1);
      continue;
    }
    if (toolToken.test(text.slice(open, end))) {
      result += text.slice(cursor, open);
      cursor = end;
    }
    // Nested spans are substrings of this one, so they need no separate check.
    open = text.indexOf('{', end);
  }
  return result + text.slice(cursor);
}

/**
 * Clean text content by removing text-based tool call patterns.
 * These are tool calls the model outputs as text (not our structured markers).
 *
 * Removal is applied in this order:
 *   1. `tool_name[ARGS]{...}` calls
 *   2. `<tool_call>...</tool_call>` blocks
 *   3. JSON-looking objects containing a `"tool"` or `"function"` token
 * Runs of three or more newlines are then collapsed to two and the result is
 * trimmed.
 *
 * Brace spans are matched with balanced-brace scanning rather than a lazy
 * regex, so unrelated text between an earlier `{` and a later tool object is
 * preserved, and nested argument objects are removed completely instead of
 * leaving stray `}` characters behind.
 *
 * @param text - Raw text segment from the model output.
 * @returns The text with tool-call patterns stripped; may be empty.
 */
function cleanTextContent(text: string): string {
  const withoutArgsCalls = stripArgsToolCalls(text);
  const withoutXmlCalls = withoutArgsCalls.replace(/<tool_call>[\s\S]*?<\/tool_call>/g, '');
  return stripJsonToolObjects(withoutXmlCalls)
    .replace(/\n{3,}/g, '\n\n') // Collapse multiple newlines
    .trim();
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parse content to extract thinking blocks, tool calls, and regular text segments
|
* Parse content to extract thinking blocks, tool calls, and regular text segments
|
||||||
*/
|
*/
|
||||||
@@ -58,7 +71,7 @@ function parseContentWithToolCalls(content: string): ContentSegment[] {
|
|||||||
while ((match = combinedPattern.exec(content)) !== null) {
|
while ((match = combinedPattern.exec(content)) !== null) {
|
||||||
// Add text before this match
|
// Add text before this match
|
||||||
if (match.index > lastIndex) {
|
if (match.index > lastIndex) {
|
||||||
const textBefore = content.slice(lastIndex, match.index).trim();
|
const textBefore = cleanTextContent(content.slice(lastIndex, match.index));
|
||||||
if (textBefore) {
|
if (textBefore) {
|
||||||
segments.push({ type: 'text', content: textBefore });
|
segments.push({ type: 'text', content: textBefore });
|
||||||
}
|
}
|
||||||
@@ -92,15 +105,18 @@ function parseContentWithToolCalls(content: string): ContentSegment[] {
|
|||||||
|
|
||||||
// Add remaining text after last match
|
// Add remaining text after last match
|
||||||
if (lastIndex < content.length) {
|
if (lastIndex < content.length) {
|
||||||
const remainingText = content.slice(lastIndex).trim();
|
const remainingText = cleanTextContent(content.slice(lastIndex));
|
||||||
if (remainingText) {
|
if (remainingText) {
|
||||||
segments.push({ type: 'text', content: remainingText });
|
segments.push({ type: 'text', content: remainingText });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If no special blocks found, return the whole content as text
|
// If no special blocks found, return the whole content as text (cleaned)
|
||||||
if (segments.length === 0 && content.trim()) {
|
if (segments.length === 0 && content.trim()) {
|
||||||
segments.push({ type: 'text', content });
|
const cleanedContent = cleanTextContent(content);
|
||||||
|
if (cleanedContent) {
|
||||||
|
segments.push({ type: 'text', content: cleanedContent });
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return segments;
|
return segments;
|
||||||
@@ -221,7 +237,9 @@ function stripToolMarkers(content: string): string {
|
|||||||
return content
|
return content
|
||||||
.replace(/<!--TOOL_START:\w+:\{.*?\}-->/g, '')
|
.replace(/<!--TOOL_START:\w+:\{.*?\}-->/g, '')
|
||||||
.replace(/<!--TOOL_END-->/g, '')
|
.replace(/<!--TOOL_END-->/g, '')
|
||||||
.replace(/<\/?think>/g, ''); // Remove think tags but keep content visible during streaming
|
.replace(/<\/?think>/g, '') // Remove think tags but keep content visible during streaming
|
||||||
|
.replace(/\w+\[ARGS\]\{[^}]*\}/g, '') // Remove text-based tool calls like get_weather[ARGS]{...}
|
||||||
|
.replace(/<tool_call>[\s\S]*?<\/tool_call>/g, ''); // Remove XML-style tool calls
|
||||||
}
|
}
|
||||||
|
|
||||||
function MarkdownContent({ content }: { content: string }) {
|
function MarkdownContent({ content }: { content: string }) {
|
||||||
|
|||||||
Reference in New Issue
Block a user