{"version":3,"file":"openai-completions.d.ts","sourceRoot":"","sources":["../../src/providers/openai-completions.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAMX,0BAA0B,EAE1B,MAAM,sCAAsC,CAAC;AAG9C,OAAO,KAAK,EAEX,OAAO,EAEP,KAAK,EACL,uBAAuB,EACvB,mBAAmB,EAEnB,cAAc,EACd,aAAa,EAMb,MAAM,aAAa,CAAC;AA2BrB,MAAM,WAAW,wBAAyB,SAAQ,aAAa;IAC9D,UAAU,CAAC,EAAE,MAAM,GAAG,MAAM,GAAG,UAAU,GAAG;QAAE,IAAI,EAAE,UAAU,CAAC;QAAC,QAAQ,EAAE;YAAE,IAAI,EAAE,MAAM,CAAA;SAAE,CAAA;KAAE,CAAC;IAC7F,eAAe,CAAC,EAAE,SAAS,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,OAAO,CAAC;CAClE;AAED,eAAO,MAAM,uBAAuB,EAAE,cAAc,CAAC,oBAAoB,EAAE,wBAAwB,CAkPlG,CAAC;AAEF,eAAO,MAAM,6BAA6B,EAAE,cAAc,CAAC,oBAAoB,EAAE,mBAAmB,CAmBnG,CAAC;AA8JF,wBAAgB,eAAe,CAC9B,KAAK,EAAE,KAAK,CAAC,oBAAoB,CAAC,EAClC,OAAO,EAAE,OAAO,EAChB,MAAM,EAAE,QAAQ,CAAC,uBAAuB,CAAC,GACvC,0BAA0B,EAAE,CA6N9B","sourcesContent":["import OpenAI from \"openai\";\nimport type {\n\tChatCompletionAssistantMessageParam,\n\tChatCompletionChunk,\n\tChatCompletionContentPart,\n\tChatCompletionContentPartImage,\n\tChatCompletionContentPartText,\n\tChatCompletionMessageParam,\n\tChatCompletionToolMessageParam,\n} from \"openai/resources/chat/completions.js\";\nimport { getEnvApiKey } from \"../env-api-keys.js\";\nimport { calculateCost, supportsXhigh } from \"../models.js\";\nimport type {\n\tAssistantMessage,\n\tContext,\n\tMessage,\n\tModel,\n\tOpenAICompletionsCompat,\n\tSimpleStreamOptions,\n\tStopReason,\n\tStreamFunction,\n\tStreamOptions,\n\tTextContent,\n\tThinkingContent,\n\tTool,\n\tToolCall,\n\tToolResultMessage,\n} from \"../types.js\";\nimport { AssistantMessageEventStream } from \"../utils/event-stream.js\";\nimport { parseStreamingJson } from \"../utils/json-parse.js\";\nimport { sanitizeSurrogates } from \"../utils/sanitize-unicode.js\";\nimport { buildCopilotDynamicHeaders, hasCopilotVisionInput } from \"./github-copilot-headers.js\";\nimport { buildBaseOptions, clampReasoning } from \"./simple-options.js\";\nimport { transformMessages } from \"./transform-messages.js\";\n\n/**\n * Check if conversation messages contain tool calls or tool results.\n * This is needed because Anthropic (via proxy) requires the tools param\n * to be present when messages include tool_calls or tool role messages.\n */\nfunction hasToolHistory(messages: Message[]): boolean {\n\tfor (const msg of messages) {\n\t\tif (msg.role === \"toolResult\") {\n\t\t\treturn true;\n\t\t}\n\t\tif (msg.role === \"assistant\") {\n\t\t\tif (msg.content.some((block) => block.type === \"toolCall\")) {\n\t\t\t\treturn true;\n\t\t\t}\n\t\t}\n\t}\n\treturn false;\n}\n\nexport interface OpenAICompletionsOptions extends StreamOptions {\n\ttoolChoice?: \"auto\" | \"none\" | \"required\" | { type: \"function\"; function: { name: string } };\n\treasoningEffort?: \"minimal\" | \"low\" | \"medium\" | \"high\" | \"xhigh\";\n}\n\nexport const streamOpenAICompletions: StreamFunction<\"openai-completions\", OpenAICompletionsOptions> = (\n\tmodel: Model<\"openai-completions\">,\n\tcontext: Context,\n\toptions?: OpenAICompletionsOptions,\n): AssistantMessageEventStream => {\n\tconst stream = new AssistantMessageEventStream();\n\n\t(async () => {\n\t\tconst output: AssistantMessage = {\n\t\t\trole: \"assistant\",\n\t\t\tcontent: [],\n\t\t\tapi: model.api,\n\t\t\tprovider: model.provider,\n\t\t\tmodel: model.id,\n\t\t\tusage: {\n\t\t\t\tinput: 0,\n\t\t\t\toutput: 0,\n\t\t\t\tcacheRead: 0,\n\t\t\t\tcacheWrite: 0,\n\t\t\t\ttotalTokens: 0,\n\t\t\t\tcost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },\n\t\t\t},\n\t\t\tstopReason: \"stop\",\n\t\t\ttimestamp: Date.now(),\n\t\t};\n\n\t\ttry {\n\t\t\tconst apiKey = options?.apiKey || getEnvApiKey(model.provider) || \"\";\n\t\t\tconst client = createClient(model, context, apiKey, options?.headers);\n\t\t\tlet params = buildParams(model, context, options);\n\t\t\tconst nextParams = await options?.onPayload?.(params, model);\n\t\t\tif (nextParams !== undefined) {\n\t\t\t\tparams = nextParams as OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming;\n\t\t\t}\n\t\t\tconst openaiStream = await client.chat.completions.create(params, { signal: options?.signal });\n\t\t\tstream.push({ type: \"start\", partial: output });\n\n\t\t\tlet currentBlock: TextContent | ThinkingContent | (ToolCall & { partialArgs?: string }) | null = null;\n\t\t\tconst blocks = output.content;\n\t\t\tconst blockIndex = () => blocks.length - 1;\n\t\t\tconst finishCurrentBlock = (block?: typeof currentBlock) => {\n\t\t\t\tif (block) {\n\t\t\t\t\tif (block.type === \"text\") {\n\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\ttype: \"text_end\",\n\t\t\t\t\t\t\tcontentIndex: blockIndex(),\n\t\t\t\t\t\t\tcontent: block.text,\n\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t});\n\t\t\t\t\t} else if (block.type === \"thinking\") {\n\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\ttype: \"thinking_end\",\n\t\t\t\t\t\t\tcontentIndex: blockIndex(),\n\t\t\t\t\t\t\tcontent: block.thinking,\n\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t});\n\t\t\t\t\t} else if (block.type === \"toolCall\") {\n\t\t\t\t\t\tblock.arguments = parseStreamingJson(block.partialArgs);\n\t\t\t\t\t\tdelete block.partialArgs;\n\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\ttype: \"toolcall_end\",\n\t\t\t\t\t\t\tcontentIndex: blockIndex(),\n\t\t\t\t\t\t\ttoolCall: block,\n\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t});\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t};\n\n\t\t\tfor await (const chunk of openaiStream) {\n\t\t\t\t// OpenAI documents ChatCompletionChunk.id as the unique chat completion identifier,\n\t\t\t\t// and each chunk in a streamed completion carries the same id.\n\t\t\t\toutput.responseId ||= chunk.id;\n\t\t\t\tif (chunk.usage) {\n\t\t\t\t\toutput.usage = parseChunkUsage(chunk.usage, model);\n\t\t\t\t}\n\n\t\t\t\tconst choice = chunk.choices?.[0];\n\t\t\t\tif (!choice) continue;\n\n\t\t\t\t// Fallback: some providers (e.g., Moonshot) return usage\n\t\t\t\t// in choice.usage instead of the standard chunk.usage\n\t\t\t\tif (!chunk.usage && (choice as any).usage) {\n\t\t\t\t\toutput.usage = parseChunkUsage((choice as any).usage, model);\n\t\t\t\t}\n\n\t\t\t\tif (choice.finish_reason) {\n\t\t\t\t\tconst finishReasonResult = mapStopReason(choice.finish_reason);\n\t\t\t\t\toutput.stopReason = finishReasonResult.stopReason;\n\t\t\t\t\tif (finishReasonResult.errorMessage) {\n\t\t\t\t\t\toutput.errorMessage = finishReasonResult.errorMessage;\n\t\t\t\t\t}\n\t\t\t\t}\n\n\t\t\t\tif (choice.delta) {\n\t\t\t\t\tif (\n\t\t\t\t\t\tchoice.delta.content !== null &&\n\t\t\t\t\t\tchoice.delta.content !== undefined &&\n\t\t\t\t\t\tchoice.delta.content.length > 0\n\t\t\t\t\t) {\n\t\t\t\t\t\tif (!currentBlock || currentBlock.type !== \"text\") {\n\t\t\t\t\t\t\tfinishCurrentBlock(currentBlock);\n\t\t\t\t\t\t\tcurrentBlock = { type: \"text\", text: \"\" };\n\t\t\t\t\t\t\toutput.content.push(currentBlock);\n\t\t\t\t\t\t\tstream.push({ type: \"text_start\", contentIndex: blockIndex(), partial: output });\n\t\t\t\t\t\t}\n\n\t\t\t\t\t\tif (currentBlock.type === \"text\") {\n\t\t\t\t\t\t\tcurrentBlock.text += choice.delta.content;\n\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\ttype: \"text_delta\",\n\t\t\t\t\t\t\t\tcontentIndex: blockIndex(),\n\t\t\t\t\t\t\t\tdelta: choice.delta.content,\n\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t});\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\n\t\t\t\t\t// Some endpoints return reasoning in reasoning_content (llama.cpp),\n\t\t\t\t\t// or reasoning (other openai compatible endpoints)\n\t\t\t\t\t// Use the first non-empty reasoning field to avoid duplication\n\t\t\t\t\t// (e.g., chutes.ai returns both reasoning_content and reasoning with same content)\n\t\t\t\t\tconst reasoningFields = [\"reasoning_content\", \"reasoning\", \"reasoning_text\"];\n\t\t\t\t\tlet foundReasoningField: string | null = null;\n\t\t\t\t\tfor (const field of reasoningFields) {\n\t\t\t\t\t\tif (\n\t\t\t\t\t\t\t(choice.delta as any)[field] !== null &&\n\t\t\t\t\t\t\t(choice.delta as any)[field] !== undefined &&\n\t\t\t\t\t\t\t(choice.delta as any)[field].length > 0\n\t\t\t\t\t\t) {\n\t\t\t\t\t\t\tif (!foundReasoningField) {\n\t\t\t\t\t\t\t\tfoundReasoningField = field;\n\t\t\t\t\t\t\t\tbreak;\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\n\t\t\t\t\tif (foundReasoningField) {\n\t\t\t\t\t\tif (!currentBlock || currentBlock.type !== \"thinking\") {\n\t\t\t\t\t\t\tfinishCurrentBlock(currentBlock);\n\t\t\t\t\t\t\tcurrentBlock = {\n\t\t\t\t\t\t\t\ttype: \"thinking\",\n\t\t\t\t\t\t\t\tthinking: \"\",\n\t\t\t\t\t\t\t\tthinkingSignature: foundReasoningField,\n\t\t\t\t\t\t\t};\n\t\t\t\t\t\t\toutput.content.push(currentBlock);\n\t\t\t\t\t\t\tstream.push({ type: \"thinking_start\", contentIndex: blockIndex(), partial: output });\n\t\t\t\t\t\t}\n\n\t\t\t\t\t\tif (currentBlock.type === \"thinking\") {\n\t\t\t\t\t\t\tconst delta = (choice.delta as any)[foundReasoningField];\n\t\t\t\t\t\t\tcurrentBlock.thinking += delta;\n\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\ttype: \"thinking_delta\",\n\t\t\t\t\t\t\t\tcontentIndex: blockIndex(),\n\t\t\t\t\t\t\t\tdelta,\n\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t});\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\n\t\t\t\t\tif (choice?.delta?.tool_calls) {\n\t\t\t\t\t\tfor (const toolCall of choice.delta.tool_calls) {\n\t\t\t\t\t\t\tif (\n\t\t\t\t\t\t\t\t!currentBlock ||\n\t\t\t\t\t\t\t\tcurrentBlock.type !== \"toolCall\" ||\n\t\t\t\t\t\t\t\t(toolCall.id && currentBlock.id !== toolCall.id)\n\t\t\t\t\t\t\t) {\n\t\t\t\t\t\t\t\tfinishCurrentBlock(currentBlock);\n\t\t\t\t\t\t\t\tcurrentBlock = {\n\t\t\t\t\t\t\t\t\ttype: \"toolCall\",\n\t\t\t\t\t\t\t\t\tid: toolCall.id || \"\",\n\t\t\t\t\t\t\t\t\tname: toolCall.function?.name || \"\",\n\t\t\t\t\t\t\t\t\targuments: {},\n\t\t\t\t\t\t\t\t\tpartialArgs: \"\",\n\t\t\t\t\t\t\t\t};\n\t\t\t\t\t\t\t\toutput.content.push(currentBlock);\n\t\t\t\t\t\t\t\tstream.push({ type: \"toolcall_start\", contentIndex: blockIndex(), partial: output });\n\t\t\t\t\t\t\t}\n\n\t\t\t\t\t\t\tif (currentBlock.type === \"toolCall\") {\n\t\t\t\t\t\t\t\tif (toolCall.id) currentBlock.id = toolCall.id;\n\t\t\t\t\t\t\t\tif (toolCall.function?.name) currentBlock.name = toolCall.function.name;\n\t\t\t\t\t\t\t\tlet delta = \"\";\n\t\t\t\t\t\t\t\tif (toolCall.function?.arguments) {\n\t\t\t\t\t\t\t\t\tdelta = toolCall.function.arguments;\n\t\t\t\t\t\t\t\t\tcurrentBlock.partialArgs += toolCall.function.arguments;\n\t\t\t\t\t\t\t\t\tcurrentBlock.arguments = parseStreamingJson(currentBlock.partialArgs);\n\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\t\ttype: \"toolcall_delta\",\n\t\t\t\t\t\t\t\t\tcontentIndex: blockIndex(),\n\t\t\t\t\t\t\t\t\tdelta,\n\t\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\n\t\t\t\t\tconst reasoningDetails = (choice.delta as any).reasoning_details;\n\t\t\t\t\tif (reasoningDetails && Array.isArray(reasoningDetails)) {\n\t\t\t\t\t\tfor (const detail of reasoningDetails) {\n\t\t\t\t\t\t\tif (detail.type === \"reasoning.encrypted\" && detail.id && detail.data) {\n\t\t\t\t\t\t\t\tconst matchingToolCall = output.content.find(\n\t\t\t\t\t\t\t\t\t(b) => b.type === \"toolCall\" && b.id === detail.id,\n\t\t\t\t\t\t\t\t) as ToolCall | undefined;\n\t\t\t\t\t\t\t\tif (matchingToolCall) {\n\t\t\t\t\t\t\t\t\tmatchingToolCall.thoughtSignature = JSON.stringify(detail);\n\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\n\t\t\tfinishCurrentBlock(currentBlock);\n\t\t\tif (options?.signal?.aborted) {\n\t\t\t\tthrow new Error(\"Request was aborted\");\n\t\t\t}\n\n\t\t\tif (output.stopReason === \"aborted\") {\n\t\t\t\tthrow new Error(\"Request was aborted\");\n\t\t\t}\n\t\t\tif (output.stopReason === \"error\") {\n\t\t\t\tthrow new Error(output.errorMessage || \"Provider returned an error stop reason\");\n\t\t\t}\n\n\t\t\tstream.push({ type: \"done\", reason: output.stopReason, message: output });\n\t\t\tstream.end();\n\t\t} catch (error) {\n\t\t\tfor (const block of output.content) delete (block as any).index;\n\t\t\toutput.stopReason = options?.signal?.aborted ? \"aborted\" : \"error\";\n\t\t\toutput.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);\n\t\t\t// Some providers via OpenRouter give additional information in this field.\n\t\t\tconst rawMetadata = (error as any)?.error?.metadata?.raw;\n\t\t\tif (rawMetadata) output.errorMessage += `\\n${rawMetadata}`;\n\t\t\tstream.push({ type: \"error\", reason: output.stopReason, error: output });\n\t\t\tstream.end();\n\t\t}\n\t})();\n\n\treturn stream;\n};\n\nexport const streamSimpleOpenAICompletions: StreamFunction<\"openai-completions\", SimpleStreamOptions> = (\n\tmodel: Model<\"openai-completions\">,\n\tcontext: Context,\n\toptions?: SimpleStreamOptions,\n): AssistantMessageEventStream => {\n\tconst apiKey = options?.apiKey || getEnvApiKey(model.provider);\n\tif (!apiKey) {\n\t\tthrow new Error(`No API key for provider: ${model.provider}`);\n\t}\n\n\tconst base = buildBaseOptions(model, options, apiKey);\n\tconst reasoningEffort = supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning);\n\tconst toolChoice = (options as OpenAICompletionsOptions | undefined)?.toolChoice;\n\n\treturn streamOpenAICompletions(model, context, {\n\t\t...base,\n\t\treasoningEffort,\n\t\ttoolChoice,\n\t} satisfies OpenAICompletionsOptions);\n};\n\nfunction createClient(\n\tmodel: Model<\"openai-completions\">,\n\tcontext: Context,\n\tapiKey?: string,\n\toptionsHeaders?: Record,\n) {\n\tif (!apiKey) {\n\t\tif (!process.env.OPENAI_API_KEY) {\n\t\t\tthrow new Error(\n\t\t\t\t\"OpenAI API key is required. Set OPENAI_API_KEY environment variable or pass it as an argument.\",\n\t\t\t);\n\t\t}\n\t\tapiKey = process.env.OPENAI_API_KEY;\n\t}\n\n\tconst headers = { ...model.headers };\n\tif (model.provider === \"github-copilot\") {\n\t\tconst hasImages = hasCopilotVisionInput(context.messages);\n\t\tconst copilotHeaders = buildCopilotDynamicHeaders({\n\t\t\tmessages: context.messages,\n\t\t\thasImages,\n\t\t});\n\t\tObject.assign(headers, copilotHeaders);\n\t}\n\n\t// Merge options headers last so they can override defaults\n\tif (optionsHeaders) {\n\t\tObject.assign(headers, optionsHeaders);\n\t}\n\n\treturn new OpenAI({\n\t\tapiKey,\n\t\tbaseURL: model.baseUrl,\n\t\tdangerouslyAllowBrowser: true,\n\t\tdefaultHeaders: headers,\n\t});\n}\n\nfunction buildParams(model: Model<\"openai-completions\">, context: Context, options?: OpenAICompletionsOptions) {\n\tconst compat = getCompat(model);\n\tconst messages = convertMessages(model, context, compat);\n\tmaybeAddOpenRouterAnthropicCacheControl(model, messages);\n\n\tconst params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {\n\t\tmodel: model.id,\n\t\tmessages,\n\t\tstream: true,\n\t};\n\n\tif (compat.supportsUsageInStreaming !== false) {\n\t\t(params as any).stream_options = { include_usage: true };\n\t}\n\n\tif (compat.supportsStore) {\n\t\tparams.store = false;\n\t}\n\n\tif (options?.maxTokens) {\n\t\tif (compat.maxTokensField === \"max_tokens\") {\n\t\t\t(params as any).max_tokens = options.maxTokens;\n\t\t} else {\n\t\t\tparams.max_completion_tokens = options.maxTokens;\n\t\t}\n\t}\n\n\tif (options?.temperature !== undefined) {\n\t\tparams.temperature = options.temperature;\n\t}\n\n\tif (context.tools) {\n\t\tparams.tools = convertTools(context.tools, compat);\n\t} else if (hasToolHistory(context.messages)) {\n\t\t// Anthropic (via LiteLLM/proxy) requires tools param when conversation has tool_calls/tool_results\n\t\tparams.tools = [];\n\t}\n\n\tif (options?.toolChoice) {\n\t\tparams.tool_choice = options.toolChoice;\n\t}\n\n\tif (compat.thinkingFormat === \"zai\" && model.reasoning) {\n\t\t(params as any).enable_thinking = !!options?.reasoningEffort;\n\t} else if (compat.thinkingFormat === \"qwen\" && model.reasoning) {\n\t\t(params as any).enable_thinking = !!options?.reasoningEffort;\n\t} else if (compat.thinkingFormat === \"qwen-chat-template\" && model.reasoning) {\n\t\t(params as any).chat_template_kwargs = { enable_thinking: !!options?.reasoningEffort };\n\t} else if (compat.thinkingFormat === \"openrouter\" && options?.reasoningEffort && model.reasoning) {\n\t\t// OpenRouter normalizes reasoning across providers via a nested reasoning object.\n\t\tconst openRouterParams = params as typeof params & { reasoning?: { effort?: string } };\n\t\topenRouterParams.reasoning = {\n\t\t\teffort: mapReasoningEffort(options.reasoningEffort, compat.reasoningEffortMap),\n\t\t};\n\t} else if (options?.reasoningEffort && model.reasoning && compat.supportsReasoningEffort) {\n\t\t// OpenAI-style reasoning_effort\n\t\t(params as any).reasoning_effort = mapReasoningEffort(options.reasoningEffort, compat.reasoningEffortMap);\n\t}\n\n\t// OpenRouter provider routing preferences\n\tif (model.baseUrl.includes(\"openrouter.ai\") && model.compat?.openRouterRouting) {\n\t\t(params as any).provider = model.compat.openRouterRouting;\n\t}\n\n\t// Vercel AI Gateway provider routing preferences\n\tif (model.baseUrl.includes(\"ai-gateway.vercel.sh\") && model.compat?.vercelGatewayRouting) {\n\t\tconst routing = model.compat.vercelGatewayRouting;\n\t\tif (routing.only || routing.order) {\n\t\t\tconst gatewayOptions: Record = {};\n\t\t\tif (routing.only) gatewayOptions.only = routing.only;\n\t\t\tif (routing.order) gatewayOptions.order = routing.order;\n\t\t\t(params as any).providerOptions = { gateway: gatewayOptions };\n\t\t}\n\t}\n\n\treturn params;\n}\n\nfunction mapReasoningEffort(\n\teffort: NonNullable,\n\treasoningEffortMap: Partial, string>>,\n): string {\n\treturn reasoningEffortMap[effort] ?? effort;\n}\n\nfunction maybeAddOpenRouterAnthropicCacheControl(\n\tmodel: Model<\"openai-completions\">,\n\tmessages: ChatCompletionMessageParam[],\n): void {\n\tif (model.provider !== \"openrouter\" || !model.id.startsWith(\"anthropic/\")) return;\n\n\t// Anthropic-style caching requires cache_control on a text part. Add a breakpoint\n\t// on the last user/assistant message (walking backwards until we find text content).\n\tfor (let i = messages.length - 1; i >= 0; i--) {\n\t\tconst msg = messages[i];\n\t\tif (msg.role !== \"user\" && msg.role !== \"assistant\") continue;\n\n\t\tconst content = msg.content;\n\t\tif (typeof content === \"string\") {\n\t\t\tmsg.content = [\n\t\t\t\tObject.assign({ type: \"text\" as const, text: content }, { cache_control: { type: \"ephemeral\" } }),\n\t\t\t];\n\t\t\treturn;\n\t\t}\n\n\t\tif (!Array.isArray(content)) continue;\n\n\t\t// Find last text part and add cache_control\n\t\tfor (let j = content.length - 1; j >= 0; j--) {\n\t\t\tconst part = content[j];\n\t\t\tif (part?.type === \"text\") {\n\t\t\t\tObject.assign(part, { cache_control: { type: \"ephemeral\" } });\n\t\t\t\treturn;\n\t\t\t}\n\t\t}\n\t}\n}\n\nexport function convertMessages(\n\tmodel: Model<\"openai-completions\">,\n\tcontext: Context,\n\tcompat: Required,\n): ChatCompletionMessageParam[] {\n\tconst params: ChatCompletionMessageParam[] = [];\n\n\tconst normalizeToolCallId = (id: string): string => {\n\t\t// Handle pipe-separated IDs from OpenAI Responses API\n\t\t// Format: {call_id}|{id} where {id} can be 400+ chars with special chars (+, /, =)\n\t\t// These come from providers like github-copilot, openai-codex, opencode\n\t\t// Extract just the call_id part and normalize it\n\t\tif (id.includes(\"|\")) {\n\t\t\tconst [callId] = id.split(\"|\");\n\t\t\t// Sanitize to allowed chars and truncate to 40 chars (OpenAI limit)\n\t\t\treturn callId.replace(/[^a-zA-Z0-9_-]/g, \"_\").slice(0, 40);\n\t\t}\n\n\t\tif (model.provider === \"openai\") return id.length > 40 ? id.slice(0, 40) : id;\n\t\treturn id;\n\t};\n\n\tconst transformedMessages = transformMessages(context.messages, model, (id) => normalizeToolCallId(id));\n\n\tif (context.systemPrompt) {\n\t\tconst useDeveloperRole = model.reasoning && compat.supportsDeveloperRole;\n\t\tconst role = useDeveloperRole ? \"developer\" : \"system\";\n\t\tparams.push({ role: role, content: sanitizeSurrogates(context.systemPrompt) });\n\t}\n\n\tlet lastRole: string | null = null;\n\n\tfor (let i = 0; i < transformedMessages.length; i++) {\n\t\tconst msg = transformedMessages[i];\n\t\t// Some providers don't allow user messages directly after tool results\n\t\t// Insert a synthetic assistant message to bridge the gap\n\t\tif (compat.requiresAssistantAfterToolResult && lastRole === \"toolResult\" && msg.role === \"user\") {\n\t\t\tparams.push({\n\t\t\t\trole: \"assistant\",\n\t\t\t\tcontent: \"I have processed the tool results.\",\n\t\t\t});\n\t\t}\n\n\t\tif (msg.role === \"user\") {\n\t\t\tif (typeof msg.content === \"string\") {\n\t\t\t\tparams.push({\n\t\t\t\t\trole: \"user\",\n\t\t\t\t\tcontent: sanitizeSurrogates(msg.content),\n\t\t\t\t});\n\t\t\t} else {\n\t\t\t\tconst content: ChatCompletionContentPart[] = msg.content.map((item): ChatCompletionContentPart => {\n\t\t\t\t\tif (item.type === \"text\") {\n\t\t\t\t\t\treturn {\n\t\t\t\t\t\t\ttype: \"text\",\n\t\t\t\t\t\t\ttext: sanitizeSurrogates(item.text),\n\t\t\t\t\t\t} satisfies ChatCompletionContentPartText;\n\t\t\t\t\t} else {\n\t\t\t\t\t\treturn {\n\t\t\t\t\t\t\ttype: \"image_url\",\n\t\t\t\t\t\t\timage_url: {\n\t\t\t\t\t\t\t\turl: `data:${item.mimeType};base64,${item.data}`,\n\t\t\t\t\t\t\t},\n\t\t\t\t\t\t} satisfies ChatCompletionContentPartImage;\n\t\t\t\t\t}\n\t\t\t\t});\n\t\t\t\tconst filteredContent = !model.input.includes(\"image\")\n\t\t\t\t\t? content.filter((c) => c.type !== \"image_url\")\n\t\t\t\t\t: content;\n\t\t\t\tif (filteredContent.length === 0) continue;\n\t\t\t\tparams.push({\n\t\t\t\t\trole: \"user\",\n\t\t\t\t\tcontent: filteredContent,\n\t\t\t\t});\n\t\t\t}\n\t\t} else if (msg.role === \"assistant\") {\n\t\t\t// Some providers don't accept null content, use empty string instead\n\t\t\tconst assistantMsg: ChatCompletionAssistantMessageParam = {\n\t\t\t\trole: \"assistant\",\n\t\t\t\tcontent: compat.requiresAssistantAfterToolResult ? \"\" : null,\n\t\t\t};\n\n\t\t\tconst textBlocks = msg.content.filter((b) => b.type === \"text\") as TextContent[];\n\t\t\t// Filter out empty text blocks to avoid API validation errors\n\t\t\tconst nonEmptyTextBlocks = textBlocks.filter((b) => b.text && b.text.trim().length > 0);\n\t\t\tif (nonEmptyTextBlocks.length > 0) {\n\t\t\t\t// Always send assistant content as a plain string (OpenAI Chat Completions\n\t\t\t\t// API standard format). Sending as an array of {type:\"text\", text:\"...\"}\n\t\t\t\t// objects is non-standard and causes some models (e.g. DeepSeek V3.2 via\n\t\t\t\t// NVIDIA NIM) to mirror the content-block structure literally in their\n\t\t\t\t// output, producing recursive nesting like [{'type':'text','text':'[{...}]'}].\n\t\t\t\tassistantMsg.content = nonEmptyTextBlocks.map((b) => sanitizeSurrogates(b.text)).join(\"\");\n\t\t\t}\n\n\t\t\t// Handle thinking blocks\n\t\t\tconst thinkingBlocks = msg.content.filter((b) => b.type === \"thinking\") as ThinkingContent[];\n\t\t\t// Filter out empty thinking blocks to avoid API validation errors\n\t\t\tconst nonEmptyThinkingBlocks = thinkingBlocks.filter((b) => b.thinking && b.thinking.trim().length > 0);\n\t\t\tif (nonEmptyThinkingBlocks.length > 0) {\n\t\t\t\tif (compat.requiresThinkingAsText) {\n\t\t\t\t\t// Convert thinking blocks to plain text (no tags to avoid model mimicking them)\n\t\t\t\t\tconst thinkingText = nonEmptyThinkingBlocks.map((b) => b.thinking).join(\"\\n\\n\");\n\t\t\t\t\tconst textContent = assistantMsg.content as Array<{ type: \"text\"; text: string }> | null;\n\t\t\t\t\tif (textContent) {\n\t\t\t\t\t\ttextContent.unshift({ type: \"text\", text: thinkingText });\n\t\t\t\t\t} else {\n\t\t\t\t\t\tassistantMsg.content = [{ type: \"text\", text: thinkingText }];\n\t\t\t\t\t}\n\t\t\t\t} else {\n\t\t\t\t\t// Use the signature from the first thinking block if available (for llama.cpp server + gpt-oss)\n\t\t\t\t\tconst signature = nonEmptyThinkingBlocks[0].thinkingSignature;\n\t\t\t\t\tif (signature && signature.length > 0) {\n\t\t\t\t\t\t(assistantMsg as any)[signature] = nonEmptyThinkingBlocks.map((b) => b.thinking).join(\"\\n\");\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\n\t\t\tconst toolCalls = msg.content.filter((b) => b.type === \"toolCall\") as ToolCall[];\n\t\t\tif (toolCalls.length > 0) {\n\t\t\t\tassistantMsg.tool_calls = toolCalls.map((tc) => ({\n\t\t\t\t\tid: tc.id,\n\t\t\t\t\ttype: \"function\" as const,\n\t\t\t\t\tfunction: {\n\t\t\t\t\t\tname: tc.name,\n\t\t\t\t\t\targuments: JSON.stringify(tc.arguments),\n\t\t\t\t\t},\n\t\t\t\t}));\n\t\t\t\tconst reasoningDetails = toolCalls\n\t\t\t\t\t.filter((tc) => tc.thoughtSignature)\n\t\t\t\t\t.map((tc) => {\n\t\t\t\t\t\ttry {\n\t\t\t\t\t\t\treturn JSON.parse(tc.thoughtSignature!);\n\t\t\t\t\t\t} catch {\n\t\t\t\t\t\t\treturn null;\n\t\t\t\t\t\t}\n\t\t\t\t\t})\n\t\t\t\t\t.filter(Boolean);\n\t\t\t\tif (reasoningDetails.length > 0) {\n\t\t\t\t\t(assistantMsg as any).reasoning_details = reasoningDetails;\n\t\t\t\t}\n\t\t\t}\n\t\t\t// Skip assistant messages that have no content and no tool calls.\n\t\t\t// Some providers require \"either content or tool_calls, but not none\".\n\t\t\t// Other providers also don't accept empty assistant messages.\n\t\t\t// This handles aborted assistant responses that got no content.\n\t\t\tconst content = assistantMsg.content;\n\t\t\tconst hasContent =\n\t\t\t\tcontent !== null &&\n\t\t\t\tcontent !== undefined &&\n\t\t\t\t(typeof content === \"string\" ? content.length > 0 : content.length > 0);\n\t\t\tif (!hasContent && !assistantMsg.tool_calls) {\n\t\t\t\tcontinue;\n\t\t\t}\n\t\t\tparams.push(assistantMsg);\n\t\t} else if (msg.role === \"toolResult\") {\n\t\t\tconst imageBlocks: Array<{ type: \"image_url\"; image_url: { url: string } }> = [];\n\t\t\tlet j = i;\n\n\t\t\tfor (; j < transformedMessages.length && transformedMessages[j].role === \"toolResult\"; j++) {\n\t\t\t\tconst toolMsg = transformedMessages[j] as ToolResultMessage;\n\n\t\t\t\t// Extract text and image content\n\t\t\t\tconst textResult = toolMsg.content\n\t\t\t\t\t.filter((c) => c.type === \"text\")\n\t\t\t\t\t.map((c) => (c as any).text)\n\t\t\t\t\t.join(\"\\n\");\n\t\t\t\tconst hasImages = toolMsg.content.some((c) => c.type === \"image\");\n\n\t\t\t\t// Always send tool result with text (or placeholder if only images)\n\t\t\t\tconst hasText = textResult.length > 0;\n\t\t\t\t// Some providers require the 'name' field in tool results\n\t\t\t\tconst toolResultMsg: ChatCompletionToolMessageParam = {\n\t\t\t\t\trole: \"tool\",\n\t\t\t\t\tcontent: sanitizeSurrogates(hasText ? textResult : \"(see attached image)\"),\n\t\t\t\t\ttool_call_id: toolMsg.toolCallId,\n\t\t\t\t};\n\t\t\t\tif (compat.requiresToolResultName && toolMsg.toolName) {\n\t\t\t\t\t(toolResultMsg as any).name = toolMsg.toolName;\n\t\t\t\t}\n\t\t\t\tparams.push(toolResultMsg);\n\n\t\t\t\tif (hasImages && model.input.includes(\"image\")) {\n\t\t\t\t\tfor (const block of toolMsg.content) {\n\t\t\t\t\t\tif (block.type === \"image\") {\n\t\t\t\t\t\t\timageBlocks.push({\n\t\t\t\t\t\t\t\ttype: \"image_url\",\n\t\t\t\t\t\t\t\timage_url: {\n\t\t\t\t\t\t\t\t\turl: `data:${(block as any).mimeType};base64,${(block as any).data}`,\n\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t});\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\n\t\t\ti = j - 1;\n\n\t\t\tif (imageBlocks.length > 0) {\n\t\t\t\tif (compat.requiresAssistantAfterToolResult) {\n\t\t\t\t\tparams.push({\n\t\t\t\t\t\trole: \"assistant\",\n\t\t\t\t\t\tcontent: \"I have processed the tool results.\",\n\t\t\t\t\t});\n\t\t\t\t}\n\n\t\t\t\tparams.push({\n\t\t\t\t\trole: \"user\",\n\t\t\t\t\tcontent: [\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\ttype: \"text\",\n\t\t\t\t\t\t\ttext: \"Attached image(s) from tool result:\",\n\t\t\t\t\t\t},\n\t\t\t\t\t\t...imageBlocks,\n\t\t\t\t\t],\n\t\t\t\t});\n\t\t\t\tlastRole = \"user\";\n\t\t\t} else {\n\t\t\t\tlastRole = \"toolResult\";\n\t\t\t}\n\t\t\tcontinue;\n\t\t}\n\n\t\tlastRole = msg.role;\n\t}\n\n\treturn params;\n}\n\nfunction convertTools(\n\ttools: Tool[],\n\tcompat: Required,\n): OpenAI.Chat.Completions.ChatCompletionTool[] {\n\treturn tools.map((tool) => ({\n\t\ttype: \"function\",\n\t\tfunction: {\n\t\t\tname: tool.name,\n\t\t\tdescription: tool.description,\n\t\t\tparameters: tool.parameters as any, // TypeBox already generates JSON Schema\n\t\t\t// Only include strict if provider supports it. Some reject unknown fields.\n\t\t\t...(compat.supportsStrictMode !== false && { strict: false }),\n\t\t},\n\t}));\n}\n\nfunction parseChunkUsage(\n\trawUsage: {\n\t\tprompt_tokens?: number;\n\t\tcompletion_tokens?: number;\n\t\tprompt_tokens_details?: { cached_tokens?: number };\n\t\tcompletion_tokens_details?: { reasoning_tokens?: number };\n\t},\n\tmodel: Model<\"openai-completions\">,\n): AssistantMessage[\"usage\"] {\n\tconst cachedTokens = rawUsage.prompt_tokens_details?.cached_tokens || 0;\n\tconst reasoningTokens = rawUsage.completion_tokens_details?.reasoning_tokens || 0;\n\t// OpenAI includes cached tokens in prompt_tokens, so subtract to get non-cached input\n\tconst input = (rawUsage.prompt_tokens || 0) - cachedTokens;\n\t// Compute totalTokens ourselves since we add reasoning_tokens to output\n\t// and some providers (e.g., Groq) don't include them in total_tokens\n\tconst outputTokens = (rawUsage.completion_tokens || 0) + reasoningTokens;\n\tconst usage: AssistantMessage[\"usage\"] = {\n\t\tinput,\n\t\toutput: outputTokens,\n\t\tcacheRead: cachedTokens,\n\t\tcacheWrite: 0,\n\t\ttotalTokens: input + outputTokens + cachedTokens,\n\t\tcost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },\n\t};\n\tcalculateCost(model, usage);\n\treturn usage;\n}\n\nfunction mapStopReason(reason: ChatCompletionChunk.Choice[\"finish_reason\"] | string): {\n\tstopReason: StopReason;\n\terrorMessage?: string;\n} {\n\tif (reason === null) return { stopReason: \"stop\" };\n\tswitch (reason) {\n\t\tcase \"stop\":\n\t\tcase \"end\":\n\t\t\treturn { stopReason: \"stop\" };\n\t\tcase \"length\":\n\t\t\treturn { stopReason: \"length\" };\n\t\tcase \"function_call\":\n\t\tcase \"tool_calls\":\n\t\t\treturn { stopReason: \"toolUse\" };\n\t\tcase \"content_filter\":\n\t\t\treturn { stopReason: \"error\", errorMessage: \"Provider finish_reason: content_filter\" };\n\t\tcase \"network_error\":\n\t\t\treturn { stopReason: \"error\", errorMessage: \"Provider finish_reason: network_error\" };\n\t\tdefault:\n\t\t\treturn {\n\t\t\t\tstopReason: \"error\",\n\t\t\t\terrorMessage: `Provider finish_reason: ${reason}`,\n\t\t\t};\n\t}\n}\n\n/**\n * Detect compatibility settings from provider and baseUrl for known providers.\n * Provider takes precedence over URL-based detection since it's explicitly configured.\n * Returns a fully resolved OpenAICompletionsCompat object with all fields set.\n */\nfunction detectCompat(model: Model<\"openai-completions\">): Required {\n\tconst provider = model.provider;\n\tconst baseUrl = model.baseUrl;\n\n\tconst isZai = provider === \"zai\" || baseUrl.includes(\"api.z.ai\");\n\n\tconst isNonStandard =\n\t\tprovider === \"cerebras\" ||\n\t\tbaseUrl.includes(\"cerebras.ai\") ||\n\t\tprovider === \"xai\" ||\n\t\tbaseUrl.includes(\"api.x.ai\") ||\n\t\tbaseUrl.includes(\"chutes.ai\") ||\n\t\tbaseUrl.includes(\"deepseek.com\") ||\n\t\tisZai ||\n\t\tprovider === \"opencode\" ||\n\t\tbaseUrl.includes(\"opencode.ai\");\n\n\tconst useMaxTokens = baseUrl.includes(\"chutes.ai\");\n\n\tconst isGrok = provider === \"xai\" || baseUrl.includes(\"api.x.ai\");\n\tconst isGroq = provider === \"groq\" || baseUrl.includes(\"groq.com\");\n\n\tconst reasoningEffortMap =\n\t\tisGroq && model.id === \"qwen/qwen3-32b\"\n\t\t\t? {\n\t\t\t\t\tminimal: \"default\",\n\t\t\t\t\tlow: \"default\",\n\t\t\t\t\tmedium: \"default\",\n\t\t\t\t\thigh: \"default\",\n\t\t\t\t\txhigh: \"default\",\n\t\t\t\t}\n\t\t\t: {};\n\treturn {\n\t\tsupportsStore: !isNonStandard,\n\t\tsupportsDeveloperRole: !isNonStandard,\n\t\tsupportsReasoningEffort: !isGrok && !isZai,\n\t\treasoningEffortMap,\n\t\tsupportsUsageInStreaming: true,\n\t\tmaxTokensField: useMaxTokens ? \"max_tokens\" : \"max_completion_tokens\",\n\t\trequiresToolResultName: false,\n\t\trequiresAssistantAfterToolResult: false,\n\t\trequiresThinkingAsText: false,\n\t\tthinkingFormat: isZai\n\t\t\t? \"zai\"\n\t\t\t: provider === \"openrouter\" || baseUrl.includes(\"openrouter.ai\")\n\t\t\t\t? \"openrouter\"\n\t\t\t\t: \"openai\",\n\t\topenRouterRouting: {},\n\t\tvercelGatewayRouting: {},\n\t\tsupportsStrictMode: true,\n\t};\n}\n\n/**\n * Get resolved compatibility settings for a model.\n * Uses explicit model.compat if provided, otherwise auto-detects from provider/URL.\n */\nfunction getCompat(model: Model<\"openai-completions\">): Required {\n\tconst detected = detectCompat(model);\n\tif (!model.compat) return detected;\n\n\treturn {\n\t\tsupportsStore: model.compat.supportsStore ?? detected.supportsStore,\n\t\tsupportsDeveloperRole: model.compat.supportsDeveloperRole ?? detected.supportsDeveloperRole,\n\t\tsupportsReasoningEffort: model.compat.supportsReasoningEffort ?? detected.supportsReasoningEffort,\n\t\treasoningEffortMap: model.compat.reasoningEffortMap ?? detected.reasoningEffortMap,\n\t\tsupportsUsageInStreaming: model.compat.supportsUsageInStreaming ?? detected.supportsUsageInStreaming,\n\t\tmaxTokensField: model.compat.maxTokensField ?? detected.maxTokensField,\n\t\trequiresToolResultName: model.compat.requiresToolResultName ?? detected.requiresToolResultName,\n\t\trequiresAssistantAfterToolResult:\n\t\t\tmodel.compat.requiresAssistantAfterToolResult ?? detected.requiresAssistantAfterToolResult,\n\t\trequiresThinkingAsText: model.compat.requiresThinkingAsText ?? detected.requiresThinkingAsText,\n\t\tthinkingFormat: model.compat.thinkingFormat ?? detected.thinkingFormat,\n\t\topenRouterRouting: model.compat.openRouterRouting ?? {},\n\t\tvercelGatewayRouting: model.compat.vercelGatewayRouting ?? detected.vercelGatewayRouting,\n\t\tsupportsStrictMode: model.compat.supportsStrictMode ?? detected.supportsStrictMode,\n\t};\n}\n"]}