Mirror of https://github.com/ollama/ollama.git (synced 2026-04-17 15:53:27 +02:00)
* bench: add prompt calibration, context size flag, and NumCtx reporting

  Add --num-ctx flag to set context size, and report NumCtx in model info header. Calibrate tokens-per-word ratio during warmup using actual tokenization metrics from the model, replacing the fixed 1.3 heuristic. This produces more accurate prompt token counts for --prompt-tokens. Also add fetchContextLength() to query running model context via /api/ps.

* integration: improve vision test robustness and add thinking tests

  Add skipIfNoVisionOverride() to skip vision tests when OLLAMA_TEST_MODEL is set to a non-vision model. Add Think:false to context exhaustion test to prevent thinking models from using all context before the test can measure it. Add third test image (ollama homepage) and replace OCR test with ImageDescription test using it. Relax match strings for broader model compatibility. Add TestThinkingEnabled and TestThinkingSuppressed to verify thinking output and channel tag handling.

* gemma4: add Gemma 4 GGML model support

  Add full Gemma 4 model family support (E2B, E4B, 26B MoE, 31B Dense) for the GGML backend including text, vision, converter, parser, and renderer.

  Text model features:
  - Sliding window + full attention with per-layer patterns
  - KV sharing across layers with donor map
  - Per-layer embeddings (PLE) with learned projections
  - MoE routing with RMSNorm + learned scale
  - Proportional RoPE with freq_factors for global attention
  - Final logit softcapping

  Vision model features:
  - SigLIP vision encoder with 2D RoPE
  - ClippableLinear with input/output clamping via packed v.clamp_data
  - Adaptive average pooling with nMerge kernel
  - Multi-modal projection with unweighted RMSNorm

  Converter:
  - Safetensors to GGUF with vision tensor renaming
  - Fused MoE gate_up_proj splitting
  - Vision patch embedding reshape (HF to Conv2D layout)
  - Packed clamp data tensor for ClippableLinear bounds
  - Proportional RoPE freq_factors generation

  Also includes:
  - BackendGet() on ml.Tensor for reading weight tensor data
  - Q6_K CUDA get_rows kernel support
  - MoE-aware ffn_down quantization layer counting
  - Gemma4 parser with tool calling and thinking support
  - Gemma4 renderer with structured tool format
  - Architecture-based auto-detection of renderer/parser/stop tokens
  - Integration test gemma4 model list additions

* gemma4: add audio support with USM conformer encoder

  Add audio encoding for Gemma 4 using the USM conformer architecture:
  - Converter: audio tensor mapping, SSCP/conformer/embedder name replacements, softplus repacker for per_dim_scale, F32 enforcement for conv weights
  - GGML backend: Conv1DDW and PadExt tensor ops
  - Audio encoder: SSCP Conv2D, 12 conformer blocks (FFW + block-local attention with relative position embeddings + LightConv1d + FFW), output projection, audio-to-text embedding projector
  - Audio preprocessing: WAV decode, mel spectrogram, FFT (pure Go)
  - Model wiring: WAV detection, audio token handling, unified PostTokenize

  Correctly transcribes "why is the sky blue" from test audio.

* integration: add gemma4 audio tests including OpenAI API coverage

  Test audio transcription and response via the Ollama native API, plus two new tests exercising the OpenAI-compatible endpoints:
  - /v1/audio/transcriptions (multipart form upload)
  - /v1/chat/completions with input_audio content type

  All tests use capability checks and skip models without audio support.
* gemma4: add OpenAI audio API support and capability detection

  - Add CapabilityAudio and detect from audio.block_count in GGUF
  - Add /v1/audio/transcriptions endpoint with TranscriptionMiddleware
  - Add input_audio content type support in /v1/chat/completions
  - Add TranscriptionRequest/Response types in openai package

* gemma4: add audio input support for run command

  - /audio toggle in interactive mode for voice chat
  - Platform-specific microphone recording (AVFoundation on macOS, PulseAudio/ALSA on Linux, WASAPI on Windows)
  - Space to start/stop recording, automatic chunking for long audio

* gemma4: add transcribe command (ollama transcribe MODEL)

  - Interactive mode with readline prompt and slash commands
  - Non-interactive mode for piped audio or record-until-Ctrl+C
  - Chunked streaming transcription for long recordings
  - Word-wrapped output matching run command style

* gemma4: add parser, renderer, and integration test plumbing

* gemma4: fix renderer to emit BOS token

* gemma4: add OpenAI audio transcription API and input_audio support

* gemma4: update converter for new weight drop naming

* gemma4: add per_expert_scale to MoE router and fix moe_intermediate_size config

* gemma4: rewrite renderer to match HF Jinja2 template exactly

  Fix 8 bugs found by building 55 reference tests verified against the HF Jinja2 chat template (VERIFY_JINJA2=1 shells out to Python):
  - Tool responses use separate <|turn>tool turns (not inline tags)
  - Tool calls emitted before content in assistant messages
  - Thinking content stripped from assistant history (strip_thinking)
  - User, tool, and system content trimmed (template does | trim)
  - Empty system message still emits system turn (check role, not content)
  - Nested object properties rendered recursively with required field
  - Array items specification rendered for array-type properties
  - OBJECT/ARRAY type-specific rendering comma logic matches template

  Also adds Required field to api.ToolProperty for nested object schemas, replaces old gemma4_test.go with comprehensive gemma4_reference_test.go, and commits the Jinja2 template as testdata for verification.

* gemma4: fix MoE fused gate_up split and multiline tool-call arg parsing

  - Text MoE: split `ffn_gate_up_exps` into contiguous `[gate|up]` halves instead of stride-2 slices.
  - Parser: escape control characters in `<|"|>...<|"|>` string literals when converting tool-call args to JSON.
  - Fixes warnings like `invalid character '\n' in string literal` for multiline tool arguments.
  - Add Gemma4 parser regressions for multiline tool-call args and `gemma4ArgsToJSON`.

* cmd: simplify audio input to dropped file attachments

* gemma4: use full SWA memory for better cache reuse

* gemma4: initialize clamps after backend load

* convert: align gemma4 audio tensor renames with llama.cpp

* Remove redundant comments in gemma4 vision model

* Format Gemma4 MoE block field alignment

* use 4096 kvcache.NewSWAMemCache

* convert: support new Gemma4 audio_tower tensor naming (#15221)

* fix integration test defaults for audio

* review comments and lint fixes

* remove unused audio/video files

---------

Co-authored-by: jmorganca <jmorganca@gmail.com>
923 lines · 26 KiB · Go
// Package openai provides core transformation logic for partial compatibility with the OpenAI REST API.
package openai

import (
	"bytes"
	"encoding/base64"
	"encoding/binary"
	"encoding/json"
	"errors"
	"fmt"
	"log/slog"
	"net/http"
	"slices"
	"strings"
	"time"

	"github.com/ollama/ollama/api"
	"github.com/ollama/ollama/types/model"
)

var finishReasonToolCalls = "tool_calls"

type Error struct {
	Message string  `json:"message"`
	Type    string  `json:"type"`
	Param   any     `json:"param"`
	Code    *string `json:"code"`
}

type ErrorResponse struct {
	Error Error `json:"error"`
}

type Message struct {
	Role       string     `json:"role"`
	Content    any        `json:"content"`
	Reasoning  string     `json:"reasoning,omitempty"`
	ToolCalls  []ToolCall `json:"tool_calls,omitempty"`
	Name       string     `json:"name,omitempty"`
	ToolCallID string     `json:"tool_call_id,omitempty"`
}

type ChoiceLogprobs struct {
	Content []api.Logprob `json:"content"`
}

type Choice struct {
	Index        int             `json:"index"`
	Message      Message         `json:"message"`
	FinishReason *string         `json:"finish_reason"`
	Logprobs     *ChoiceLogprobs `json:"logprobs,omitempty"`
}

type ChunkChoice struct {
	Index        int             `json:"index"`
	Delta        Message         `json:"delta"`
	FinishReason *string         `json:"finish_reason"`
	Logprobs     *ChoiceLogprobs `json:"logprobs,omitempty"`
}

type CompleteChunkChoice struct {
	Text         string          `json:"text"`
	Index        int             `json:"index"`
	FinishReason *string         `json:"finish_reason"`
	Logprobs     *ChoiceLogprobs `json:"logprobs,omitempty"`
}

type Usage struct {
	PromptTokens     int `json:"prompt_tokens"`
	CompletionTokens int `json:"completion_tokens"`
	TotalTokens      int `json:"total_tokens"`
}

type ResponseFormat struct {
	Type       string      `json:"type"`
	JsonSchema *JsonSchema `json:"json_schema,omitempty"`
}

type JsonSchema struct {
	Schema json.RawMessage `json:"schema"`
}

type EmbedRequest struct {
	Input          any    `json:"input"`
	Model          string `json:"model"`
	Dimensions     int    `json:"dimensions,omitempty"`
	EncodingFormat string `json:"encoding_format,omitempty"` // "float" or "base64"
}

type StreamOptions struct {
	IncludeUsage bool `json:"include_usage"`
}

type Reasoning struct {
	Effort string `json:"effort,omitempty"`
}

type ChatCompletionRequest struct {
	Model            string          `json:"model"`
	Messages         []Message       `json:"messages"`
	Stream           bool            `json:"stream"`
	StreamOptions    *StreamOptions  `json:"stream_options"`
	MaxTokens        *int            `json:"max_tokens"`
	Seed             *int            `json:"seed"`
	Stop             any             `json:"stop"`
	Temperature      *float64        `json:"temperature"`
	FrequencyPenalty *float64        `json:"frequency_penalty"`
	PresencePenalty  *float64        `json:"presence_penalty"`
	TopP             *float64        `json:"top_p"`
	ResponseFormat   *ResponseFormat `json:"response_format"`
	Tools            []api.Tool      `json:"tools"`
	Reasoning        *Reasoning      `json:"reasoning,omitempty"`
	ReasoningEffort  *string         `json:"reasoning_effort,omitempty"`
	Logprobs         *bool           `json:"logprobs"`
	TopLogprobs      int             `json:"top_logprobs"`
	DebugRenderOnly  bool            `json:"_debug_render_only"`
}

type ChatCompletion struct {
	Id                string         `json:"id"`
	Object            string         `json:"object"`
	Created           int64          `json:"created"`
	Model             string         `json:"model"`
	SystemFingerprint string         `json:"system_fingerprint"`
	Choices           []Choice       `json:"choices"`
	Usage             Usage          `json:"usage,omitempty"`
	DebugInfo         *api.DebugInfo `json:"_debug_info,omitempty"`
}

type ChatCompletionChunk struct {
	Id                string        `json:"id"`
	Object            string        `json:"object"`
	Created           int64         `json:"created"`
	Model             string        `json:"model"`
	SystemFingerprint string        `json:"system_fingerprint"`
	Choices           []ChunkChoice `json:"choices"`
	Usage             *Usage        `json:"usage,omitempty"`
}

// TODO (https://github.com/ollama/ollama/issues/5259): support []string, []int and [][]int
type CompletionRequest struct {
	Model            string         `json:"model"`
	Prompt           string         `json:"prompt"`
	FrequencyPenalty float32        `json:"frequency_penalty"`
	MaxTokens        *int           `json:"max_tokens"`
	PresencePenalty  float32        `json:"presence_penalty"`
	Seed             *int           `json:"seed"`
	Stop             any            `json:"stop"`
	Stream           bool           `json:"stream"`
	StreamOptions    *StreamOptions `json:"stream_options"`
	Temperature      *float32       `json:"temperature"`
	TopP             float32        `json:"top_p"`
	Suffix           string         `json:"suffix"`
	Logprobs         *int           `json:"logprobs"`
	DebugRenderOnly  bool           `json:"_debug_render_only"`
}

type Completion struct {
	Id                string                `json:"id"`
	Object            string                `json:"object"`
	Created           int64                 `json:"created"`
	Model             string                `json:"model"`
	SystemFingerprint string                `json:"system_fingerprint"`
	Choices           []CompleteChunkChoice `json:"choices"`
	Usage             Usage                 `json:"usage,omitempty"`
}

type CompletionChunk struct {
	Id                string                `json:"id"`
	Object            string                `json:"object"`
	Created           int64                 `json:"created"`
	Choices           []CompleteChunkChoice `json:"choices"`
	Model             string                `json:"model"`
	SystemFingerprint string                `json:"system_fingerprint"`
	Usage             *Usage                `json:"usage,omitempty"`
}

type ToolCall struct {
	ID       string `json:"id"`
	Index    int    `json:"index"`
	Type     string `json:"type"`
	Function struct {
		Name      string `json:"name"`
		Arguments string `json:"arguments"`
	} `json:"function"`
}

type Model struct {
	Id      string `json:"id"`
	Object  string `json:"object"`
	Created int64  `json:"created"`
	OwnedBy string `json:"owned_by"`
}

type Embedding struct {
	Object    string `json:"object"`
	Embedding any    `json:"embedding"` // Can be []float32 (float format) or string (base64 format)
	Index     int    `json:"index"`
}

type ListCompletion struct {
	Object string  `json:"object"`
	Data   []Model `json:"data"`
}

type EmbeddingList struct {
	Object string         `json:"object"`
	Data   []Embedding    `json:"data"`
	Model  string         `json:"model"`
	Usage  EmbeddingUsage `json:"usage,omitempty"`
}

type EmbeddingUsage struct {
	PromptTokens int `json:"prompt_tokens"`
	TotalTokens  int `json:"total_tokens"`
}

func NewError(code int, message string) ErrorResponse {
	var etype string
	switch code {
	case http.StatusBadRequest:
		etype = "invalid_request_error"
	case http.StatusNotFound:
		etype = "not_found_error"
	default:
		etype = "api_error"
	}

	return ErrorResponse{Error{Type: etype, Message: message}}
}

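// exampleNewErrorEnvelope is an illustrative sketch, not part of the upstream
// package API: it shows the OpenAI-style error envelope that NewError
// produces. The message text used here is hypothetical.
func exampleNewErrorEnvelope() string {
	b, _ := json.Marshal(NewError(http.StatusBadRequest, "model is required"))
	// b: {"error":{"message":"model is required","type":"invalid_request_error","param":null,"code":null}}
	return string(b)
}
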
// ToUsage converts an api.ChatResponse to Usage
func ToUsage(r api.ChatResponse) Usage {
	return Usage{
		PromptTokens:     r.Metrics.PromptEvalCount,
		CompletionTokens: r.Metrics.EvalCount,
		TotalTokens:      r.Metrics.PromptEvalCount + r.Metrics.EvalCount,
	}
}

// ToToolCalls converts api.ToolCall to OpenAI ToolCall format
func ToToolCalls(tc []api.ToolCall) []ToolCall {
	toolCalls := make([]ToolCall, len(tc))
	for i, tc := range tc {
		toolCalls[i].ID = tc.ID
		toolCalls[i].Type = "function"
		toolCalls[i].Function.Name = tc.Function.Name
		toolCalls[i].Index = tc.Function.Index

		args, err := json.Marshal(tc.Function.Arguments)
		if err != nil {
			slog.Error("could not marshal function arguments to json", "error", err)
			continue
		}

		toolCalls[i].Function.Arguments = string(args)
	}
	return toolCalls
}

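// exampleToToolCalls is an illustrative sketch, not part of the upstream API:
// OpenAI represents tool-call arguments as a JSON-encoded string, so
// ToToolCalls marshals the structured api arguments into that string form.
func exampleToToolCalls(calls []api.ToolCall) []ToolCall {
	converted := ToToolCalls(calls)
	for _, c := range converted {
		// c.Function.Arguments now holds a JSON string such as {"city":"Paris"}.
		_ = c.Function.Arguments
	}
	return converted
}
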
// ToChatCompletion converts an api.ChatResponse to ChatCompletion
func ToChatCompletion(id string, r api.ChatResponse) ChatCompletion {
	toolCalls := ToToolCalls(r.Message.ToolCalls)

	var logprobs *ChoiceLogprobs
	if len(r.Logprobs) > 0 {
		logprobs = &ChoiceLogprobs{Content: r.Logprobs}
	}

	return ChatCompletion{
		Id:                id,
		Object:            "chat.completion",
		Created:           r.CreatedAt.Unix(),
		Model:             r.Model,
		SystemFingerprint: "fp_ollama",
		Choices: []Choice{{
			Index:   0,
			Message: Message{Role: r.Message.Role, Content: r.Message.Content, ToolCalls: toolCalls, Reasoning: r.Message.Thinking},
			FinishReason: func(reason string) *string {
				if len(toolCalls) > 0 {
					reason = "tool_calls"
				}
				if len(reason) > 0 {
					return &reason
				}
				return nil
			}(r.DoneReason),
			Logprobs: logprobs,
		}},
		Usage:     ToUsage(r),
		DebugInfo: r.DebugInfo,
	}
}

func toChunk(id string, r api.ChatResponse, toolCallSent bool) ChatCompletionChunk {
	toolCalls := ToToolCalls(r.Message.ToolCalls)

	var logprobs *ChoiceLogprobs
	if len(r.Logprobs) > 0 {
		logprobs = &ChoiceLogprobs{Content: r.Logprobs}
	}

	return ChatCompletionChunk{
		Id:                id,
		Object:            "chat.completion.chunk",
		Created:           time.Now().Unix(),
		Model:             r.Model,
		SystemFingerprint: "fp_ollama",
		Choices: []ChunkChoice{{
			Index: 0,
			Delta: Message{Role: "assistant", Content: r.Message.Content, ToolCalls: toolCalls, Reasoning: r.Message.Thinking},
			FinishReason: func(reason string) *string {
				if len(reason) > 0 {
					if toolCallSent || len(toolCalls) > 0 {
						return &finishReasonToolCalls
					}
					return &reason
				}
				return nil
			}(r.DoneReason),
			Logprobs: logprobs,
		}},
	}
}

// ToChunks converts an api.ChatResponse to one or more ChatCompletionChunk values.
func ToChunks(id string, r api.ChatResponse, toolCallSent bool) []ChatCompletionChunk {
	hasMixedResponse := r.Message.Thinking != "" && (r.Message.Content != "" || len(r.Message.ToolCalls) > 0)
	if !hasMixedResponse {
		return []ChatCompletionChunk{toChunk(id, r, toolCallSent)}
	}

	reasoningChunk := toChunk(id, r, toolCallSent)
	// The logprobs here might include tokens not in this chunk because we now split between thinking and content/tool calls.
	reasoningChunk.Choices[0].Delta.Content = ""
	reasoningChunk.Choices[0].Delta.ToolCalls = nil
	reasoningChunk.Choices[0].FinishReason = nil

	contentOrToolCallsChunk := toChunk(id, r, toolCallSent)
	// Keep both split chunks on the same timestamp since they represent one logical emission.
	contentOrToolCallsChunk.Created = reasoningChunk.Created
	contentOrToolCallsChunk.Choices[0].Delta.Reasoning = ""
	contentOrToolCallsChunk.Choices[0].Logprobs = nil

	return []ChatCompletionChunk{
		reasoningChunk,
		contentOrToolCallsChunk,
	}
}

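// exampleToChunksSplit is an illustrative sketch, not part of the upstream
// API: when a response carries both thinking and content (or tool calls),
// ToChunks returns two chunks, the first holding only the reasoning delta and
// the second only the content/tool-call delta. The id is hypothetical.
func exampleToChunksSplit(r api.ChatResponse) (reasoning, content string) {
	chunks := ToChunks("chatcmpl-123", r, false)
	if len(chunks) == 2 {
		reasoning = chunks[0].Choices[0].Delta.Reasoning
		content, _ = chunks[1].Choices[0].Delta.Content.(string)
	}
	return reasoning, content
}
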
// Deprecated: use ToChunks for streaming conversion.
func ToChunk(id string, r api.ChatResponse, toolCallSent bool) ChatCompletionChunk {
	return toChunk(id, r, toolCallSent)
}

// ToUsageGenerate converts an api.GenerateResponse to Usage
func ToUsageGenerate(r api.GenerateResponse) Usage {
	return Usage{
		PromptTokens:     r.Metrics.PromptEvalCount,
		CompletionTokens: r.Metrics.EvalCount,
		TotalTokens:      r.Metrics.PromptEvalCount + r.Metrics.EvalCount,
	}
}

// ToCompletion converts an api.GenerateResponse to Completion
func ToCompletion(id string, r api.GenerateResponse) Completion {
	return Completion{
		Id:                id,
		Object:            "text_completion",
		Created:           r.CreatedAt.Unix(),
		Model:             r.Model,
		SystemFingerprint: "fp_ollama",
		Choices: []CompleteChunkChoice{{
			Text:  r.Response,
			Index: 0,
			FinishReason: func(reason string) *string {
				if len(reason) > 0 {
					return &reason
				}
				return nil
			}(r.DoneReason),
		}},
		Usage: ToUsageGenerate(r),
	}
}

// ToCompleteChunk converts an api.GenerateResponse to CompletionChunk
func ToCompleteChunk(id string, r api.GenerateResponse) CompletionChunk {
	return CompletionChunk{
		Id:                id,
		Object:            "text_completion",
		Created:           time.Now().Unix(),
		Model:             r.Model,
		SystemFingerprint: "fp_ollama",
		Choices: []CompleteChunkChoice{{
			Text:  r.Response,
			Index: 0,
			FinishReason: func(reason string) *string {
				if len(reason) > 0 {
					return &reason
				}
				return nil
			}(r.DoneReason),
		}},
	}
}

// ToListCompletion converts an api.ListResponse to ListCompletion
func ToListCompletion(r api.ListResponse) ListCompletion {
	var data []Model
	for _, m := range r.Models {
		data = append(data, Model{
			Id:      m.Name,
			Object:  "model",
			Created: m.ModifiedAt.Unix(),
			OwnedBy: model.ParseName(m.Name).Namespace,
		})
	}

	return ListCompletion{
		Object: "list",
		Data:   data,
	}
}

// ToEmbeddingList converts an api.EmbedResponse to EmbeddingList
// encodingFormat can be "float", "base64", or empty (defaults to "float")
func ToEmbeddingList(model string, r api.EmbedResponse, encodingFormat string) EmbeddingList {
	if r.Embeddings != nil {
		var data []Embedding
		for i, e := range r.Embeddings {
			var embedding any
			if strings.EqualFold(encodingFormat, "base64") {
				embedding = floatsToBase64(e)
			} else {
				embedding = e
			}

			data = append(data, Embedding{
				Object:    "embedding",
				Embedding: embedding,
				Index:     i,
			})
		}

		return EmbeddingList{
			Object: "list",
			Data:   data,
			Model:  model,
			Usage: EmbeddingUsage{
				PromptTokens: r.PromptEvalCount,
				TotalTokens:  r.PromptEvalCount,
			},
		}
	}

	return EmbeddingList{}
}

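// exampleEmbeddingEncoding is an illustrative sketch, not part of the
// upstream API: with encodingFormat "base64" each embedding is returned as a
// base64 string of little-endian float32 bytes; any other value yields plain
// []float32. The model name "all-minilm" is hypothetical.
func exampleEmbeddingEncoding(r api.EmbedResponse) (asFloats, asBase64 EmbeddingList) {
	asFloats = ToEmbeddingList("all-minilm", r, "float")
	asBase64 = ToEmbeddingList("all-minilm", r, "base64")
	return asFloats, asBase64
}
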
// floatsToBase64 encodes a []float32 to a base64 string
func floatsToBase64(floats []float32) string {
	var buf bytes.Buffer
	// binary.Write into a bytes.Buffer cannot fail for fixed-size data, so the
	// error is intentionally ignored.
	binary.Write(&buf, binary.LittleEndian, floats)
	return base64.StdEncoding.EncodeToString(buf.Bytes())
}

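// exampleDecodeBase64Embedding is an illustrative sketch, not part of the
// upstream API, showing the inverse of floatsToBase64: decode the base64
// payload and reinterpret the bytes as little-endian float32 values.
func exampleDecodeBase64Embedding(s string) ([]float32, error) {
	raw, err := base64.StdEncoding.DecodeString(s)
	if err != nil {
		return nil, err
	}
	floats := make([]float32, len(raw)/4)
	if err := binary.Read(bytes.NewReader(raw), binary.LittleEndian, floats); err != nil {
		return nil, err
	}
	return floats, nil
}
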
// ToModel converts an api.ShowResponse to Model
func ToModel(r api.ShowResponse, m string) Model {
	return Model{
		Id:      m,
		Object:  "model",
		Created: r.ModifiedAt.Unix(),
		OwnedBy: model.ParseName(m).Namespace,
	}
}

// FromChatRequest converts a ChatCompletionRequest to api.ChatRequest
func FromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
	var messages []api.Message
	for _, msg := range r.Messages {
		toolName := ""
		if strings.ToLower(msg.Role) == "tool" {
			toolName = msg.Name
			if toolName == "" && msg.ToolCallID != "" {
				toolName = nameFromToolCallID(r.Messages, msg.ToolCallID)
			}
		}
		switch content := msg.Content.(type) {
		case string:
			toolCalls, err := FromCompletionToolCall(msg.ToolCalls)
			if err != nil {
				return nil, err
			}
			messages = append(messages, api.Message{Role: msg.Role, Content: content, Thinking: msg.Reasoning, ToolCalls: toolCalls, ToolName: toolName, ToolCallID: msg.ToolCallID})
		case []any:
			for _, c := range content {
				data, ok := c.(map[string]any)
				if !ok {
					return nil, errors.New("invalid message format")
				}
				switch data["type"] {
				case "text":
					text, ok := data["text"].(string)
					if !ok {
						return nil, errors.New("invalid message format")
					}
					messages = append(messages, api.Message{Role: msg.Role, Content: text})
				case "image_url":
					var url string
					if urlMap, ok := data["image_url"].(map[string]any); ok {
						if url, ok = urlMap["url"].(string); !ok {
							return nil, errors.New("invalid message format")
						}
					} else {
						if url, ok = data["image_url"].(string); !ok {
							return nil, errors.New("invalid message format")
						}
					}

					img, err := decodeImageURL(url)
					if err != nil {
						return nil, err
					}

					messages = append(messages, api.Message{Role: msg.Role, Images: []api.ImageData{img}})
				case "input_audio":
					audioMap, ok := data["input_audio"].(map[string]any)
					if !ok {
						return nil, errors.New("invalid input_audio format")
					}
					b64Data, ok := audioMap["data"].(string)
					if !ok {
						return nil, errors.New("invalid input_audio format: missing data")
					}
					audioBytes, err := base64.StdEncoding.DecodeString(b64Data)
					if err != nil {
						return nil, fmt.Errorf("invalid input_audio base64 data: %w", err)
					}
					messages = append(messages, api.Message{Role: msg.Role, Images: []api.ImageData{audioBytes}})
				default:
					return nil, errors.New("invalid message format")
				}
			}
			// since we might have added multiple messages above, if we have tool
			// calls we'll add them to the last message
			if len(messages) > 0 && len(msg.ToolCalls) > 0 {
				toolCalls, err := FromCompletionToolCall(msg.ToolCalls)
				if err != nil {
					return nil, err
				}
				messages[len(messages)-1].ToolCalls = toolCalls
				messages[len(messages)-1].ToolName = toolName
				messages[len(messages)-1].ToolCallID = msg.ToolCallID
				messages[len(messages)-1].Thinking = msg.Reasoning
			}
		default:
			// content is only optional if tool calls are present
			if msg.ToolCalls == nil {
				return nil, fmt.Errorf("invalid message content type: %T", content)
			}

			toolCalls, err := FromCompletionToolCall(msg.ToolCalls)
			if err != nil {
				return nil, err
			}
			messages = append(messages, api.Message{Role: msg.Role, Thinking: msg.Reasoning, ToolCalls: toolCalls, ToolCallID: msg.ToolCallID})
		}
	}

	options := make(map[string]any)

	switch stop := r.Stop.(type) {
	case string:
		options["stop"] = []string{stop}
	case []any:
		var stops []string
		for _, s := range stop {
			if str, ok := s.(string); ok {
				stops = append(stops, str)
			}
		}
		options["stop"] = stops
	}

	if r.MaxTokens != nil {
		options["num_predict"] = *r.MaxTokens
	}

	if r.Temperature != nil {
		options["temperature"] = *r.Temperature
	} else {
		options["temperature"] = 1.0
	}

	if r.Seed != nil {
		options["seed"] = *r.Seed
	}

	if r.FrequencyPenalty != nil {
		options["frequency_penalty"] = *r.FrequencyPenalty
	}

	if r.PresencePenalty != nil {
		options["presence_penalty"] = *r.PresencePenalty
	}

	if r.TopP != nil {
		options["top_p"] = *r.TopP
	} else {
		options["top_p"] = 1.0
	}

	var format json.RawMessage
	if r.ResponseFormat != nil {
		switch strings.ToLower(strings.TrimSpace(r.ResponseFormat.Type)) {
		// Support the old "json_object" type for OpenAI compatibility
		case "json_object":
			format = json.RawMessage(`"json"`)
		case "json_schema":
			if r.ResponseFormat.JsonSchema != nil {
				format = r.ResponseFormat.JsonSchema.Schema
			}
		}
	}

	var think *api.ThinkValue
	var effort string

	if r.Reasoning != nil {
		effort = r.Reasoning.Effort
	} else if r.ReasoningEffort != nil {
		effort = *r.ReasoningEffort
	}

	if effort != "" {
		if !slices.Contains([]string{"high", "medium", "low", "none"}, effort) {
			return nil, fmt.Errorf("invalid reasoning value: '%s' (must be \"high\", \"medium\", \"low\", or \"none\")", effort)
		}

		if effort == "none" {
			think = &api.ThinkValue{Value: false}
		} else {
			think = &api.ThinkValue{Value: effort}
		}
	}

	return &api.ChatRequest{
		Model:           r.Model,
		Messages:        messages,
		Format:          format,
		Options:         options,
		Stream:          &r.Stream,
		Tools:           r.Tools,
		Think:           think,
		Logprobs:        r.Logprobs != nil && *r.Logprobs,
		TopLogprobs:     r.TopLogprobs,
		DebugRenderOnly: r.DebugRenderOnly,
	}, nil
}

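// exampleFromChatRequest is an illustrative sketch, not part of the upstream
// API: a minimal OpenAI-style request converts to an api.ChatRequest with
// defaulted sampling options. The model name and prompt are hypothetical.
func exampleFromChatRequest() (*api.ChatRequest, error) {
	req := ChatCompletionRequest{
		Model: "llama3.2",
		Messages: []Message{
			{Role: "user", Content: "Why is the sky blue?"},
		},
	}
	// With no temperature or top_p set, the conversion fills in 1.0 for both.
	return FromChatRequest(req)
}
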
func nameFromToolCallID(messages []Message, toolCallID string) string {
	// iterate backwards to be more resilient to duplicate tool call IDs (this
	// follows "last one wins")
	for i := len(messages) - 1; i >= 0; i-- {
		msg := messages[i]
		for _, tc := range msg.ToolCalls {
			if tc.ID == toolCallID {
				return tc.Function.Name
			}
		}
	}
	return ""
}

// decodeImageURL decodes a base64 data URI into raw image bytes.
func decodeImageURL(url string) (api.ImageData, error) {
	if strings.HasPrefix(url, "http://") || strings.HasPrefix(url, "https://") {
		return nil, errors.New("image URLs are not currently supported, please use base64 encoded data instead")
	}

	types := []string{"jpeg", "jpg", "png", "webp"}

	// Support blank mime type to match /api/chat's behavior of taking just unadorned base64
	if strings.HasPrefix(url, "data:;base64,") {
		url = strings.TrimPrefix(url, "data:;base64,")
	} else {
		valid := false
		for _, t := range types {
			prefix := "data:image/" + t + ";base64,"
			if strings.HasPrefix(url, prefix) {
				url = strings.TrimPrefix(url, prefix)
				valid = true
				break
			}
		}
		if !valid {
			return nil, errors.New("invalid image input")
		}
	}

	img, err := base64.StdEncoding.DecodeString(url)
	if err != nil {
		return nil, errors.New("invalid image input")
	}
	return img, nil
}

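// exampleDataURI is an illustrative sketch, not part of the upstream API,
// showing the accepted input shapes: a typed data URI such as
// "data:image/png;base64,<payload>" or the blank-MIME form
// "data:;base64,<payload>"; plain HTTP(S) URLs are rejected.
func exampleDataURI(b64Payload string) (api.ImageData, error) {
	return decodeImageURL("data:image/png;base64," + b64Payload)
}
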
// FromCompletionToolCall converts OpenAI ToolCall format to api.ToolCall
func FromCompletionToolCall(toolCalls []ToolCall) ([]api.ToolCall, error) {
	apiToolCalls := make([]api.ToolCall, len(toolCalls))
	for i, tc := range toolCalls {
		apiToolCalls[i].ID = tc.ID
		apiToolCalls[i].Function.Name = tc.Function.Name
		err := json.Unmarshal([]byte(tc.Function.Arguments), &apiToolCalls[i].Function.Arguments)
		if err != nil {
			return nil, errors.New("invalid tool call arguments")
		}
	}

	return apiToolCalls, nil
}

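// exampleToolCallRoundTrip is an illustrative sketch, not part of the
// upstream API: FromCompletionToolCall parses the JSON-string arguments that
// ToToolCalls produces back into structured form. The id, function name, and
// argument JSON used here are hypothetical.
func exampleToolCallRoundTrip() ([]api.ToolCall, error) {
	var tc ToolCall
	tc.ID = "call_abc123"
	tc.Type = "function"
	tc.Function.Name = "get_weather"
	tc.Function.Arguments = `{"city":"Paris"}`
	return FromCompletionToolCall([]ToolCall{tc})
}
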
// FromCompleteRequest converts a CompletionRequest to api.GenerateRequest
func FromCompleteRequest(r CompletionRequest) (api.GenerateRequest, error) {
	options := make(map[string]any)

	switch stop := r.Stop.(type) {
	case string:
		options["stop"] = []string{stop}
	case []any:
		var stops []string
		for _, s := range stop {
			if str, ok := s.(string); ok {
				stops = append(stops, str)
			} else {
				return api.GenerateRequest{}, fmt.Errorf("invalid type for 'stop' field: %T", s)
			}
		}
		options["stop"] = stops
	}

	if r.MaxTokens != nil {
		options["num_predict"] = *r.MaxTokens
	}

	if r.Temperature != nil {
		options["temperature"] = *r.Temperature
	} else {
		options["temperature"] = 1.0
	}

	if r.Seed != nil {
		options["seed"] = *r.Seed
	}

	options["frequency_penalty"] = r.FrequencyPenalty

	options["presence_penalty"] = r.PresencePenalty

	if r.TopP != 0.0 {
		options["top_p"] = r.TopP
	} else {
		options["top_p"] = 1.0
	}

	var logprobs bool
	var topLogprobs int
	if r.Logprobs != nil && *r.Logprobs > 0 {
		logprobs = true
		topLogprobs = *r.Logprobs
	}

	return api.GenerateRequest{
		Model:           r.Model,
		Prompt:          r.Prompt,
		Options:         options,
		Stream:          &r.Stream,
		Suffix:          r.Suffix,
		Logprobs:        logprobs,
		TopLogprobs:     topLogprobs,
		DebugRenderOnly: r.DebugRenderOnly,
	}, nil
}

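// exampleCompletionLogprobs is an illustrative sketch, not part of the
// upstream API: the legacy completions API expresses logprobs as a single
// integer, which maps to Logprobs=true plus TopLogprobs on the generate
// request. The model name, prompt, and count are hypothetical.
func exampleCompletionLogprobs() (api.GenerateRequest, error) {
	n := 5
	return FromCompleteRequest(CompletionRequest{
		Model:    "llama3.2",
		Prompt:   "Once upon a time",
		Logprobs: &n,
	})
}
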
// ImageGenerationRequest is an OpenAI-compatible image generation request.
type ImageGenerationRequest struct {
	Model          string `json:"model"`
	Prompt         string `json:"prompt"`
	N              int    `json:"n,omitempty"`
	Size           string `json:"size,omitempty"`
	ResponseFormat string `json:"response_format,omitempty"`
	Seed           *int64 `json:"seed,omitempty"`
}

// ImageGenerationResponse is an OpenAI-compatible image generation response.
type ImageGenerationResponse struct {
	Created int64            `json:"created"`
	Data    []ImageURLOrData `json:"data"`
}

// ImageURLOrData contains either a URL or base64-encoded image data.
type ImageURLOrData struct {
	URL     string `json:"url,omitempty"`
	B64JSON string `json:"b64_json,omitempty"`
}

// FromImageGenerationRequest converts an OpenAI image generation request to an Ollama GenerateRequest.
func FromImageGenerationRequest(r ImageGenerationRequest) api.GenerateRequest {
	req := api.GenerateRequest{
		Model:  r.Model,
		Prompt: r.Prompt,
	}
	// Parse size if provided (e.g., "1024x768")
	if r.Size != "" {
		var w, h int32
		if _, err := fmt.Sscanf(r.Size, "%dx%d", &w, &h); err == nil {
			req.Width = w
			req.Height = h
		}
	}
	if r.Seed != nil {
		if req.Options == nil {
			req.Options = map[string]any{}
		}
		req.Options["seed"] = *r.Seed
	}
	return req
}

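// exampleSizeParsing is an illustrative sketch, not part of the upstream API:
// "WxH" size strings are parsed with fmt.Sscanf, and malformed sizes are
// silently ignored, leaving Width and Height at zero. The prompts are
// hypothetical.
func exampleSizeParsing() (parsed, ignored api.GenerateRequest) {
	parsed = FromImageGenerationRequest(ImageGenerationRequest{Prompt: "a red fox", Size: "1024x768"})
	ignored = FromImageGenerationRequest(ImageGenerationRequest{Prompt: "a red fox", Size: "square"})
	// parsed.Width == 1024, parsed.Height == 768; ignored.Width == 0, ignored.Height == 0.
	return parsed, ignored
}
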
// ToImageGenerationResponse converts an Ollama GenerateResponse to an OpenAI ImageGenerationResponse.
func ToImageGenerationResponse(resp api.GenerateResponse) ImageGenerationResponse {
	var data []ImageURLOrData
	if resp.Image != "" {
		data = []ImageURLOrData{{B64JSON: resp.Image}}
	}
	return ImageGenerationResponse{
		Created: resp.CreatedAt.Unix(),
		Data:    data,
	}
}

// TranscriptionResponse is the response format for /v1/audio/transcriptions.
type TranscriptionResponse struct {
	Text string `json:"text"`
}

// TranscriptionRequest holds parsed fields from the multipart form.
type TranscriptionRequest struct {
	Model          string
	AudioData      []byte
	ResponseFormat string // "json", "text", "verbose_json"
	Language       string
	Prompt         string
}

// FromTranscriptionRequest converts a transcription request into a ChatRequest
// by wrapping the audio with a system prompt for transcription.
func FromTranscriptionRequest(r TranscriptionRequest) (*api.ChatRequest, error) {
	systemPrompt := "Transcribe the following audio exactly as spoken. Output only the transcription text, nothing else."
	if r.Language != "" {
		systemPrompt += " The audio is in " + r.Language + "."
	}
	if r.Prompt != "" {
		systemPrompt += " Context: " + r.Prompt
	}

	stream := true
	return &api.ChatRequest{
		Model: r.Model,
		Messages: []api.Message{
			{Role: "system", Content: systemPrompt},
			{Role: "user", Content: "Transcribe this audio.", Images: []api.ImageData{r.AudioData}},
		},
		Stream: &stream,
		Options: map[string]any{
			"temperature": 0,
		},
	}, nil
}

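// exampleTranscription is an illustrative sketch, not part of the upstream
// API: raw audio bytes are carried in the Images field of the user message,
// and temperature is pinned to 0 for deterministic transcription output. The
// model name "gemma4" is hypothetical here.
func exampleTranscription(wav []byte) (*api.ChatRequest, error) {
	return FromTranscriptionRequest(TranscriptionRequest{
		Model:     "gemma4",
		AudioData: wav,
		Language:  "English",
	})
}
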
// ImageEditRequest is an OpenAI-compatible image edit request.
type ImageEditRequest struct {
	Model  string `json:"model"`
	Prompt string `json:"prompt"`
	Image  string `json:"image"`          // Base64-encoded image data
	Size   string `json:"size,omitempty"` // e.g., "1024x1024"
	Seed   *int64 `json:"seed,omitempty"`
}

// FromImageEditRequest converts an OpenAI image edit request to an Ollama GenerateRequest.
func FromImageEditRequest(r ImageEditRequest) (api.GenerateRequest, error) {
	req := api.GenerateRequest{
		Model:  r.Model,
		Prompt: r.Prompt,
	}

	// Decode the input image
	if r.Image != "" {
		imgData, err := decodeImageURL(r.Image)
		if err != nil {
			return api.GenerateRequest{}, fmt.Errorf("invalid image: %w", err)
		}
		req.Images = append(req.Images, imgData)
	}

	// Parse size if provided (e.g., "1024x768")
	if r.Size != "" {
		var w, h int32
		if _, err := fmt.Sscanf(r.Size, "%dx%d", &w, &h); err == nil {
			req.Width = w
			req.Height = h
		}
	}

	if r.Seed != nil {
		if req.Options == nil {
			req.Options = map[string]any{}
		}
		req.Options["seed"] = *r.Seed
	}

	return req, nil
}