Compare commits

...

10 Commits

Author SHA1 Message Date
Patrick Devine
81b9cb7fa9 ollama pull manifest list support 2026-04-24 12:40:12 -07:00
Patrick Devine
7fd96eba96 ollama push w/ manifest lists 2026-04-24 08:41:11 -07:00
Patrick Devine
2dcc80204d hide the --runner flag in ollama run 2026-04-23 17:39:46 -07:00
Patrick Devine
0d863c8cf4 add ollama show cli 2026-04-23 17:37:27 -07:00
Patrick Devine
f636014ac7 add manifest list support to /api/show 2026-04-23 17:03:03 -07:00
Patrick Devine
00188139f1 manifest lists: fix size calculation in ollama ls 2026-04-23 11:07:54 -07:00
Patrick Devine
9658029516 more manifest list stuff 2026-04-22 18:51:45 -07:00
Patrick Devine
961ae1b10c introduce manifest lists 2026-04-21 18:28:14 -07:00
Patrick Devine
7bcdb250b9 fix failing client2 unit tests 2026-04-21 13:56:39 -07:00
Patrick Devine
7bbcd2e6be server: add v2 manifest path
This change adds a new manifest-v2/ path for new models created with the
create/pull/copy commands. Under manifest-v2, manifests are stored as blobs that are
content-addressable, like tensors and config files. Each named tag is instead
a symlink to, a hard link to, or a copy of the manifest blob, depending on what the file system supports.

Downgrades to older versions of ollama are still possible, but any create/pull/copy
done with the newer version will potentially have its blobs pruned by the older
version.

manifest-v2 also changes the default registry name to `ollama.com` instead of
`registry.ollama.ai`.
2026-04-21 12:05:54 -07:00
28 changed files with 4291 additions and 441 deletions

View File

@@ -403,6 +403,21 @@ func (c *Client) Show(ctx context.Context, req *ShowRequest) (*ShowResponse, err
return &resp, nil return &resp, nil
} }
// ShowManifests posts req to /api/show with AllManifests forced on and decodes
// the reply into a ShowManifestsResponse. The caller's request is copied and
// never modified; a nil req is treated as an otherwise empty request.
func (c *Client) ShowManifests(ctx context.Context, req *ShowRequest) (*ShowManifestsResponse, error) {
	var payload ShowRequest
	if req != nil {
		payload = *req
	}
	payload.AllManifests = true

	result := new(ShowManifestsResponse)
	if err := c.do(ctx, http.MethodPost, "/api/show", &payload, result); err != nil {
		return nil, err
	}
	return result, nil
}
// Heartbeat checks if the server has started and is responsive; if yes, it // Heartbeat checks if the server has started and is responsive; if yes, it
// returns nil, otherwise an error. // returns nil, otherwise an error.
func (c *Client) Heartbeat(ctx context.Context) error { func (c *Client) Heartbeat(ctx context.Context) error {

View File

@@ -64,6 +64,9 @@ type GenerateRequest struct {
// the library at https://ollama.com/library // the library at https://ollama.com/library
Model string `json:"model"` Model string `json:"model"`
// Runner selects a runner variant from a manifest list.
Runner string `json:"runner,omitempty"`
// Prompt is the textual prompt to send to the model. // Prompt is the textual prompt to send to the model.
Prompt string `json:"prompt"` Prompt string `json:"prompt"`
@@ -148,6 +151,9 @@ type ChatRequest struct {
// Model is the model name, as in [GenerateRequest]. // Model is the model name, as in [GenerateRequest].
Model string `json:"model"` Model string `json:"model"`
// Runner selects a runner variant from a manifest list.
Runner string `json:"runner,omitempty"`
// Messages is the messages of the chat - can be used to keep a chat memory. // Messages is the messages of the chat - can be used to keep a chat memory.
Messages []Message `json:"messages"` Messages []Message `json:"messages"`
@@ -675,6 +681,9 @@ type CreateRequest struct {
// From is the name of the model or file to use as the source. // From is the name of the model or file to use as the source.
From string `json:"from,omitempty"` From string `json:"from,omitempty"`
// List is the list of local model tags to include in a manifest list.
List []string `json:"list,omitempty"`
// RemoteHost is the URL of the upstream ollama API for the model (if any). // RemoteHost is the URL of the upstream ollama API for the model (if any).
RemoteHost string `json:"remote_host,omitempty"` RemoteHost string `json:"remote_host,omitempty"`
@@ -724,8 +733,10 @@ type DeleteRequest struct {
// ShowRequest is the request passed to [Client.Show]. // ShowRequest is the request passed to [Client.Show].
type ShowRequest struct { type ShowRequest struct {
Model string `json:"model"` Model string `json:"model"`
System string `json:"system"` Runner string `json:"runner,omitempty"`
AllManifests bool `json:"all_manifests,omitempty"`
System string `json:"system"`
// Template is deprecated // Template is deprecated
Template string `json:"template"` Template string `json:"template"`
@@ -758,6 +769,18 @@ type ShowResponse struct {
Requires string `json:"requires,omitempty"` Requires string `json:"requires,omitempty"`
} }
// ShowManifest is a single manifest summary returned from [Client.ShowManifests].
type ShowManifest struct {
	// Runner names the runner variant this summary describes; it is omitted
	// from the JSON encoding when empty.
	Runner string `json:"runner,omitempty"`

	// ShowResponse is embedded, so encoding/json places its fields at the same
	// JSON object level as "runner".
	ShowResponse
}
// ShowManifestsResponse is the response returned from [Client.ShowManifests].
type ShowManifestsResponse struct {
	// Manifests holds one summary per manifest.
	Manifests []ShowManifest `json:"manifests"`

	// License is carried once at the top level of the response rather than on
	// each manifest; it is omitted from the JSON encoding when empty.
	// NOTE(review): presumably the license text shared by all manifests —
	// confirm against the /api/show handler.
	License string `json:"license,omitempty"`
}
// CopyRequest is the request passed to [Client.Copy]. // CopyRequest is the request passed to [Client.Copy].
type CopyRequest struct { type CopyRequest struct {
Source string `json:"source"` Source string `json:"source"`
@@ -829,6 +852,7 @@ type ProcessModelResponse struct {
ExpiresAt time.Time `json:"expires_at"` ExpiresAt time.Time `json:"expires_at"`
SizeVRAM int64 `json:"size_vram"` SizeVRAM int64 `json:"size_vram"`
ContextLength int `json:"context_length"` ContextLength int `json:"context_length"`
Runner string `json:"runner,omitempty"`
} }
type TokenResponse struct { type TokenResponse struct {

View File

@@ -98,11 +98,11 @@ func init() {
const ConnectInstructions = "If your browser did not open, navigate to:\n %s\n\n" const ConnectInstructions = "If your browser did not open, navigate to:\n %s\n\n"
// ensureThinkingSupport emits a warning if the model does not advertise thinking support // ensureThinkingSupport emits a warning if the model does not advertise thinking support
func ensureThinkingSupport(ctx context.Context, client *api.Client, name string) { func ensureThinkingSupport(ctx context.Context, client *api.Client, name, runner string) {
if name == "" { if name == "" {
return return
} }
resp, err := client.Show(ctx, &api.ShowRequest{Model: name}) resp, err := client.Show(ctx, &api.ShowRequest{Model: name, Runner: runner})
if err != nil { if err != nil {
return return
} }
@@ -156,6 +156,45 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
return fmt.Errorf("invalid model name: %s", modelName) return fmt.Errorf("invalid model name: %s", modelName)
} }
list, _ := cmd.Flags().GetStringSlice("combine")
if len(list) > 0 {
if experimental, _ := cmd.Flags().GetBool("experimental"); experimental {
return errors.New("--combine cannot be used with --experimental")
}
if quantize, _ := cmd.Flags().GetString("quantize"); quantize != "" {
return errors.New("--combine cannot be used with --quantize")
}
if cmd.Flags().Changed("file") {
return errors.New("--combine cannot be used with --file")
}
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
req := &api.CreateRequest{
Model: modelName,
List: list,
}
status := "creating manifest list"
spinner := progress.NewSpinner(status)
p.Add(status, spinner)
fn := func(resp api.ProgressResponse) error {
if status != resp.Status {
spinner.Stop()
status = resp.Status
spinner = progress.NewSpinner(status)
p.Add(status, spinner)
}
return nil
}
return client.Create(cmd.Context(), req, fn)
}
// Check for --experimental flag for safetensors model creation // Check for --experimental flag for safetensors model creation
// This gates both safetensors LLM and imagegen model creation // This gates both safetensors LLM and imagegen model creation
experimental, _ := cmd.Flags().GetBool("experimental") experimental, _ := cmd.Flags().GetBool("experimental")
@@ -399,7 +438,7 @@ func loadOrUnloadModel(cmd *cobra.Command, opts *runOptions) error {
requestedCloud := modelref.HasExplicitCloudSource(opts.Model) requestedCloud := modelref.HasExplicitCloudSource(opts.Model)
if info, err := client.Show(cmd.Context(), &api.ShowRequest{Model: opts.Model}); err != nil { if info, err := client.Show(cmd.Context(), &api.ShowRequest{Model: opts.Model, Runner: opts.Runner}); err != nil {
return err return err
} else if info.RemoteHost != "" || requestedCloud { } else if info.RemoteHost != "" || requestedCloud {
// Cloud model, no need to load/unload // Cloud model, no need to load/unload
@@ -431,6 +470,7 @@ func loadOrUnloadModel(cmd *cobra.Command, opts *runOptions) error {
req := &api.GenerateRequest{ req := &api.GenerateRequest{
Model: opts.Model, Model: opts.Model,
Runner: opts.Runner,
KeepAlive: opts.KeepAlive, KeepAlive: opts.KeepAlive,
// pass Think here so we fail before getting to the chat prompt if the model doesn't support it // pass Think here so we fail before getting to the chat prompt if the model doesn't support it
@@ -562,6 +602,14 @@ func RunHandler(cmd *cobra.Command, args []string) error {
ShowConnect: true, ShowConnect: true,
} }
if flag := cmd.Flags().Lookup("runner"); flag != nil {
runner, err := cmd.Flags().GetString("runner")
if err != nil {
return err
}
opts.Runner = runner
}
format, err := cmd.Flags().GetString("format") format, err := cmd.Flags().GetString("format")
if err != nil { if err != nil {
return err return err
@@ -651,7 +699,7 @@ func RunHandler(cmd *cobra.Command, args []string) error {
requestedCloud := modelref.HasExplicitCloudSource(name) requestedCloud := modelref.HasExplicitCloudSource(name)
info, err := func() (*api.ShowResponse, error) { info, err := func() (*api.ShowResponse, error) {
showReq := &api.ShowRequest{Name: name} showReq := &api.ShowRequest{Name: name, Runner: opts.Runner}
info, err := client.Show(cmd.Context(), showReq) info, err := client.Show(cmd.Context(), showReq)
var se api.StatusError var se api.StatusError
if errors.As(err, &se) && se.StatusCode == http.StatusNotFound { if errors.As(err, &se) && se.StatusCode == http.StatusNotFound {
@@ -661,7 +709,7 @@ func RunHandler(cmd *cobra.Command, args []string) error {
if err := PullHandler(cmd, []string{name}); err != nil { if err := PullHandler(cmd, []string{name}); err != nil {
return nil, err return nil, err
} }
return client.Show(cmd.Context(), &api.ShowRequest{Name: name}) return client.Show(cmd.Context(), &api.ShowRequest{Name: name, Runner: opts.Runner})
} }
return info, err return info, err
}() }()
@@ -761,7 +809,7 @@ func RunHandler(cmd *cobra.Command, args []string) error {
// Use experimental agent loop with tools // Use experimental agent loop with tools
if isExperimental { if isExperimental {
return xcmd.GenerateInteractive(cmd, opts.Model, opts.WordWrap, opts.Options, opts.Think, opts.HideThinking, opts.KeepAlive, yoloMode, enableWebsearch) return xcmd.GenerateInteractive(cmd, opts.Model, opts.Runner, opts.WordWrap, opts.Options, opts.Think, opts.HideThinking, opts.KeepAlive, yoloMode, enableWebsearch)
} }
return generateInteractive(cmd, opts) return generateInteractive(cmd, opts)
@@ -1000,12 +1048,12 @@ func ListRunningHandler(cmd *cobra.Command, args []string) error {
until = format.HumanTime(m.ExpiresAt, "Never") until = format.HumanTime(m.ExpiresAt, "Never")
} }
ctxStr := strconv.Itoa(m.ContextLength) ctxStr := strconv.Itoa(m.ContextLength)
data = append(data, []string{m.Name, m.Digest[:12], format.HumanBytes(m.Size), procStr, ctxStr, until}) data = append(data, []string{m.Name, m.Digest[:12], format.HumanBytes(m.Size), procStr, ctxStr, m.Runner, until})
} }
} }
table := tablewriter.NewWriter(os.Stdout) table := tablewriter.NewWriter(os.Stdout)
table.SetHeader([]string{"NAME", "ID", "SIZE", "PROCESSOR", "CONTEXT", "UNTIL"}) table.SetHeader([]string{"NAME", "ID", "SIZE", "PROCESSOR", "CONTEXT", "RUNNER", "UNTIL"})
table.SetHeaderAlignment(tablewriter.ALIGN_LEFT) table.SetHeaderAlignment(tablewriter.ALIGN_LEFT)
table.SetAlignment(tablewriter.ALIGN_LEFT) table.SetAlignment(tablewriter.ALIGN_LEFT)
table.SetHeaderLine(false) table.SetHeaderLine(false)
@@ -1095,6 +1143,21 @@ func ShowHandler(cmd *cobra.Command, args []string) error {
} }
req := api.ShowRequest{Name: args[0], Verbose: verbose} req := api.ShowRequest{Name: args[0], Verbose: verbose}
if flagsSet == 0 && !verbose {
resp, err := client.ShowManifests(cmd.Context(), &req)
if err != nil {
return err
}
if len(resp.Manifests) > 1 {
return showManifestListInfo(resp, os.Stdout)
}
if len(resp.Manifests) == 1 {
return showInfo(&resp.Manifests[0].ShowResponse, verbose, os.Stdout)
}
return nil
}
resp, err := client.Show(cmd.Context(), &req) resp, err := client.Show(cmd.Context(), &req)
if err != nil { if err != nil {
return err return err
@@ -1120,6 +1183,211 @@ func ShowHandler(cmd *cobra.Command, args []string) error {
return showInfo(resp, verbose, os.Stdout) return showInfo(resp, verbose, os.Stdout)
} }
// showManifestListInfo writes a side-by-side comparison of the manifests in
// resp to w, using one column per manifest. It prints a "Model" section, then
// "Capabilities" and "Parameters" sections when any manifest provides them,
// and finally the first lines of the license when one is set. It always
// returns nil.
func showManifestListInfo(resp *api.ShowManifestsResponse, w io.Writer) error {
	// section prints a heading followed by a borderless, left-aligned table
	// made of the rows produced by build.
	section := func(title string, build func() [][]string) {
		fmt.Fprintln(w, " ", title)

		tw := tablewriter.NewWriter(w)
		tw.SetAlignment(tablewriter.ALIGN_LEFT)
		tw.SetBorder(false)
		tw.SetNoWhiteSpace(true)
		tw.SetTablePadding(" ")
		if title == "License" {
			tw.SetColWidth(100)
		}
		tw.AppendBulk(build())
		tw.Render()
		fmt.Fprintln(w)
	}

	// Column titles: the runner name, or a positional placeholder when a
	// manifest does not carry one.
	columns := make([]string, 0, len(resp.Manifests))
	for i, m := range resp.Manifests {
		title := m.Runner
		if title == "" {
			title = fmt.Sprintf("manifest %d", i+1)
		}
		columns = append(columns, title)
	}

	// heading builds the header row: one leading blank cell, a second blank
	// cell when the table has a label column, then the column titles.
	heading := func(labelColumn bool) []string {
		blanks := 1
		if labelColumn {
			blanks = 2
		}
		row := make([]string, blanks, blanks+len(columns))
		return append(row, columns...)
	}

	modelFields := []struct {
		name  string
		value func(api.ShowResponse) string
	}{
		{"architecture", showArchitecture},
		{"parameters", showParameterSize},
		{"context length", func(r api.ShowResponse) string { return showModelInfoNumber(r, "context_length") }},
		{"embedding length", func(r api.ShowResponse) string { return showModelInfoNumber(r, "embedding_length") }},
		{"quantization", func(r api.ShowResponse) string { return r.Details.QuantizationLevel }},
		{"requires", func(r api.ShowResponse) string { return r.Requires }},
	}
	section("Model", func() [][]string {
		rows := [][]string{heading(true)}
		for _, field := range modelFields {
			cells := make([]string, 0, len(resp.Manifests))
			for _, m := range resp.Manifests {
				cells = append(cells, field.value(m.ShowResponse))
			}
			// Leave out fields for which no manifest has a value.
			if !slices.ContainsFunc(cells, func(c string) bool { return c != "" }) {
				continue
			}
			rows = append(rows, append([]string{"", field.name}, cells...))
		}
		return rows
	})

	if capabilities := showCapabilities(resp.Manifests); len(capabilities) > 0 {
		section("Capabilities", func() [][]string {
			rows := [][]string{heading(false)}
			for _, capability := range capabilities {
				row := []string{""}
				for _, m := range resp.Manifests {
					// A blank cell marks a manifest lacking this capability.
					cell := ""
					if slices.Contains(m.Capabilities, capability) {
						cell = capability.String()
					}
					row = append(row, cell)
				}
				rows = append(rows, row)
			}
			return rows
		})
	}

	if keys, perManifest := showParameterValues(resp.Manifests); len(keys) > 0 {
		section("Parameters", func() [][]string {
			rows := [][]string{heading(true)}
			for _, key := range keys {
				row := []string{"", key}
				for _, params := range perManifest {
					row = append(row, params[key])
				}
				rows = append(rows, row)
			}
			return rows
		})
	}

	if resp.License != "" {
		section("License", func() [][]string {
			return showHeadRows(resp.License, 2)
		})
	}

	return nil
}
// showCapabilities returns the union of the capabilities of all manifests,
// each listed once, in order of first appearance.
func showCapabilities(manifests []api.ShowManifest) []model.Capability {
	var ordered []model.Capability
	known := map[model.Capability]bool{}
	for i := range manifests {
		for _, c := range manifests[i].Capabilities {
			if known[c] {
				continue
			}
			known[c] = true
			ordered = append(ordered, c)
		}
	}
	return ordered
}
// showArchitecture returns the "general.architecture" string from the model
// info when it is present and non-empty, and otherwise falls back to the
// model family from the details.
func showArchitecture(resp api.ShowResponse) string {
	// Indexing a nil map is safe, so no separate nil check is required.
	arch, ok := resp.ModelInfo["general.architecture"].(string)
	if !ok || arch == "" {
		return resp.Details.Family
	}
	return arch
}
// showParameterSize returns the parameter size from the details when set.
// Otherwise it formats the float64 "general.parameter_count" model info entry
// with format.HumanNumber, or returns "" when that entry is missing or has a
// different type.
func showParameterSize(resp api.ShowResponse) string {
	if size := resp.Details.ParameterSize; size != "" {
		return size
	}

	count, ok := resp.ModelInfo["general.parameter_count"].(float64)
	if !ok {
		return ""
	}
	return format.HumanNumber(uint64(count))
}
// showModelInfoNumber looks up the model info entry "<architecture>.<key>" and
// returns it as a plain decimal string. It returns "" when the architecture is
// unknown or the entry is missing or not a float64.
func showModelInfoNumber(resp api.ShowResponse, key string) string {
	arch, _ := resp.ModelInfo["general.architecture"].(string)
	if arch == "" {
		return ""
	}

	n, ok := resp.ModelInfo[arch+"."+key].(float64)
	if !ok {
		return ""
	}
	return strconv.FormatFloat(n, 'f', -1, 64)
}
// showParameterValues parses each manifest's Parameters text, where every
// non-blank line is a parameter name followed by its value. It returns the
// parameter names in order of first appearance across all manifests, and one
// name-to-value map per manifest in the same order as manifests. When a name
// repeats within one manifest, the last value wins.
func showParameterValues(manifests []api.ShowManifest) ([]string, []map[string]string) {
	var order []string
	known := map[string]bool{}
	perManifest := make([]map[string]string, 0, len(manifests))

	for _, m := range manifests {
		params := map[string]string{}
		lines := bufio.NewScanner(strings.NewReader(m.Parameters))
		for lines.Scan() {
			words := strings.Fields(lines.Text())
			if len(words) == 0 {
				continue
			}

			name, rest := words[0], words[1:]
			params[name] = strings.Join(rest, " ")
			if !known[name] {
				known[name] = true
				order = append(order, name)
			}
		}
		perManifest = append(perManifest, params)
	}

	return order, perManifest
}
// showHeadRows turns the first n non-blank lines of s into table rows of the
// form {"", line}, with surrounding whitespace trimmed from each line. When s
// has more than n non-blank lines, a final {"", "..."} row is appended. A
// negative n means no limit. It returns nil when s has no non-blank lines.
//
// Lines are split with strings.Cut instead of bufio.Scanner so that a line of
// any length is handled; a Scanner stops at the first line that exceeds its
// default token size limit, which would silently drop the rest of the text.
func showHeadRows(s string, n int) (rows [][]string) {
	for rest := s; rest != ""; {
		var line string
		line, rest, _ = strings.Cut(rest, "\n")

		text := strings.TrimSpace(line)
		if text == "" {
			continue
		}
		// One non-blank line beyond the limit is enough to know the output is
		// truncated, so stop scanning here.
		if n >= 0 && len(rows) == n {
			return append(rows, []string{"", "..."})
		}
		rows = append(rows, []string{"", text})
	}
	return rows
}
func showInfo(resp *api.ShowResponse, verbose bool, w io.Writer) error { func showInfo(resp *api.ShowResponse, verbose bool, w io.Writer) error {
tableRender := func(header string, rows func() [][]string) { tableRender := func(header string, rows func() [][]string) {
fmt.Fprintln(w, " ", header) fmt.Fprintln(w, " ", header)
@@ -1285,34 +1553,15 @@ func showInfo(resp *api.ShowResponse, verbose bool, w io.Writer) error {
}) })
} }
head := func(s string, n int) (rows [][]string) {
scanner := bufio.NewScanner(strings.NewReader(s))
count := 0
for scanner.Scan() {
text := strings.TrimSpace(scanner.Text())
if text == "" {
continue
}
count++
if n < 0 || count <= n {
rows = append(rows, []string{"", text})
}
}
if n >= 0 && count > n {
rows = append(rows, []string{"", "..."})
}
return
}
if resp.System != "" { if resp.System != "" {
tableRender("System", func() [][]string { tableRender("System", func() [][]string {
return head(resp.System, 2) return showHeadRows(resp.System, 2)
}) })
} }
if resp.License != "" { if resp.License != "" {
tableRender("License", func() [][]string { tableRender("License", func() [][]string {
return head(resp.License, 2) return showHeadRows(resp.License, 2)
}) })
} }
@@ -1412,6 +1661,7 @@ type generateContextKey string
type runOptions struct { type runOptions struct {
Model string Model string
Runner string
ParentModel string ParentModel string
LoadedMessages []api.Message LoadedMessages []api.Message
Prompt string Prompt string
@@ -1463,6 +1713,7 @@ func (r runOptions) Copy() runOptions {
return runOptions{ return runOptions{
Model: r.Model, Model: r.Model,
Runner: r.Runner,
ParentModel: r.ParentModel, ParentModel: r.ParentModel,
LoadedMessages: loadedMessages, LoadedMessages: loadedMessages,
Prompt: r.Prompt, Prompt: r.Prompt,
@@ -1646,6 +1897,7 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
req := &api.ChatRequest{ req := &api.ChatRequest{
Model: opts.Model, Model: opts.Model,
Runner: opts.Runner,
Messages: opts.Messages, Messages: opts.Messages,
Format: json.RawMessage(opts.Format), Format: json.RawMessage(opts.Format),
Options: opts.Options, Options: opts.Options,
@@ -1778,6 +2030,7 @@ func generate(cmd *cobra.Command, opts runOptions) error {
request := api.GenerateRequest{ request := api.GenerateRequest{
Model: opts.Model, Model: opts.Model,
Runner: opts.Runner,
Prompt: opts.Prompt, Prompt: opts.Prompt,
Context: generateContext, Context: generateContext,
Images: opts.Images, Images: opts.Images,
@@ -2121,6 +2374,7 @@ func NewCLI() *cobra.Command {
} }
createCmd.Flags().StringP("file", "f", "", "Name of the Modelfile (default \"Modelfile\")") createCmd.Flags().StringP("file", "f", "", "Name of the Modelfile (default \"Modelfile\")")
createCmd.Flags().StringSlice("combine", nil, "Create a manifest list from comma-separated local models")
createCmd.Flags().StringP("quantize", "q", "", "Quantize model to this level (e.g. q4_K_M)") createCmd.Flags().StringP("quantize", "q", "", "Quantize model to this level (e.g. q4_K_M)")
createCmd.Flags().Bool("experimental", false, "Enable experimental safetensors model creation") createCmd.Flags().Bool("experimental", false, "Enable experimental safetensors model creation")
@@ -2152,6 +2406,8 @@ func NewCLI() *cobra.Command {
runCmd.Flags().Bool("insecure", false, "Use an insecure registry") runCmd.Flags().Bool("insecure", false, "Use an insecure registry")
runCmd.Flags().Bool("nowordwrap", false, "Don't wrap words to the next line automatically") runCmd.Flags().Bool("nowordwrap", false, "Don't wrap words to the next line automatically")
runCmd.Flags().String("format", "", "Response format (e.g. json)") runCmd.Flags().String("format", "", "Response format (e.g. json)")
runCmd.Flags().String("runner", "", "Runner to use for manifest list selection (mlx, ggml, llamacpp)")
runCmd.Flags().MarkHidden("runner")
runCmd.Flags().String("think", "", "Enable thinking mode: true/false or high/medium/low for supported models") runCmd.Flags().String("think", "", "Enable thinking mode: true/false or high/medium/low for supported models")
runCmd.Flags().Lookup("think").NoOptDefVal = "true" runCmd.Flags().Lookup("think").NoOptDefVal = "true"
runCmd.Flags().Bool("hidethinking", false, "Hide thinking output (if provided)") runCmd.Flags().Bool("hidethinking", false, "Hide thinking output (if provided)")

View File

@@ -326,6 +326,93 @@ Weigh anchor!
}) })
} }
// TestShowManifestListInfo renders a two-manifest response (runners "mlx" and
// "ggml") with showManifestListInfo and compares the output with the expected
// side-by-side tables. Trailing whitespace on each line is ignored by the
// comparison.
func TestShowManifestListInfo(t *testing.T) {
	var b bytes.Buffer
	if err := showManifestListInfo(&api.ShowManifestsResponse{
		Manifests: []api.ShowManifest{
			{
				Runner: "mlx",
				ShowResponse: api.ShowResponse{
					ModelInfo: map[string]any{
						"general.architecture":         "qwen3_5_moe",
						"general.parameter_count":      float64(35_100_000_000),
						"qwen3_5_moe.context_length":   float64(262144),
						"qwen3_5_moe.embedding_length": float64(2048),
					},
					Details: api.ModelDetails{
						ParameterSize:     "35.1B",
						QuantizationLevel: "nvfp4",
					},
					Requires:     "0.19.0",
					Capabilities: []model.Capability{model.CapabilityCompletion, model.CapabilityVision, model.CapabilityThinking, model.CapabilityTools},
					Parameters:   "min_p 0\npresence_penalty 1.5\nrepeat_penalty 1\ntemperature 1\ntop_k 20\ntop_p 0.95\n",
				},
			},
			{
				// This manifest sets no Requires value and lists the same
				// capabilities in a different order.
				Runner: "ggml",
				ShowResponse: api.ShowResponse{
					ModelInfo: map[string]any{
						"general.architecture":       "qwen35moe",
						"qwen35moe.context_length":   float64(262144),
						"qwen35moe.embedding_length": float64(2048),
					},
					Details: api.ModelDetails{
						ParameterSize:     "36.0B",
						QuantizationLevel: "Q4_K_M",
					},
					Capabilities: []model.Capability{model.CapabilityCompletion, model.CapabilityVision, model.CapabilityTools, model.CapabilityThinking},
					Parameters:   "min_p 0\npresence_penalty 1.5\nrepeat_penalty 1\ntemperature 1\ntop_k 20\ntop_p 0.95\n",
				},
			},
		},
		// Three non-blank lines, so the license section is expected to be cut
		// to two lines followed by "...".
		License: "Apache License\nVersion 2.0, January 2004\nterms",
	}, &b); err != nil {
		t.Fatal(err)
	}
	// The expected text is whitespace-sensitive apart from trailing padding.
	expect := ` Model
mlx ggml
architecture qwen3_5_moe qwen35moe
parameters 35.1B 36.0B
context length 262144 262144
embedding length 2048 2048
quantization nvfp4 Q4_K_M
requires 0.19.0
Capabilities
mlx ggml
completion completion
vision vision
thinking thinking
tools tools
Parameters
mlx ggml
min_p 0 0
presence_penalty 1.5 1.5
repeat_penalty 1 1
temperature 1 1
top_k 20 20
top_p 0.95 0.95
License
Apache License
Version 2.0, January 2004
...
`
	// trimLinePadding removes trailing spaces, tabs and carriage returns from
	// every line so the table's right-hand padding does not affect the diff.
	trimLinePadding := func(s string) string {
		lines := strings.Split(s, "\n")
		for i, line := range lines {
			lines[i] = strings.TrimRight(line, " \t\r")
		}
		return strings.Join(lines, "\n")
	}
	if diff := cmp.Diff(trimLinePadding(expect), trimLinePadding(b.String())); diff != "" {
		t.Errorf("unexpected output (-want +got):\n%s", diff)
	}
}
func TestDeleteHandler(t *testing.T) { func TestDeleteHandler(t *testing.T) {
stopped := false stopped := false
mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
@@ -479,6 +566,143 @@ func TestRunEmbeddingModel(t *testing.T) {
} }
} }
// TestListRunningHandlerShowsRunner serves one running model from a mock
// GET /api/ps endpoint and checks that the table printed by ListRunningHandler
// contains the RUNNER column, the shortened digest and the runner name.
func TestListRunningHandlerShowsRunner(t *testing.T) {
	handler := func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path != "/api/ps" || r.Method != http.MethodGet {
			http.NotFound(w, r)
			return
		}

		running := api.ProcessModelResponse{
			Name:          "test-model:latest",
			Model:         "test-model:latest",
			Digest:        "abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890",
			Size:          1024,
			SizeVRAM:      1024,
			ContextLength: 4096,
			Runner:        "mlx",
			ExpiresAt:     time.Now().Add(time.Hour),
		}
		payload := api.ProcessResponse{Models: []api.ProcessModelResponse{running}}
		if err := json.NewEncoder(w).Encode(payload); err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
		}
	}
	mockServer := httptest.NewServer(http.HandlerFunc(handler))
	t.Setenv("OLLAMA_HOST", mockServer.URL)
	t.Cleanup(mockServer.Close)

	cmd := &cobra.Command{}
	cmd.SetContext(t.Context())

	// Redirect stdout into a pipe for the duration of the handler call.
	savedStdout := os.Stdout
	pr, pw, _ := os.Pipe()
	os.Stdout = pw
	runErr := ListRunningHandler(cmd, nil)
	pw.Close()
	os.Stdout = savedStdout
	if runErr != nil {
		t.Fatal(runErr)
	}

	captured, readErr := io.ReadAll(pr)
	if readErr != nil {
		t.Fatal(readErr)
	}

	got := string(captured)
	for _, want := range []string{"CONTEXT", "RUNNER", "abcdef123456", "mlx"} {
		if strings.Contains(got, want) {
			continue
		}
		t.Fatalf("output missing %q:\n%s", want, got)
	}
}
// TestRunHandlerRunnerFlag sets --runner on a run command and checks that the
// value reaches both the /api/show and the /api/generate request bodies
// received by a mock server.
func TestRunHandlerRunnerFlag(t *testing.T) {
	showReqs := make(chan api.ShowRequest, 1)
	generateReqs := make(chan api.GenerateRequest, 1)

	// serveShow records the decoded show request and answers with a minimal
	// completion-capable model.
	serveShow := func(w http.ResponseWriter, r *http.Request) {
		var req api.ShowRequest
		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}
		showReqs <- req

		w.Header().Set("Content-Type", "application/json")
		reply := api.ShowResponse{
			Capabilities: []model.Capability{model.CapabilityCompletion},
			ModelInfo:    map[string]any{},
		}
		if err := json.NewEncoder(w).Encode(reply); err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
		}
	}

	// serveGenerate records the decoded generate request and answers with a
	// single final response.
	serveGenerate := func(w http.ResponseWriter, r *http.Request) {
		var req api.GenerateRequest
		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}
		generateReqs <- req

		w.Header().Set("Content-Type", "application/x-ndjson")
		reply := api.GenerateResponse{
			Model: "test-model",
			Done:  true,
		}
		if err := json.NewEncoder(w).Encode(reply); err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
		}
	}

	mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		isPost := r.Method == http.MethodPost
		if isPost && r.URL.Path == "/api/show" {
			serveShow(w, r)
			return
		}
		if isPost && r.URL.Path == "/api/generate" {
			serveGenerate(w, r)
			return
		}
		http.NotFound(w, r)
	}))
	t.Setenv("OLLAMA_HOST", mockServer.URL)
	t.Cleanup(mockServer.Close)

	cmd := &cobra.Command{}
	cmd.SetContext(t.Context())
	flags := cmd.Flags()
	for _, name := range []string{"keepalive", "format", "runner", "think"} {
		flags.String(name, "", "")
	}
	for _, name := range []string{"verbose", "insecure", "nowordwrap", "hidethinking"} {
		flags.Bool(name, false, "")
	}
	if err := flags.Set("runner", "llamacpp"); err != nil {
		t.Fatal(err)
	}

	// Redirect stdout into a pipe while the handler runs; the captured output
	// is drained but not inspected.
	savedStdout := os.Stdout
	pr, pw, _ := os.Pipe()
	os.Stdout = pw
	runErr := RunHandler(cmd, []string{"test-model", "hello"})
	pw.Close()
	os.Stdout = savedStdout
	if _, readErr := io.ReadAll(pr); readErr != nil {
		t.Fatal(readErr)
	}
	if runErr != nil {
		t.Fatal(runErr)
	}

	select {
	case got := <-showReqs:
		if got.Runner != "llamacpp" {
			t.Fatalf("show runner = %q, want %q", got.Runner, "llamacpp")
		}
	default:
		t.Fatal("server did not receive show request")
	}

	select {
	case got := <-generateReqs:
		if got.Runner != "llamacpp" {
			t.Fatalf("generate runner = %q, want %q", got.Runner, "llamacpp")
		}
	default:
		t.Fatal("server did not receive generate request")
	}
}
func TestRunEmbeddingModelWithFlags(t *testing.T) { func TestRunEmbeddingModelWithFlags(t *testing.T) {
reqCh := make(chan api.EmbedRequest, 1) reqCh := make(chan api.EmbedRequest, 1)
mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
@@ -1524,6 +1748,66 @@ func TestCreateHandler(t *testing.T) {
} }
} }
// TestCreateHandlerManifestList runs CreateHandler with --combine and checks
// that the mock /api/create endpoint receives the model name and the list of
// tags, and none of the regular create fields.
func TestCreateHandlerManifestList(t *testing.T) {
	wantList := []string{"gguf", "safetensors"}

	handler := func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path != "/api/create" {
			t.Errorf("unexpected request to %s", r.URL.Path)
			http.Error(w, "not found", http.StatusNotFound)
			return
		}
		// A wrong method is reported, but the request is still processed.
		if r.Method != http.MethodPost {
			t.Errorf("expected POST request, got %s", r.Method)
		}

		var got api.CreateRequest
		if err := json.NewDecoder(r.Body).Decode(&got); err != nil {
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}
		if got.Model != "parent" {
			t.Errorf("model = %q, want %q", got.Model, "parent")
		}
		if !cmp.Equal(got.List, wantList) {
			t.Errorf("list = %#v, want %#v", got.List, wantList)
		}
		if got.From != "" || len(got.Files) > 0 {
			t.Errorf("manifest list create sent normal create fields: from=%q files=%v", got.From, got.Files)
		}

		if err := json.NewEncoder(w).Encode(api.ProgressResponse{Status: "success"}); err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
			return
		}
		w.(http.Flusher).Flush()
	}
	mockServer := httptest.NewServer(http.HandlerFunc(handler))
	t.Setenv("OLLAMA_HOST", mockServer.URL)
	t.Cleanup(mockServer.Close)

	cmd := &cobra.Command{}
	flags := cmd.Flags()
	flags.String("file", "", "")
	flags.String("quantize", "", "")
	flags.Bool("experimental", false, "")
	flags.StringSlice("combine", nil, "")
	cmd.SetContext(t.Context())
	if err := flags.Set("combine", "gguf,safetensors"); err != nil {
		t.Fatal(err)
	}

	// Redirect stderr into a pipe while the handler runs; the captured output
	// is drained but not inspected.
	savedStderr := os.Stderr
	pr, pw, _ := os.Pipe()
	os.Stderr = pw
	createErr := CreateHandler(cmd, []string{"parent"})
	pw.Close()
	os.Stderr = savedStderr

	if _, readErr := io.ReadAll(pr); readErr != nil {
		t.Fatal(readErr)
	}
	if createErr != nil {
		t.Fatal(createErr)
	}
}
func TestNewCreateRequest(t *testing.T) { func TestNewCreateRequest(t *testing.T) {
tests := []struct { tests := []struct {
name string name string

View File

@@ -224,7 +224,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
opts.Messages = []api.Message{} opts.Messages = []api.Message{}
opts.LoadedMessages = nil opts.LoadedMessages = nil
fmt.Printf("Loading model '%s'\n", opts.Model) fmt.Printf("Loading model '%s'\n", opts.Model)
info, err := client.Show(cmd.Context(), &api.ShowRequest{Model: opts.Model}) info, err := client.Show(cmd.Context(), &api.ShowRequest{Model: opts.Model, Runner: opts.Runner})
if err != nil { if err != nil {
if strings.Contains(err.Error(), "not found") { if strings.Contains(err.Error(), "not found") {
fmt.Printf("Couldn't find model '%s'\n", opts.Model) fmt.Printf("Couldn't find model '%s'\n", opts.Model)
@@ -323,7 +323,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
opts.Think = &thinkValue opts.Think = &thinkValue
thinkExplicitlySet = true thinkExplicitlySet = true
if client, err := api.ClientFromEnvironment(); err == nil { if client, err := api.ClientFromEnvironment(); err == nil {
ensureThinkingSupport(cmd.Context(), client, opts.Model) ensureThinkingSupport(cmd.Context(), client, opts.Model, opts.Runner)
} }
if maybeLevel != "" { if maybeLevel != "" {
fmt.Printf("Set 'think' mode to '%s'.\n", maybeLevel) fmt.Printf("Set 'think' mode to '%s'.\n", maybeLevel)
@@ -334,7 +334,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
opts.Think = &api.ThinkValue{Value: false} opts.Think = &api.ThinkValue{Value: false}
thinkExplicitlySet = true thinkExplicitlySet = true
if client, err := api.ClientFromEnvironment(); err == nil { if client, err := api.ClientFromEnvironment(); err == nil {
ensureThinkingSupport(cmd.Context(), client, opts.Model) ensureThinkingSupport(cmd.Context(), client, opts.Model, opts.Runner)
} }
fmt.Println("Set 'nothink' mode.") fmt.Println("Set 'nothink' mode.")
case "format": case "format":
@@ -414,6 +414,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
} }
req := &api.ShowRequest{ req := &api.ShowRequest{
Name: opts.Model, Name: opts.Model,
Runner: opts.Runner,
System: opts.System, System: opts.System,
Options: opts.Options, Options: opts.Options,
} }

View File

@@ -47,7 +47,7 @@ func TestWarnMissingThinking(t *testing.T) {
oldStderr := os.Stderr oldStderr := os.Stderr
r, w, _ := os.Pipe() r, w, _ := os.Pipe()
os.Stderr = w os.Stderr = w
ensureThinkingSupport(t.Context(), client, "m") ensureThinkingSupport(t.Context(), client, "m", "")
w.Close() w.Close()
os.Stderr = oldStderr os.Stderr = oldStderr
out, _ := io.ReadAll(r) out, _ := io.ReadAll(r)

View File

@@ -120,6 +120,18 @@ type ollamaServer struct {
tokenizer tokenizer.Tokenizer // tokenizer handles text encoding/decoding tokenizer tokenizer.Tokenizer // tokenizer handles text encoding/decoding
} }
// RunnerName reports the runner implementation behind s: "ggml" for an
// *ollamaServer, "llamacpp" for a *llamaServer, and the empty string for
// any other value.
func RunnerName(s LlamaServer) string {
	if _, ok := s.(*ollamaServer); ok {
		return "ggml"
	}
	if _, ok := s.(*llamaServer); ok {
		return "llamacpp"
	}
	return ""
}
// LoadModel will load a model from disk. The model must be in the GGML format. // LoadModel will load a model from disk. The model must be in the GGML format.
// //
// It collects array values for arrays with a size less than or equal to // It collects array values for arrays with a size less than or equal to

View File

@@ -123,25 +123,6 @@ func (l *Layer) Remove() error {
return nil return nil
} }
// Ignore corrupt manifests to avoid blocking deletion of layers that are freshly orphaned _, err := RemoveUnreferencedBlobs(l.Digest)
ms, err := Manifests(true) return err
if err != nil {
return err
}
for _, m := range ms {
for _, layer := range append(m.Layers, m.Config) {
if layer.Digest == l.Digest {
// something is using this layer
return nil
}
}
}
blob, err := BlobsPath(l.Digest)
if err != nil {
return err
}
return os.Remove(blob)
} }

File diff suppressed because it is too large Load Diff

View File

@@ -1,19 +1,24 @@
package manifest package manifest
import ( import (
"bytes"
"crypto/sha256"
"encoding/json" "encoding/json"
"errors"
"fmt"
"os" "os"
"path/filepath" "path/filepath"
"slices" "slices"
"strings"
"testing" "testing"
"github.com/ollama/ollama/types/model" "github.com/ollama/ollama/types/model"
) )
func createManifest(t *testing.T, path, name string) { func createManifestAtRoot(t *testing.T, path, root, name string) {
t.Helper() t.Helper()
p := filepath.Join(path, "manifests", name) p := filepath.Join(path, root, name)
if err := os.MkdirAll(filepath.Dir(p), 0o755); err != nil { if err := os.MkdirAll(filepath.Dir(p), 0o755); err != nil {
t.Fatal(err) t.Fatal(err)
} }
@@ -29,6 +34,735 @@ func createManifest(t *testing.T, path, name string) {
} }
} }
// createManifest writes a test manifest for name beneath the "manifests"
// directory rooted at path by delegating to createManifestAtRoot.
func createManifest(t *testing.T, path, name string) {
	t.Helper()

	const root = "manifests"
	createManifestAtRoot(t, path, root, name)
}
// createManifestForTest returns a schema-version-2 GGUF manifest for the
// given runner with one config layer (size 12) and one model layer (size 34)
// identified by the supplied digests.
func createManifestForTest(configDigest, layerDigest, runner string) Manifest {
	config := Layer{
		MediaType: "application/vnd.docker.container.image.v1+json",
		Digest:    configDigest,
		Size:      12,
	}
	weights := Layer{
		MediaType: "application/vnd.ollama.image.model",
		Digest:    layerDigest,
		Size:      34,
	}

	var m Manifest
	m.SchemaVersion = 2
	m.MediaType = MediaTypeManifest
	m.Runner = runner
	m.Format = FormatGGUF
	m.Config = config
	m.Layers = []Layer{weights}
	return m
}
// createManifestListData returns the JSON encoding of a schema-version-2
// manifest list whose children are manifests, failing the test if encoding
// fails.
func createManifestListData(t *testing.T, manifests ...Manifest) []byte {
	t.Helper()

	encoded, err := json.Marshal(Manifest{
		SchemaVersion: 2,
		MediaType:     MediaTypeManifestList,
		Manifests:     manifests,
	})
	if err != nil {
		t.Fatal(err)
	}
	return encoded
}
// writeManifestBlobForTest stores data through writeManifestBlob and returns
// the resulting digest, failing the test on error.
func writeManifestBlobForTest(t *testing.T, data []byte) string {
	t.Helper()

	digest, err := writeManifestBlob(data)
	if err == nil {
		return digest
	}
	t.Fatal(err)
	return digest // not reached: t.Fatal stops the test
}
// TestWriteManifestStoresManifestAsBlob checks that after WriteManifest the
// bytes at the named v2 path equal the bytes of the blob addressed by their
// sha256, and that the parsed manifest reports that hash from Digest and
// BlobDigest.
func TestWriteManifestStoresManifestAsBlob(t *testing.T) {
	t.Setenv("OLLAMA_MODELS", t.TempDir())

	name := model.ParseName("example")
	err := WriteManifest(name, Layer{
		MediaType: "application/vnd.docker.container.image.v1+json",
		Digest:    "sha256:" + strings.Repeat("a", 64),
		Size:      12,
	}, nil)
	if err != nil {
		t.Fatal(err)
	}

	namedPath, err := V2PathForName(name)
	if err != nil {
		t.Fatal(err)
	}
	namedBytes, err := os.ReadFile(namedPath)
	if err != nil {
		t.Fatal(err)
	}

	// The blob store is content addressed, so hash what the named path holds.
	sum := sha256.Sum256(namedBytes)
	wantHex := fmt.Sprintf("%x", sum)
	wantBlobDigest := "sha256:" + wantHex

	blobPath, err := BlobsPath(wantBlobDigest)
	if err != nil {
		t.Fatal(err)
	}
	blobBytes, err := os.ReadFile(blobPath)
	if err != nil {
		t.Fatal(err)
	}
	if !bytes.Equal(blobBytes, namedBytes) {
		t.Fatal("manifest path and blob content differ")
	}

	parsed, err := ParseNamedManifest(name)
	if err != nil {
		t.Fatal(err)
	}
	if got := parsed.Digest(); got != wantHex {
		t.Fatalf("digest = %q, want %x", got, sum)
	}
	if got := parsed.BlobDigest(); got != wantBlobDigest {
		t.Fatalf("blob digest = %q, want %q", got, wantBlobDigest)
	}
}
// TestSelectManifestUsesRunnerPreference offers a ggml and a llamacpp child
// and expects the llamacpp one when llamacpp is listed before ggml in the
// preferences.
func TestSelectManifestUsesRunnerPreference(t *testing.T) {
	digestOf := func(c string) string { return "sha256:" + strings.Repeat(c, 64) }

	candidates := []Manifest{
		createManifestForTest(digestOf("a"), digestOf("b"), RunnerGGML),
		createManifestForTest(digestOf("c"), digestOf("d"), RunnerLlamaCPP),
	}
	preferences := []string{RunnerLlamaCPP, RunnerGGML}

	selected, err := selectManifestWithPreferences(candidates, preferences)
	if err != nil {
		t.Fatal(err)
	}
	if selected.Runner != RunnerLlamaCPP {
		t.Fatalf("runner = %q, want %q", selected.Runner, RunnerLlamaCPP)
	}
}
// TestSelectManifestReferenceDoesNotResolveBlob selects from a list holding
// one manifest reference whose blob was never written to the (empty) models
// directory, and expects the selection to carry the reference's digest.
func TestSelectManifestReferenceDoesNotResolveBlob(t *testing.T) {
	t.Setenv("OLLAMA_MODELS", t.TempDir())

	wantDigest := "sha256:" + strings.Repeat("a", 64)
	ref, err := NewManifestReference(wantDigest, RunnerGGML, FormatGGUF)
	if err != nil {
		t.Fatal(err)
	}

	selected, err := selectManifestReferenceWithPreferences([]Manifest{ref}, []string{RunnerGGML})
	if err != nil {
		t.Fatal(err)
	}
	if got := selected.BlobDigest(); got != wantDigest {
		t.Fatalf("blob digest = %q, want selected reference digest", got)
	}
}
func TestSelectManifestRejectsOldOllamaRunner(t *testing.T) {
_, err := selectManifestWithPreferences([]Manifest{
createManifestForTest("sha256:"+strings.Repeat("a", 64), "sha256:"+strings.Repeat("b", 64), "ollama"),
}, []string{RunnerGGML})
if !errors.Is(err, ErrNoCompatibleManifest) {
t.Fatalf("err = %v, want %v", err, ErrNoCompatibleManifest)
}
}
// TestParseNamedManifestResolvesManifestList writes a manifest list with a
// llamacpp and a ggml child and checks that:
//   - ParseNamedManifest reports the parent's digest but the ggml child's
//     runner, format, config and selected digest;
//   - ParseNamedManifestForRunner with llamacpp yields the llamacpp child;
//   - ReferencedBlobDigestsForName includes both child manifest digests;
//   - ReadManifestData returns the parent bytes and ReadSelectedManifestData
//     returns the ggml child's bytes.
func TestParseNamedManifestResolvesManifestList(t *testing.T) {
	t.Setenv("OLLAMA_MODELS", t.TempDir())

	name := model.ParseName("example")
	digestOf := func(c string) string { return "sha256:" + strings.Repeat(c, 64) }

	// storeChild encodes a child manifest and writes it to the blob store.
	storeChild := func(child Manifest) ([]byte, string) {
		t.Helper()
		encoded, err := json.Marshal(child)
		if err != nil {
			t.Fatal(err)
		}
		return encoded, writeManifestBlobForTest(t, encoded)
	}

	ggml := createManifestForTest(digestOf("a"), digestOf("b"), RunnerGGML)
	ggmlData, ggmlDigest := storeChild(ggml)

	llamacpp := createManifestForTest(digestOf("c"), digestOf("d"), RunnerLlamaCPP)
	_, llamacppDigest := storeChild(llamacpp)

	parentData := createManifestListData(t, llamacpp, ggml)
	if err := WriteManifestData(name, parentData); err != nil {
		t.Fatal(err)
	}

	// Selection without an explicit runner.
	m, err := ParseNamedManifest(name)
	if err != nil {
		t.Fatal(err)
	}

	parentSum := sha256.Sum256(parentData)
	parentHex := fmt.Sprintf("%x", parentSum)
	if got := m.Digest(); got != parentHex {
		t.Fatalf("digest = %q, want %x", got, parentSum)
	}
	if got := m.BlobDigest(); got != "sha256:"+parentHex {
		t.Fatalf("blob digest = %q, want sha256:%x", got, parentSum)
	}

	wantSelected := strings.TrimPrefix(ggmlDigest, "sha256:")
	if got := m.SelectedDigest(); got != wantSelected {
		t.Fatalf("selected digest = %q, want %q", got, wantSelected)
	}
	if m.Runner != RunnerGGML {
		t.Fatalf("runner = %q, want %q", m.Runner, RunnerGGML)
	}
	if m.Format != FormatGGUF {
		t.Fatalf("format = %q, want %q", m.Format, FormatGGUF)
	}
	if got := m.Config.Digest; got != digestOf("a") {
		t.Fatalf("config digest = %q, want selected child config", got)
	}

	// Selection with an explicit runner.
	m, err = ParseNamedManifestForRunner(name, RunnerLlamaCPP)
	if err != nil {
		t.Fatal(err)
	}
	if m.Runner != RunnerLlamaCPP {
		t.Fatalf("runner = %q, want %q", m.Runner, RunnerLlamaCPP)
	}
	wantSelected = strings.TrimPrefix(llamacppDigest, "sha256:")
	if got := m.SelectedDigest(); got != wantSelected {
		t.Fatalf("selected digest = %q, want %q", got, wantSelected)
	}
	if got := m.Config.Digest; got != digestOf("c") {
		t.Fatalf("config digest = %q, want selected child config", got)
	}

	referenced, err := ReferencedBlobDigestsForName(name)
	if err != nil {
		t.Fatal(err)
	}
	childDigests := []string{llamacppDigest, ggmlDigest}
	for _, digest := range childDigests {
		if slices.Contains(referenced, digest) {
			continue
		}
		t.Fatalf("referenced blob digests missing child manifest %s", digest)
	}

	raw, err := ReadManifestData(name)
	if err != nil {
		t.Fatal(err)
	}
	if !bytes.Equal(raw, parentData) {
		t.Fatal("ReadManifestData did not return the parent manifest list")
	}

	selected, err := ReadSelectedManifestData(name)
	if err != nil {
		t.Fatal(err)
	}
	if !bytes.Equal(selected, ggmlData) {
		t.Fatal("ReadSelectedManifestData did not return the selected child manifest")
	}
}
// TestTotalSizeForNameIncludesAllManifestListChildren builds a manifest list
// with a GGUF child and an MLX child that share one layer, and expects
// TotalSizeForName to equal the summed size of both configs, both
// child-specific layers, and the shared layer counted once.
func TestTotalSizeForNameIncludesAllManifestListChildren(t *testing.T) {
	t.Setenv("OLLAMA_MODELS", t.TempDir())

	name := model.ParseName("example")

	// stored records the digest and byte length of a blob written for the test.
	type stored struct {
		digest string
		size   int64
	}
	store := func(content string) stored {
		t.Helper()
		data := []byte(content)
		return stored{digest: writeManifestBlobForTest(t, data), size: int64(len(data))}
	}

	shared := store("shared layer")
	ggufConfig := store("gguf config")
	ggufLayer := store("gguf layer")
	mlxConfig := store("mlx config")
	mlxLayer := store("mlx layer")

	// publish writes m as a manifest blob and returns a reference to it.
	publish := func(m Manifest) Manifest {
		t.Helper()
		encoded, err := json.Marshal(m)
		if err != nil {
			t.Fatal(err)
		}
		digest := writeManifestBlobForTest(t, encoded)
		ref, err := NewManifestReference(digest, m.Runner, m.Format)
		if err != nil {
			t.Fatal(err)
		}
		return ref
	}

	ggufRef := publish(Manifest{
		SchemaVersion: 2,
		MediaType:     MediaTypeManifest,
		Runner:        RunnerGGML,
		Format:        FormatGGUF,
		Config: Layer{
			MediaType: "application/vnd.docker.container.image.v1+json",
			Digest:    ggufConfig.digest,
			Size:      ggufConfig.size,
		},
		Layers: []Layer{
			{
				MediaType: "application/vnd.ollama.image.model",
				Digest:    shared.digest,
				Size:      shared.size,
			},
			{
				MediaType: "application/vnd.ollama.image.model",
				Digest:    ggufLayer.digest,
				Size:      ggufLayer.size,
			},
		},
	})

	mlxRef := publish(Manifest{
		SchemaVersion: 2,
		MediaType:     MediaTypeManifest,
		Runner:        RunnerMLX,
		Format:        FormatSafetensors,
		Config: Layer{
			MediaType: "application/vnd.docker.container.image.v1+json",
			Digest:    mlxConfig.digest,
			Size:      mlxConfig.size,
		},
		Layers: []Layer{
			{
				MediaType: MediaTypeImageTensor,
				Digest:    shared.digest,
				Size:      shared.size,
			},
			{
				MediaType: MediaTypeImageTensor,
				Digest:    mlxLayer.digest,
				Size:      mlxLayer.size,
			},
		},
	})

	if err := WriteManifestData(name, createManifestListData(t, ggufRef, mlxRef)); err != nil {
		t.Fatal(err)
	}

	size, err := TotalSizeForName(name)
	if err != nil {
		t.Fatal(err)
	}

	// The shared layer appears in both children but is only counted once.
	want := ggufConfig.size + shared.size + ggufLayer.size + mlxConfig.size + mlxLayer.size
	if size != want {
		t.Fatalf("size = %d, want %d", size, want)
	}
}
// TestPartialManifestListTracksPresentAndMissingChildren writes a manifest
// list that references one stored child and one child whose manifest blob
// was never written. It expects ReferencedBlobDigestsForName to report the
// parent, both children, and the stored child's config and layer, and
// TotalSizeForName to count only the stored child's config and layer.
func TestPartialManifestListTracksPresentAndMissingChildren(t *testing.T) {
	t.Setenv("OLLAMA_MODELS", t.TempDir())

	name := model.ParseName("example")

	configBytes := []byte("gguf config")
	layerBytes := []byte("gguf layer")
	configDigest := writeManifestBlobForTest(t, configBytes)
	layerDigest := writeManifestBlobForTest(t, layerBytes)

	present := Manifest{
		SchemaVersion: 2,
		MediaType:     MediaTypeManifest,
		Runner:        RunnerGGML,
		Format:        FormatGGUF,
		Config: Layer{
			MediaType: "application/vnd.docker.container.image.v1+json",
			Digest:    configDigest,
			Size:      int64(len(configBytes)),
		},
		Layers: []Layer{{
			MediaType: "application/vnd.ollama.image.model",
			Digest:    layerDigest,
			Size:      int64(len(layerBytes)),
		}},
	}
	presentData, err := json.Marshal(present)
	if err != nil {
		t.Fatal(err)
	}
	presentDigest := writeManifestBlobForTest(t, presentData)
	presentRef, err := NewManifestReference(presentDigest, present.Runner, present.Format)
	if err != nil {
		t.Fatal(err)
	}

	// This child is referenced by the list but its blob is never written.
	missingDigest := "sha256:" + strings.Repeat("e", 64)
	missingRef, err := NewManifestReference(missingDigest, RunnerMLX, FormatSafetensors)
	if err != nil {
		t.Fatal(err)
	}

	parentData := createManifestListData(t, presentRef, missingRef)
	if err := WriteManifestData(name, parentData); err != nil {
		t.Fatal(err)
	}
	parentDigest := fmt.Sprintf("sha256:%x", sha256.Sum256(parentData))

	referenced, err := ReferencedBlobDigestsForName(name)
	if err != nil {
		t.Fatal(err)
	}
	expected := []string{parentDigest, presentDigest, missingDigest, configDigest, layerDigest}
	for _, digest := range expected {
		if slices.Contains(referenced, digest) {
			continue
		}
		t.Fatalf("referenced blob digests missing %s: %#v", digest, referenced)
	}

	size, err := TotalSizeForName(name)
	if err != nil {
		t.Fatal(err)
	}
	if want := int64(len(configBytes) + len(layerBytes)); size != want {
		t.Fatalf("size = %d, want %d", size, want)
	}
}
// TestParseNamedManifestLeavesLegacyManifestInPlace checks that reading a
// legacy manifest with ParseNamedManifest neither turns the file into a
// symlink nor creates a blob for its contents.
func TestParseNamedManifestLeavesLegacyManifestInPlace(t *testing.T) {
	modelsDir := t.TempDir()
	t.Setenv("OLLAMA_MODELS", modelsDir)

	name := model.ParseName("example")
	createManifest(t, modelsDir, name.Filepath())

	legacyPath, err := PathForName(name)
	if err != nil {
		t.Fatal(err)
	}

	if _, err := ParseNamedManifest(name); err != nil {
		t.Fatal(err)
	}

	info, err := os.Lstat(legacyPath)
	if err != nil {
		t.Fatal(err)
	}
	if isSymlink := info.Mode()&os.ModeSymlink != 0; isSymlink {
		t.Fatal("legacy manifest was converted to a symlink while reading")
	}

	contents, err := os.ReadFile(legacyPath)
	if err != nil {
		t.Fatal(err)
	}
	blobPath, err := BlobsPath(fmt.Sprintf("sha256:%x", sha256.Sum256(contents)))
	if err != nil {
		t.Fatal(err)
	}

	_, err = os.Stat(blobPath)
	if !os.IsNotExist(err) {
		t.Fatalf("legacy manifest read created blob: %v", err)
	}
}
// TestMigrateManifestLinks migrates one legacy manifest and checks that the
// v2 path and the matching blob hold the same bytes, that the legacy file is
// gone, and that running the migration again reports zero migrations and
// leaves the v2 manifest content unchanged.
func TestMigrateManifestLinks(t *testing.T) {
	modelsDir := t.TempDir()
	t.Setenv("OLLAMA_MODELS", modelsDir)

	name := model.ParseName("example")
	createManifest(t, modelsDir, name.Filepath())

	// First run: exactly one manifest is migrated.
	count, err := MigrateManifestLinks()
	if err != nil {
		t.Fatal(err)
	}
	if count != 1 {
		t.Fatalf("migrated = %d, want 1", count)
	}

	v2Path, err := V2PathForName(name)
	if err != nil {
		t.Fatal(err)
	}
	before, err := os.ReadFile(v2Path)
	if err != nil {
		t.Fatal(err)
	}

	blobPath, err := BlobsPath(fmt.Sprintf("sha256:%x", sha256.Sum256(before)))
	if err != nil {
		t.Fatal(err)
	}
	blobBytes, err := os.ReadFile(blobPath)
	if err != nil {
		t.Fatal(err)
	}
	if !bytes.Equal(blobBytes, before) {
		t.Fatal("migrated manifest path and blob content differ")
	}

	legacyPath, err := PathForName(name)
	if err != nil {
		t.Fatal(err)
	}
	_, err = os.Stat(legacyPath)
	if !os.IsNotExist(err) {
		t.Fatalf("legacy manifest still exists: %v", err)
	}

	// Second run: nothing is left to migrate.
	count, err = MigrateManifestLinks()
	if err != nil {
		t.Fatal(err)
	}
	if count != 0 {
		t.Fatalf("migrated on second run = %d, want 0", count)
	}

	// Third run: must still succeed and leave the content untouched.
	if _, err := MigrateManifestLinks(); err != nil {
		t.Fatal(err)
	}
	after, err := os.ReadFile(v2Path)
	if err != nil {
		t.Fatal(err)
	}
	if !bytes.Equal(after, before) {
		t.Fatal("second migration changed manifest content")
	}
}
// TestRemoveNamedRemovesUnreferencedManifestBlob writes a manifest, confirms
// its blob exists, and expects the blob to be gone after RemoveNamed.
func TestRemoveNamedRemovesUnreferencedManifestBlob(t *testing.T) {
	t.Setenv("OLLAMA_MODELS", t.TempDir())

	name := model.ParseName("example")
	if err := WriteManifest(name, Layer{}, nil); err != nil {
		t.Fatal(err)
	}

	parsed, err := ParseNamedManifest(name)
	if err != nil {
		t.Fatal(err)
	}
	blobPath, err := BlobsPath(parsed.BlobDigest())
	if err != nil {
		t.Fatal(err)
	}

	// The blob must exist before removal for the final check to mean anything.
	if _, err := os.Stat(blobPath); err != nil {
		t.Fatal(err)
	}

	if err := RemoveNamed(name); err != nil {
		t.Fatal(err)
	}

	_, err = os.Stat(blobPath)
	if !os.IsNotExist(err) {
		t.Fatalf("manifest blob still exists: %v", err)
	}
}
func TestRemoveNamedTracksManifestListChildBlobs(t *testing.T) {
t.Setenv("OLLAMA_MODELS", t.TempDir())
ggmlConfigDigest := writeManifestBlobForTest(t, []byte("ggml config"))
ggmlLayerDigest := writeManifestBlobForTest(t, []byte("ggml layer"))
ggml := createManifestForTest(ggmlConfigDigest, ggmlLayerDigest, RunnerGGML)
ggmlData, err := json.Marshal(ggml)
if err != nil {
t.Fatal(err)
}
writeManifestBlobForTest(t, ggmlData)
llamacppConfigDigest := writeManifestBlobForTest(t, []byte("llamacpp config"))
llamacppLayerDigest := writeManifestBlobForTest(t, []byte("llamacpp layer"))
llamacpp := createManifestForTest(llamacppConfigDigest, llamacppLayerDigest, RunnerLlamaCPP)
llamacppData, err := json.Marshal(llamacpp)
if err != nil {
t.Fatal(err)
}
writeManifestBlobForTest(t, llamacppData)
parentData := createManifestListData(t, ggml, llamacpp)
nameA := model.ParseName("example-a")
nameB := model.ParseName("example-b")
if err := WriteManifestData(nameA, parentData); err != nil {
t.Fatal(err)
}
if err := WriteManifestData(nameB, parentData); err != nil {
t.Fatal(err)
}
parentSum := sha256.Sum256(parentData)
parentPath, err := BlobsPath(fmt.Sprintf("sha256:%x", parentSum))
if err != nil {
t.Fatal(err)
}
referencedBlobs, err := ReferencedBlobDigestsForName(nameA)
if err != nil {
t.Fatal(err)
}
if err := RemoveNamed(nameA); err != nil {
t.Fatal(err)
}
if _, err := os.Stat(parentPath); err != nil {
t.Fatalf("parent list blob was removed while another model uses it: %v", err)
}
for _, digest := range referencedBlobs {
blob, err := BlobsPath(digest)
if err != nil {
t.Fatal(err)
}
if _, err := os.Stat(blob); err != nil {
t.Fatalf("referenced blob %s was removed while another model uses it: %v", digest, err)
}
}
if err := RemoveNamed(nameB); err != nil {
t.Fatal(err)
}
if _, err := os.Stat(parentPath); !os.IsNotExist(err) {
t.Fatalf("parent list blob still exists after final remove: %v", err)
}
for _, digest := range referencedBlobs {
blob, err := BlobsPath(digest)
if err != nil {
t.Fatal(err)
}
if _, err := os.Stat(blob); !os.IsNotExist(err) {
t.Fatalf("referenced blob %s still exists after final remove: %v", digest, err)
}
}
}
// TestParseNamedManifestRejectsUnsafeSymlinks places a symlink at the legacy
// manifest path and expects ParseNamedManifest to fail when the target is
// (a) a file whose name is not a sha256 blob name, or (b) a file with a valid
// blob name that lives outside the blob store. Each subtest is skipped when
// the symlink cannot be created.
func TestParseNamedManifestRejectsUnsafeSymlinks(t *testing.T) {
	t.Setenv("OLLAMA_MODELS", t.TempDir())

	name := model.ParseName("example")
	linkPath, err := PathForName(name)
	if err != nil {
		t.Fatal(err)
	}
	if err := os.MkdirAll(filepath.Dir(linkPath), 0o755); err != nil {
		t.Fatal(err)
	}

	// expectRejected writes data to target, points the manifest path at it,
	// and requires the parse error to contain wantErr.
	expectRejected := func(t *testing.T, target string, data []byte, wantErr string) {
		t.Helper()
		if err := os.WriteFile(target, data, 0o644); err != nil {
			t.Fatal(err)
		}
		if err := os.Remove(linkPath); err != nil && !os.IsNotExist(err) {
			t.Fatal(err)
		}
		if err := os.Symlink(target, linkPath); err != nil {
			t.Skipf("symlink unavailable: %v", err)
		}

		_, err := ParseNamedManifest(name)
		if err == nil || !strings.Contains(err.Error(), wantErr) {
			t.Fatalf("err = %v, want %s", err, wantErr)
		}
	}

	t.Run("non blob basename", func(t *testing.T) {
		target := filepath.Join(t.TempDir(), "not-a-blob")
		expectRejected(t, target, []byte(`{"schemaVersion":2}`), "not a sha256 blob")
	})

	t.Run("blob basename outside blob store", func(t *testing.T) {
		data := []byte(`{"schemaVersion":2,"mediaType":"application/vnd.docker.distribution.manifest.v2+json"}`)
		blobName := fmt.Sprintf("sha256-%x", sha256.Sum256(data))
		target := filepath.Join(t.TempDir(), blobName)
		expectRejected(t, target, data, "does not match blob")
	})
}
// TestParseNamedManifestPrefersV2 stores different manifests for the same
// name at the legacy path and through WriteManifestData, and expects
// ParseNamedManifest to return the one written through WriteManifestData.
func TestParseNamedManifestPrefersV2(t *testing.T) {
	t.Setenv("OLLAMA_MODELS", t.TempDir())

	name := model.ParseName("example")

	legacyPath, err := PathForName(name)
	if err != nil {
		t.Fatal(err)
	}
	if err := os.MkdirAll(filepath.Dir(legacyPath), 0o755); err != nil {
		t.Fatal(err)
	}
	legacyJSON := []byte(`{"schemaVersion":2,"mediaType":"legacy"}`)
	if err := os.WriteFile(legacyPath, legacyJSON, 0o644); err != nil {
		t.Fatal(err)
	}

	v2JSON := []byte(`{"schemaVersion":2,"mediaType":"v2"}`)
	if err := WriteManifestData(name, v2JSON); err != nil {
		t.Fatal(err)
	}

	parsed, err := ParseNamedManifest(name)
	if err != nil {
		t.Fatal(err)
	}
	if parsed.MediaType != "v2" {
		t.Fatalf("media type = %q, want %q", parsed.MediaType, "v2")
	}
}
// TestManifestsV2ShadowsLegacy stores a legacy manifest and a v2 manifest
// under the same name and expects Manifests to list a single entry for that
// name, carrying the v2 manifest.
func TestManifestsV2ShadowsLegacy(t *testing.T) {
	modelsDir := t.TempDir()
	t.Setenv("OLLAMA_MODELS", modelsDir)

	name := model.ParseName("example")
	createManifest(t, modelsDir, name.Filepath())
	if err := WriteManifestData(name, []byte(`{"schemaVersion":2,"mediaType":"v2"}`)); err != nil {
		t.Fatal(err)
	}

	all, err := Manifests(true)
	if err != nil {
		t.Fatal(err)
	}
	if n := len(all); n != 1 {
		t.Fatalf("manifest count = %d, want 1", n)
	}

	// Look the entry up case-insensitively rather than by exact map key.
	wanted := model.ParseName("example")
	var found *Manifest
	for candidate, parsed := range all {
		if !candidate.EqualFold(wanted) {
			continue
		}
		found = parsed
		break
	}

	if found == nil {
		t.Fatalf("missing v2 manifest for %s", name)
	}
	if found.MediaType != "v2" {
		t.Fatalf("media type = %q, want %q", found.MediaType, "v2")
	}
}
func TestManifests(t *testing.T) { func TestManifests(t *testing.T) {
cases := map[string]struct { cases := map[string]struct {
ps []string ps []string

View File

@@ -14,8 +14,23 @@ import (
var ErrInvalidDigestFormat = errors.New("invalid digest format") var ErrInvalidDigestFormat = errors.New("invalid digest format")
// Directory names, relative to the models directory, that hold manifests.
const (
	legacyDirName = "manifests"
	v2DirName     = "manifests-v2"
)

// Registry hosts treated as the default public registry. Names on either
// host are mapped to v2CanonicalHost for paths under v2DirName.
const (
	defaultPublicHost = "registry.ollama.ai"
	v2CanonicalHost   = "ollama.com"
)
func Path() (string, error) { func Path() (string, error) {
path := filepath.Join(envconfig.Models(), "manifests") return manifestPath(legacyDirName)
}
// V2Path returns the result of manifestPath for the v2 manifest directory
// name (v2DirName).
func V2Path() (string, error) {
	dir, err := manifestPath(v2DirName)
	return dir, err
}
func manifestPath(dir string) (string, error) {
path := filepath.Join(envconfig.Models(), dir)
if err := os.MkdirAll(path, 0o755); err != nil { if err := os.MkdirAll(path, 0o755); err != nil {
return "", fmt.Errorf("%w: ensure path elements are traversable", err) return "", fmt.Errorf("%w: ensure path elements are traversable", err)
} }
@@ -25,6 +40,10 @@ func Path() (string, error) {
// PathForName returns the path to the manifest file for a specific model name. // PathForName returns the path to the manifest file for a specific model name.
func PathForName(n model.Name) (string, error) { func PathForName(n model.Name) (string, error) {
return LegacyPathForName(n)
}
func LegacyPathForName(n model.Name) (string, error) {
if !n.IsValid() { if !n.IsValid() {
return "", os.ErrNotExist return "", os.ErrNotExist
} }
@@ -37,6 +56,162 @@ func PathForName(n model.Name) (string, error) {
return filepath.Join(manifests, n.Filepath()), nil return filepath.Join(manifests, n.Filepath()), nil
} }
// V2PathForName returns the path of the manifest file for n under the v2
// manifest root, using the canonical v2 form of the name. It returns
// os.ErrNotExist when n is not a valid name.
func V2PathForName(n model.Name) (string, error) {
	if !n.IsValid() {
		return "", os.ErrNotExist
	}

	root, err := V2Path()
	if err != nil {
		return "", err
	}

	relative := canonicalV2Name(n).Filepath()
	return filepath.Join(root, relative), nil
}
// ResolvePathForName returns the on-disk manifest path that
// resolveManifestPath finds for n, discarding the manifest root it also
// reports.
func ResolvePathForName(n model.Name) (string, error) {
	resolved, _, err := resolveManifestPath(n)
	return resolved, err
}
// resolveManifestPath locates the manifest file for n and returns its path
// together with the manifest root directory that contains it.
//
// The v2 path is checked first; when nothing exists there, each legacy name
// candidate is tried in order under the legacy root. os.ErrNotExist is
// returned when n is invalid or no candidate exists on disk. Lstat is used,
// so a symlink at a candidate path counts as present without being followed.
func resolveManifestPath(n model.Name) (string, string, error) {
	if !n.IsValid() {
		return "", "", os.ErrNotExist
	}

	v2Path, err := V2PathForName(n)
	if err != nil {
		return "", "", err
	}

	_, statErr := os.Lstat(v2Path)
	switch {
	case statErr == nil:
		root, err := V2Path()
		return v2Path, root, err
	case !os.IsNotExist(statErr):
		return "", "", statErr
	}

	// Nothing at the v2 location: fall back to the legacy layout.
	legacyRoot, err := Path()
	if err != nil {
		return "", "", err
	}

	for _, candidate := range legacyNameCandidates(n) {
		candidatePath := filepath.Join(legacyRoot, candidate.Filepath())
		_, statErr := os.Lstat(candidatePath)
		if statErr == nil {
			return candidatePath, legacyRoot, nil
		}
		if !os.IsNotExist(statErr) {
			return "", "", statErr
		}
	}

	return "", "", os.ErrNotExist
}
// removeNamedManifestPaths deletes the v2 manifest path and every legacy
// candidate path for n, then prunes both manifest roots.
//
// All paths are resolved before anything is deleted, so a resolution error
// leaves the files untouched. Paths that do not exist are ignored.
func removeNamedManifestPaths(n model.Name) error {
	legacyNames := legacyNameCandidates(n)

	v2Path, err := V2PathForName(n)
	if err != nil {
		return err
	}

	targets := append(make([]string, 0, len(legacyNames)+1), v2Path)
	for i := range legacyNames {
		legacyPath, err := LegacyPathForName(legacyNames[i])
		if err != nil {
			return err
		}
		targets = append(targets, legacyPath)
	}

	for _, target := range targets {
		err := os.Remove(target)
		if err == nil || os.IsNotExist(err) {
			continue
		}
		return err
	}

	return pruneManifestRoots()
}
// removeLegacyManifestPaths deletes every legacy candidate path for n and
// then prunes the legacy manifest root. Missing files, and a missing root
// during pruning, are not treated as errors.
func removeLegacyManifestPaths(n model.Name) error {
	candidates := legacyNameCandidates(n)
	for i := range candidates {
		target, err := LegacyPathForName(candidates[i])
		if err != nil {
			return err
		}
		err = os.Remove(target)
		if err != nil && !os.IsNotExist(err) {
			return err
		}
	}

	root, err := Path()
	if err != nil {
		return err
	}

	err = PruneDirectory(root)
	if err == nil || os.IsNotExist(err) {
		return nil
	}
	return err
}
// pruneManifestRoots runs PruneDirectory on the legacy manifest root and
// then on the v2 manifest root, stopping at the first error other than
// "does not exist".
func pruneManifestRoots() error {
	for _, resolveRoot := range [...]func() (string, error){Path, V2Path} {
		root, err := resolveRoot()
		if err != nil {
			return err
		}

		err = PruneDirectory(root)
		if err != nil && !os.IsNotExist(err) {
			return err
		}
	}
	return nil
}
// normalizeLogicalName returns n with its host rewritten to the legacy
// default public host when the host is either public host, so that names
// used as map keys compare equal whichever host appears on disk. Names on
// any other host are returned unchanged.
func normalizeLogicalName(n model.Name) model.Name {
	if !isDefaultPublicHost(n.Host) {
		return n
	}
	n.Host = defaultPublicHost
	return n
}
// canonicalV2Name returns n with its host rewritten to the v2 canonical
// host when the host is either public host; the result is used to build
// on-disk paths under manifests-v2/. Names on any other host are returned
// unchanged.
func canonicalV2Name(n model.Name) model.Name {
	if !isDefaultPublicHost(n.Host) {
		return n
	}
	n.Host = v2CanonicalHost
	return n
}
// legacyNameCandidates returns the names under which n may be stored in the
// legacy manifest directory. The first entry is always n itself; when n is
// on one of the public hosts, a second entry carries the other public host.
func legacyNameCandidates(n model.Name) []model.Name {
	if !isDefaultPublicHost(n.Host) {
		return []model.Name{n}
	}

	other := n
	if strings.EqualFold(n.Host, defaultPublicHost) {
		other.Host = v2CanonicalHost
	} else {
		other.Host = defaultPublicHost
	}
	return []model.Name{n, other}
}
// isDefaultPublicHost reports whether host equals, ignoring case, either the
// legacy default public host or the v2 canonical host.
func isDefaultPublicHost(host string) bool {
	for _, known := range [...]string{defaultPublicHost, v2CanonicalHost} {
		if strings.EqualFold(host, known) {
			return true
		}
	}
	return false
}
func BlobsPath(digest string) (string, error) { func BlobsPath(digest string) (string, error) {
// only accept actual sha256 digests // only accept actual sha256 digests
pattern := "^sha256[:-][0-9a-fA-F]{64}$" pattern := "^sha256[:-][0-9a-fA-F]{64}$"

View File

@@ -102,7 +102,24 @@ func (s *Server) CreateHandler(c *gin.Context) {
ch <- resp ch <- resp
} }
oldManifest, _ := manifest.ParseNamedManifest(name) oldManifestDigests, _ := manifest.ReferencedBlobDigestsForName(name)
if len(r.List) > 0 {
if err := createManifestList(r, name, fn); err != nil {
ch <- gin.H{"error": err.Error()}
return
}
if !envconfig.NoPrune() && len(oldManifestDigests) > 0 {
if _, err := manifest.RemoveUnreferencedBlobs(oldManifestDigests...); err != nil {
ch <- gin.H{"error": err.Error()}
return
}
}
ch <- api.ProgressResponse{Status: "success"}
return
}
var baseLayers []*layerGGML var baseLayers []*layerGGML
var err error var err error
@@ -265,8 +282,8 @@ func (s *Server) CreateHandler(c *gin.Context) {
return return
} }
if !envconfig.NoPrune() && oldManifest != nil { if !envconfig.NoPrune() && len(oldManifestDigests) > 0 {
if err := oldManifest.RemoveLayers(); err != nil { if _, err := manifest.RemoveUnreferencedBlobs(oldManifestDigests...); err != nil {
ch <- gin.H{"error": err.Error()} ch <- gin.H{"error": err.Error()}
} }
} }
@@ -599,13 +616,162 @@ func createModel(r api.CreateRequest, name model.Name, baseLayers []*layerGGML,
} }
fn(api.ProgressResponse{Status: "writing manifest"}) fn(api.ProgressResponse{Status: "writing manifest"})
if err := manifest.WriteManifest(name, *configLayer, layers); err != nil { runner, format := manifestMetadataForConfig(*config)
if err := manifest.WriteManifestWithMetadata(name, *configLayer, layers, runner, format); err != nil {
return err return err
} }
return nil return nil
} }
// createManifestList builds a manifest list named name from the local models
// in r.List and writes it with manifest.WriteManifestData.
//
// The request is validated first. Each entry is trimmed, reported through fn
// as "reading manifest <ref>", and turned into a manifest reference; the
// first failing entry aborts the operation. fn is called once more with
// "writing manifest list" before the parent is written.
func createManifestList(r api.CreateRequest, name model.Name, fn func(resp api.ProgressResponse)) error {
	if err := validateCreateManifestListRequest(r); err != nil {
		return err
	}

	manifests := make([]manifest.Manifest, 0, len(r.List))
	for _, ref := range r.List {
		ref = strings.TrimSpace(ref)
		if ref == "" {
			return errors.New("manifest list contains an empty model")
		}

		fn(api.ProgressResponse{Status: fmt.Sprintf("reading manifest %s", ref)})
		childRef, err := manifestListChildReference(ref)
		if err != nil {
			return err
		}
		manifests = append(manifests, childRef)
	}

	parent := manifest.Manifest{
		SchemaVersion: 2,
		MediaType:     manifest.MediaTypeManifestList,
		Manifests:     manifests,
	}
	data, err := json.Marshal(parent)
	if err != nil {
		return fmt.Errorf("encode manifest list: %w", err)
	}

	fn(api.ProgressResponse{Status: "writing manifest list"})
	return manifest.WriteManifestData(name, data)
}

// manifestListChildReference resolves ref to an existing local model, fills
// in any missing runner/format metadata on its manifest, stores the resulting
// manifest as a blob, and returns a reference to that blob. Cloud models and
// entries that are themselves manifest lists are rejected.
func manifestListChildReference(ref string) (manifest.Manifest, error) {
	var none manifest.Manifest

	modelRef, err := parseAndValidateModelRef(ref)
	if err != nil {
		return none, err
	}
	if modelRef.Source == modelSourceCloud {
		return none, fmt.Errorf("manifest list entries must be local models: %s", ref)
	}

	childName, err := getExistingName(modelRef.Name)
	if err != nil {
		return none, err
	}

	data, err := manifest.ReadManifestData(childName)
	if err != nil {
		return none, fmt.Errorf("read manifest %s: %w", ref, err)
	}

	var child manifest.Manifest
	if err := json.Unmarshal(data, &child); err != nil {
		return none, fmt.Errorf("parse manifest %s: %w", ref, err)
	}
	if child.MediaType == manifest.MediaTypeManifestList {
		return none, fmt.Errorf("manifest list entry %s is already a manifest list", ref)
	}
	if err := fillManifestMetadata(&child); err != nil {
		return none, fmt.Errorf("manifest list entry %s: %w", ref, err)
	}

	// Re-encode so the stored child blob carries the filled-in metadata.
	childData, err := json.Marshal(child)
	if err != nil {
		return none, fmt.Errorf("encode manifest %s: %w", ref, err)
	}
	childDigest, err := manifest.WriteManifestBlob(childData)
	if err != nil {
		return none, fmt.Errorf("write manifest blob %s: %w", ref, err)
	}
	childRef, err := manifest.NewManifestReference(childDigest, child.Runner, child.Format)
	if err != nil {
		return none, fmt.Errorf("manifest list entry %s: %w", ref, err)
	}
	return childRef, nil
}
// validateCreateManifestListRequest checks that r describes a pure manifest
// list creation: r.List must be non-empty and none of the model creation
// fields (source, files, adapters, template, system prompt, license,
// parameters, messages, renderer, parser, requires, info, quantization) may
// be set.
func validateCreateManifestListRequest(r api.CreateRequest) error {
	if len(r.List) == 0 {
		return errors.New("manifest list must contain at least one model")
	}

	hasSource := r.From != "" || r.RemoteHost != "" || len(r.Files) > 0 || len(r.Adapters) > 0
	hasPrompting := r.Template != "" || r.System != "" || r.License != nil || len(r.Parameters) > 0 || len(r.Messages) > 0
	hasRuntime := r.Renderer != "" || r.Parser != "" || r.Requires != "" || len(r.Info) > 0
	hasQuantize := r.Quantize != "" || r.Quantization != ""

	if hasSource || hasPrompting || hasRuntime || hasQuantize {
		return errors.New("manifest list creation cannot be combined with model creation options")
	}
	return nil
}
// fillManifestMetadata ensures m has both Runner and Format set. Values
// already present are kept; missing ones are derived from the manifest's
// config blob. An error is returned when the config cannot be read or when
// either field is still empty afterwards.
func fillManifestMetadata(m *manifest.Manifest) error {
	if m.Runner == "" || m.Format == "" {
		config, err := readManifestConfig(m.Config.Digest)
		if err != nil {
			return err
		}

		derivedRunner, derivedFormat := manifestMetadataForConfig(config)
		if m.Runner == "" {
			m.Runner = derivedRunner
		}
		if m.Format == "" {
			m.Format = derivedFormat
		}

		if m.Runner == "" || m.Format == "" {
			return errors.New("manifest is missing runner or format metadata")
		}
	}
	return nil
}
// readManifestConfig opens the config blob identified by digest and decodes
// its JSON contents into a model.ConfigV2.
//
// It returns an error when digest is empty, when the digest cannot be mapped
// to a blob path, when the blob cannot be opened, or when decoding fails.
func readManifestConfig(digest string) (model.ConfigV2, error) {
	var config model.ConfigV2
	if digest == "" {
		return config, errors.New("manifest is missing config digest")
	}

	configPath, err := manifest.BlobsPath(digest)
	if err != nil {
		return config, err
	}

	configFile, err := os.Open(configPath)
	if err != nil {
		return config, err
	}
	defer configFile.Close()

	// Decode in its own statement. In `return config, Decode(&config)` the Go
	// spec does not order the read of config relative to the call that fills
	// it, so the returned value could legally be the zero value.
	err = json.NewDecoder(configFile).Decode(&config)
	return config, err
}
// manifestMetadataForConfig maps the config's model format, compared
// case-insensitively, to a runner and a manifest format: safetensors maps to
// the MLX runner, and gguf or "ggml" map to the GGML runner with the GGUF
// format. Any other value yields an empty runner and the lower-cased format.
func manifestMetadataForConfig(config model.ConfigV2) (runner, format string) {
	modelFormat := strings.ToLower(config.ModelFormat)

	if modelFormat == manifest.FormatSafetensors {
		return manifest.RunnerMLX, manifest.FormatSafetensors
	}
	if modelFormat == manifest.FormatGGUF || modelFormat == "ggml" {
		return manifest.RunnerGGML, manifest.FormatGGUF
	}
	return "", modelFormat
}
func quantizeLayer(layer *layerGGML, quantizeType string, fn func(resp api.ProgressResponse)) (*layerGGML, error) { func quantizeLayer(layer *layerGGML, quantizeType string, fn func(resp api.ProgressResponse)) (*layerGGML, error) {
ft := layer.GGML.KV().FileType() ft := layer.GGML.KV().FileType()
var doneBytes atomic.Uint64 var doneBytes atomic.Uint64

View File

@@ -71,6 +71,8 @@ type Model struct {
System string System string
License []string License []string
Digest string Digest string
ManifestDigest string
Runner string
Options map[string]any Options map[string]any
Messages []api.Message Messages []api.Message
@@ -300,17 +302,30 @@ func (m *Model) String() string {
} }
func GetModel(name string) (*Model, error) { func GetModel(name string) (*Model, error) {
return GetModelForRunner(name, "")
}
// GetModelForRunner returns model metadata for name, selecting runner from a
// manifest list when one is specified.
func GetModelForRunner(name, runner string) (*Model, error) {
n := model.ParseName(name) n := model.ParseName(name)
mf, err := manifest.ParseNamedManifest(n) mf, err := manifest.ParseNamedManifestForRunner(n, runner)
if err != nil { if err != nil {
return nil, err return nil, err
} }
manifestDigest := mf.SelectedDigest()
if manifestDigest == "" {
manifestDigest = mf.Digest()
}
m := &Model{ m := &Model{
Name: n.String(), Name: n.String(),
ShortName: n.DisplayShortest(), ShortName: n.DisplayShortest(),
Digest: mf.Digest(), Digest: mf.Digest(),
Template: template.DefaultTemplate, ManifestDigest: manifestDigest,
Runner: mf.Runner,
Template: template.DefaultTemplate,
} }
if mf.Config.Digest != "" { if mf.Config.Digest != "" {
@@ -411,66 +426,16 @@ func CopyModel(src, dst model.Name) error {
return nil return nil
} }
manifests, err := manifest.Path() data, err := manifest.ReadManifestData(src)
if err != nil { if err != nil {
return err return err
} }
dstpath := filepath.Join(manifests, dst.Filepath()) return manifest.WriteManifestData(dst, data)
if err := os.MkdirAll(filepath.Dir(dstpath), 0o755); err != nil {
return err
}
srcpath := filepath.Join(manifests, src.Filepath())
srcfile, err := os.Open(srcpath)
if err != nil {
return err
}
defer srcfile.Close()
dstfile, err := os.Create(dstpath)
if err != nil {
return err
}
defer dstfile.Close()
_, err = io.Copy(dstfile, srcfile)
return err
}
func deleteUnusedLayers(deleteMap map[string]struct{}) error {
// Ignore corrupt manifests to avoid blocking deletion of layers that are freshly orphaned
manifests, err := manifest.Manifests(true)
if err != nil {
return err
}
for _, manifest := range manifests {
for _, layer := range manifest.Layers {
delete(deleteMap, layer.Digest)
}
delete(deleteMap, manifest.Config.Digest)
}
// only delete the files which are still in the deleteMap
for k := range deleteMap {
fp, err := manifest.BlobsPath(k)
if err != nil {
slog.Info(fmt.Sprintf("couldn't get file path for '%s': %v", k, err))
continue
}
if err := os.Remove(fp); err != nil {
slog.Info(fmt.Sprintf("couldn't remove file '%s': %v", fp, err))
continue
}
}
return nil
} }
func PruneLayers() error { func PruneLayers() error {
deleteMap := make(map[string]struct{}) var candidates []string
p, err := manifest.BlobsPath("") p, err := manifest.BlobsPath("")
if err != nil { if err != nil {
return err return err
@@ -511,17 +476,18 @@ func PruneLayers() error {
continue continue
} }
deleteMap[name] = struct{}{} candidates = append(candidates, name)
} }
slog.Info(fmt.Sprintf("total blobs: %d", len(deleteMap))) slog.Info(fmt.Sprintf("total blobs: %d", len(candidates)))
if err := deleteUnusedLayers(deleteMap); err != nil { removed, err := manifest.RemoveUnreferencedBlobs(candidates...)
if err != nil {
slog.Error(fmt.Sprintf("couldn't remove unused layers: %v", err)) slog.Error(fmt.Sprintf("couldn't remove unused layers: %v", err))
return nil return nil
} }
slog.Info(fmt.Sprintf("total unused blobs removed: %d", len(deleteMap))) slog.Info(fmt.Sprintf("total unused blobs removed: %d", removed))
return nil return nil
} }
@@ -534,29 +500,49 @@ func PushModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
return errInsecureProtocol return errInsecureProtocol
} }
mf, err := manifest.ParseNamedManifest(n) manifestJSON, err := manifest.ReadManifestData(n)
if err != nil { if err != nil {
fn(api.ProgressResponse{Status: "couldn't retrieve manifest"}) fn(api.ProgressResponse{Status: "couldn't retrieve manifest"})
return err return err
} }
var stored manifest.Manifest
if err := json.Unmarshal(manifestJSON, &stored); err != nil {
fn(api.ProgressResponse{Status: "couldn't retrieve manifest"})
return err
}
var layers []manifest.Layer var layers []manifest.Layer
layers = append(layers, mf.Layers...) manifestMediaType := manifest.MediaTypeManifest
if mf.Config.Digest != "" {
layers = append(layers, mf.Config) if stored.MediaType == manifest.MediaTypeManifestList {
layers, err = pushLayersForManifestList(stored)
if err != nil {
return err
}
manifestMediaType = manifest.MediaTypeManifestList
} else {
mf, err := manifest.ParseNamedManifest(n)
if err != nil {
fn(api.ProgressResponse{Status: "couldn't retrieve manifest"})
return err
}
layers = append(layers, mf.Layers...)
if mf.Config.Digest != "" {
layers = append(layers, mf.Config)
}
if !hasTensorLayers(layers) {
manifestJSON, err = json.Marshal(mf)
if err != nil {
return err
}
}
} }
// Use fast transfer for models with tensor layers (many small blobs) // Use fast transfer for models with tensor layers (many small blobs)
if hasTensorLayers(layers) { if hasTensorLayers(layers) {
// Read raw manifest JSON to preserve tensor metadata fields
manifestPath, err := manifest.PathForName(n)
if err != nil {
return err
}
manifestJSON, err := os.ReadFile(manifestPath)
if err != nil {
return err
}
if err := pushWithTransfer(ctx, n, layers, manifestJSON, regOpts, fn); err != nil { if err := pushWithTransfer(ctx, n, layers, manifestJSON, regOpts, fn); err != nil {
return err return err
} }
@@ -575,13 +561,8 @@ func PushModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
requestURL := n.BaseURL() requestURL := n.BaseURL()
requestURL = requestURL.JoinPath("v2", n.DisplayNamespaceModel(), "manifests", n.Tag) requestURL = requestURL.JoinPath("v2", n.DisplayNamespaceModel(), "manifests", n.Tag)
manifestJSON, err := json.Marshal(mf)
if err != nil {
return err
}
headers := make(http.Header) headers := make(http.Header)
headers.Set("Content-Type", "application/vnd.docker.distribution.manifest.v2+json") headers.Set("Content-Type", manifestMediaType)
resp, err := makeRequestWithRetry(ctx, http.MethodPut, requestURL, headers, bytes.NewReader(manifestJSON), regOpts) resp, err := makeRequestWithRetry(ctx, http.MethodPut, requestURL, headers, bytes.NewReader(manifestJSON), regOpts)
if err != nil { if err != nil {
return err return err
@@ -593,22 +574,79 @@ func PushModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
return nil return nil
} }
// pushLayersForManifestList gathers every blob that must be uploaded for the
// manifest list parent: each child manifest blob itself, followed by that
// child's layers and its config. Blobs are de-duplicated by digest and
// returned in first-seen order.
//
// It returns an error when a child has no digest, when a child cannot be
// resolved, or when the size of a blob with an unset size cannot be read
// from disk.
func pushLayersForManifestList(parent manifest.Manifest) ([]manifest.Layer, error) {
	var (
		collected []manifest.Layer
		visited   = make(map[string]struct{})
	)

	// include appends l unless its digest is empty or already recorded. A
	// zero size is replaced by the on-disk size of the blob.
	include := func(l manifest.Layer) error {
		if l.Digest == "" {
			return nil
		}
		if _, dup := visited[l.Digest]; dup {
			return nil
		}

		if l.Size == 0 {
			blobPath, err := manifest.BlobsPath(l.Digest)
			if err != nil {
				return err
			}
			info, err := os.Stat(blobPath)
			if err != nil {
				return err
			}
			l.Size = info.Size()
		}

		visited[l.Digest] = struct{}{}
		collected = append(collected, l)
		return nil
	}

	for _, ref := range parent.Manifests {
		digest := ref.BlobDigest()
		if digest == "" {
			return nil, errors.New("manifest list child is missing digest")
		}

		// The child manifest is itself pushed as a blob.
		manifestBlob := manifest.Layer{
			MediaType: manifest.MediaTypeManifest,
			Digest:    digest,
		}
		if err := include(manifestBlob); err != nil {
			return nil, err
		}

		full, err := resolveShowManifestChild(ref)
		if err != nil {
			return nil, err
		}
		for i := range full.Layers {
			if err := include(full.Layers[i]); err != nil {
				return nil, err
			}
		}
		if err := include(full.Config); err != nil {
			return nil, err
		}
	}

	return collected, nil
}
func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn func(api.ProgressResponse)) error { func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn func(api.ProgressResponse)) error {
n := model.ParseName(name) n := model.ParseName(name)
// build deleteMap to prune unused layers // build deleteMap to prune unused layers
deleteMap := make(map[string]struct{}) deleteMap := make(map[string]struct{})
existingMf, err := manifest.ParseNamedManifest(n) existingDigests, err := manifest.ReferencedBlobDigestsForName(n)
if errors.Is(err, os.ErrNotExist) { if errors.Is(err, os.ErrNotExist) {
// noop // noop
} else if err != nil { } else if err != nil {
slog.Warn("pulling model with bad existing manifest", "name", name, "error", err) slog.Warn("pulling model with bad existing manifest", "name", name, "error", err)
} else { } else {
for _, l := range existingMf.Layers { for _, digest := range existingDigests {
deleteMap[l.Digest] = struct{}{} if blob, err := manifest.BlobsPath(digest); err == nil {
} if _, err := os.Stat(blob); err == nil {
if existingMf.Config.Digest != "" { deleteMap[digest] = struct{}{}
deleteMap[existingMf.Config.Digest] = struct{}{} }
}
} }
} }
@@ -623,6 +661,13 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
return fmt.Errorf("pull model manifest: %s", err) return fmt.Errorf("pull model manifest: %s", err)
} }
if mf.MediaType == manifest.MediaTypeManifestList {
mf, err = pullSelectedManifest(ctx, n, mf, regOpts, fn)
if err != nil {
return err
}
}
var layers []manifest.Layer var layers []manifest.Layer
layers = append(layers, mf.Layers...) layers = append(layers, mf.Layers...)
if mf.Config.Digest != "" { if mf.Config.Digest != "" {
@@ -679,25 +724,16 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
fn(api.ProgressResponse{Status: "writing manifest"}) fn(api.ProgressResponse{Status: "writing manifest"})
fp, err := manifest.PathForName(n) if err := manifest.WriteManifestData(n, manifestData); err != nil {
if err != nil { slog.Info(fmt.Sprintf("couldn't write manifest for %s", n.DisplayShortest()))
return err
}
if err := os.MkdirAll(filepath.Dir(fp), 0o755); err != nil {
return err return err
} }
err = os.WriteFile(fp, manifestData, 0o644) slog.Debug("manifest written", "name", n.DisplayShortest(), "sha256", fmt.Sprintf("%x", sha256.Sum256(manifestData)), "size", len(manifestData))
if err != nil {
slog.Info(fmt.Sprintf("couldn't write to %s", fp))
return err
}
slog.Debug("manifest written", "path", fp, "sha256", fmt.Sprintf("%x", sha256.Sum256(manifestData)), "size", len(manifestData))
if !envconfig.NoPrune() && len(deleteMap) > 0 { if !envconfig.NoPrune() && len(deleteMap) > 0 {
fn(api.ProgressResponse{Status: "removing unused layers"}) fn(api.ProgressResponse{Status: "removing unused layers"})
if err := deleteUnusedLayers(deleteMap); err != nil { if _, err := manifest.RemoveUnreferencedBlobs(candidateBlobDigests(deleteMap)...); err != nil {
fn(api.ProgressResponse{Status: fmt.Sprintf("couldn't remove unused layers: %v", err)}) fn(api.ProgressResponse{Status: fmt.Sprintf("couldn't remove unused layers: %v", err)})
} }
} }
@@ -717,8 +753,85 @@ func hasTensorLayers(layers []manifest.Layer) bool {
return false return false
} }
// pullWithTransfer uses the simplified x/transfer package for downloading blobs. func candidateBlobDigests(m map[string]struct{}) []string {
func pullWithTransfer(ctx context.Context, n model.Name, layers []manifest.Layer, manifestData []byte, regOpts *registryOptions, fn func(api.ProgressResponse)) error { digests := make([]string, 0, len(m))
for digest := range m {
digests = append(digests, digest)
}
return digests
}
// pullSelectedManifest picks one child reference from the manifest list
// parent via manifest.SelectManifestReference, downloads that child's
// manifest blob, verifies it, and returns it decoded.
//
// Runner and Format on the decoded manifest fall back to the values on the
// selected reference when the blob leaves them empty. A child that is itself
// a manifest list is rejected.
func pullSelectedManifest(ctx context.Context, n model.Name, parent *manifest.Manifest, regOpts *registryOptions, fn func(api.ProgressResponse)) (*manifest.Manifest, error) {
	ref, err := manifest.SelectManifestReference(parent.Manifests)
	if err != nil {
		return nil, err
	}

	digest := ref.BlobDigest()
	if digest == "" {
		return nil, errors.New("manifest list child is missing digest")
	}

	// Fetch the child manifest as an ordinary blob, then verify it.
	manifestBlob, err := remoteBlobLayer(ctx, n, digest, manifest.MediaTypeManifest, regOpts)
	if err != nil {
		return nil, err
	}
	if err = downloadWithTransfer(ctx, n, []manifest.Layer{manifestBlob}, regOpts, fn); err != nil {
		return nil, err
	}
	if err = verifyBlob(digest); err != nil {
		return nil, err
	}

	blobFile, err := manifest.BlobsPath(digest)
	if err != nil {
		return nil, err
	}
	raw, err := os.ReadFile(blobFile)
	if err != nil {
		return nil, err
	}

	selected := new(manifest.Manifest)
	if err = json.Unmarshal(raw, selected); err != nil {
		return nil, err
	}
	if selected.MediaType == manifest.MediaTypeManifestList {
		return nil, errors.New("nested manifest lists are not supported")
	}

	// Inherit metadata from the list entry when the child omits it.
	if selected.Runner == "" {
		selected.Runner = ref.Runner
	}
	if selected.Format == "" {
		selected.Format = ref.Format
	}
	return selected, nil
}
// remoteBlobLayer issues a HEAD request for the blob identified by digest in
// the repository named by n and returns a layer descriptor carrying
// mediaType, digest, and the size taken from the Content-Length response
// header.
//
// It returns an error when the request fails or when the Content-Length
// header is missing, malformed, or negative.
func remoteBlobLayer(ctx context.Context, n model.Name, digest, mediaType string, regOpts *registryOptions) (manifest.Layer, error) {
	requestURL := n.BaseURL().JoinPath("v2", n.DisplayNamespaceModel(), "blobs", digest)
	resp, err := makeRequestWithRetry(ctx, http.MethodHead, requestURL, nil, nil, regOpts)
	if err != nil {
		return manifest.Layer{}, err
	}
	defer resp.Body.Close()

	// Name the blob and the offending header value in the error; a bare
	// strconv error gives no hint which request produced it.
	contentLength := resp.Header.Get("Content-Length")
	size, err := strconv.ParseInt(contentLength, 10, 64)
	if err != nil {
		return manifest.Layer{}, fmt.Errorf("blob %s: invalid Content-Length %q: %w", digest, contentLength, err)
	}
	if size < 0 {
		return manifest.Layer{}, fmt.Errorf("blob %s: negative Content-Length %d", digest, size)
	}

	return manifest.Layer{
		MediaType: mediaType,
		Digest:    digest,
		Size:      size,
	}, nil
}
func downloadWithTransfer(ctx context.Context, n model.Name, layers []manifest.Layer, regOpts *registryOptions, fn func(api.ProgressResponse)) error {
blobs := make([]transfer.Blob, len(layers)) blobs := make([]transfer.Blob, len(layers))
for i, layer := range layers { for i, layer := range layers {
blobs[i] = transfer.Blob{ blobs[i] = transfer.Blob{
@@ -773,22 +886,23 @@ func pullWithTransfer(ctx context.Context, n model.Name, layers []manifest.Layer
return err return err
} }
return nil
}
// pullWithTransfer uses the simplified x/transfer package for downloading blobs.
func pullWithTransfer(ctx context.Context, n model.Name, layers []manifest.Layer, manifestData []byte, regOpts *registryOptions, fn func(api.ProgressResponse)) error {
if err := downloadWithTransfer(ctx, n, layers, regOpts, fn); err != nil {
return err
}
// Write manifest // Write manifest
fn(api.ProgressResponse{Status: "writing manifest"}) fn(api.ProgressResponse{Status: "writing manifest"})
fp, err := manifest.PathForName(n) if err := manifest.WriteManifestData(n, manifestData); err != nil {
if err != nil {
return err
}
if err := os.MkdirAll(filepath.Dir(fp), 0o755); err != nil {
return err return err
} }
if err := os.WriteFile(fp, manifestData, 0o644); err != nil { slog.Debug("manifest written", "name", n.DisplayShortest(), "sha256", fmt.Sprintf("%x", sha256.Sum256(manifestData)), "size", len(manifestData))
return err
}
slog.Debug("manifest written", "path", fp, "sha256", fmt.Sprintf("%x", sha256.Sum256(manifestData)), "size", len(manifestData))
return nil return nil
} }
@@ -854,7 +968,7 @@ func pullModelManifest(ctx context.Context, n model.Name, regOpts *registryOptio
requestURL := n.BaseURL().JoinPath("v2", n.DisplayNamespaceModel(), "manifests", n.Tag) requestURL := n.BaseURL().JoinPath("v2", n.DisplayNamespaceModel(), "manifests", n.Tag)
headers := make(http.Header) headers := make(http.Header)
headers.Set("Accept", "application/vnd.docker.distribution.manifest.v2+json") headers.Set("Accept", strings.Join([]string{manifest.MediaTypeManifestList, manifest.MediaTypeManifest}, ", "))
resp, err := makeRequestWithRetry(ctx, http.MethodGet, requestURL, headers, nil, regOpts) resp, err := makeRequestWithRetry(ctx, http.MethodGet, requestURL, headers, nil, regOpts)
if err != nil { if err != nil {
return nil, nil, err return nil, nil, err

View File

@@ -1,15 +1,19 @@
package server package server
import ( import (
"bytes"
"crypto/sha256" "crypto/sha256"
"encoding/json"
"fmt" "fmt"
"net/http" "net/http"
"net/http/httptest" "net/http/httptest"
"os" "os"
"strconv"
"strings" "strings"
"testing" "testing"
"time" "time"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/fs/ggml" "github.com/ollama/ollama/fs/ggml"
"github.com/ollama/ollama/manifest" "github.com/ollama/ollama/manifest"
"github.com/ollama/ollama/template" "github.com/ollama/ollama/template"
@@ -57,6 +61,203 @@ func TestPruneLayersSkipsRecentOrphans(t *testing.T) {
} }
} }
// TestPushLayersForManifestListIncludesChildManifests stores two child
// manifests (one MLX/safetensors, one GGML/GGUF), builds a manifest list
// referencing both, and checks that pushLayersForManifestList returns exactly
// the two child manifest blobs plus each child's config and layer, with the
// expected media types and non-zero sizes.
func TestPushLayersForManifestListIncludesChildManifests(t *testing.T) {
	t.Setenv("OLLAMA_MODELS", t.TempDir())

	// fixture bundles a stored child manifest with the blobs it references.
	type fixture struct {
		mf     manifest.Manifest
		config manifest.Layer
		layer  manifest.Layer
	}

	store := func(name, runner, formatName, layerMediaType string) fixture {
		t.Helper()

		config, err := manifest.NewLayer(strings.NewReader(name+" config"), "application/vnd.docker.container.image.v1+json")
		if err != nil {
			t.Fatal(err)
		}
		layer, err := manifest.NewLayer(strings.NewReader(name+" layer"), layerMediaType)
		if err != nil {
			t.Fatal(err)
		}

		n := model.ParseName(name)
		if err := manifest.WriteManifestWithMetadata(n, config, []manifest.Layer{layer}, runner, formatName); err != nil {
			t.Fatal(err)
		}
		mf, err := manifest.ParseNamedManifestForRunner(n, runner)
		if err != nil {
			t.Fatal(err)
		}
		return fixture{mf: *mf, config: config, layer: layer}
	}

	children := []fixture{
		store("library/push-mlx:latest", manifest.RunnerMLX, manifest.FormatSafetensors, manifest.MediaTypeImageTensor),
		store("library/push-ggml:latest", manifest.RunnerGGML, manifest.FormatGGUF, "application/vnd.ollama.image.model"),
	}

	// Build the list entries and the expected digest -> media type table in
	// one pass over the stored children.
	refs := make([]manifest.Manifest, 0, len(children))
	want := make(map[string]string)
	for i := range children {
		c := &children[i]
		ref, err := manifest.NewManifestReference(c.mf.BlobDigest(), c.mf.Runner, c.mf.Format)
		if err != nil {
			t.Fatal(err)
		}
		refs = append(refs, ref)

		want[c.mf.BlobDigest()] = manifest.MediaTypeManifest
		want[c.config.Digest] = c.config.MediaType
		want[c.layer.Digest] = c.layer.MediaType
	}

	layers, err := pushLayersForManifestList(manifest.Manifest{
		SchemaVersion: 2,
		MediaType:     manifest.MediaTypeManifestList,
		Manifests:     refs,
	})
	if err != nil {
		t.Fatal(err)
	}

	if len(layers) != len(want) {
		t.Fatalf("layer count = %d, want %d: %#v", len(layers), len(want), layers)
	}
	for _, got := range layers {
		wantMediaType, known := want[got.Digest]
		switch {
		case !known:
			t.Fatalf("unexpected layer digest %q", got.Digest)
		case got.MediaType != wantMediaType:
			t.Fatalf("layer %q media type = %q, want %q", got.Digest, got.MediaType, wantMediaType)
		case got.Size == 0:
			t.Fatalf("layer %q has zero size", got.Digest)
		}
	}

	if !hasTensorLayers(layers) {
		t.Fatal("manifest list push layers did not preserve tensor media type")
	}
}
// TestPullModelManifestListDownloadsSelectedChildOnly pulls a name whose
// registry manifest is a manifest list with two children and checks that:
// the named manifest stored locally is the parent list byte-for-byte, parsing
// the name yields the GGML child, the child manifest/config/layer blobs exist
// locally, and the other child is never requested nor stored.
func TestPullModelManifestListDownloadsSelectedChildOnly(t *testing.T) {
t.Setenv("OLLAMA_MODELS", t.TempDir())
// Blob contents and their sha256 digests for the child that should be pulled.
configData := []byte(`{"architecture":"test"}`)
configDigest := fmt.Sprintf("sha256:%x", sha256.Sum256(configData))
layerData := []byte("selected tensor layer")
layerDigest := fmt.Sprintf("sha256:%x", sha256.Sum256(layerData))
child := manifest.Manifest{
SchemaVersion: 2,
MediaType: manifest.MediaTypeManifest,
Runner: manifest.RunnerGGML,
Format: manifest.FormatGGUF,
Config: manifest.Layer{
MediaType: "application/vnd.docker.container.image.v1+json",
Digest: configDigest,
Size: int64(len(configData)),
},
Layers: []manifest.Layer{
{
MediaType: manifest.MediaTypeImageTensor,
Digest: layerDigest,
Size: int64(len(layerData)),
},
},
}
childData, err := json.Marshal(child)
if err != nil {
t.Fatal(err)
}
// The child manifest is addressed by the digest of its serialized JSON.
childDigest := fmt.Sprintf("sha256:%x", sha256.Sum256(childData))
childRef, err := manifest.NewManifestReference(childDigest, manifest.RunnerGGML, manifest.FormatGGUF)
if err != nil {
t.Fatal(err)
}
// A second list entry with a well-formed digest but no backing blob; the
// pull is expected to leave it alone.
unselectedDigest := "sha256:" + strings.Repeat("f", 64)
unselectedRef, err := manifest.NewManifestReference(unselectedDigest, manifest.RunnerLlamaCPP, manifest.FormatGGUF)
if err != nil {
t.Fatal(err)
}
parent := manifest.Manifest{
SchemaVersion: 2,
MediaType: manifest.MediaTypeManifestList,
Manifests: []manifest.Manifest{childRef, unselectedRef},
}
parentData, err := json.Marshal(parent)
if err != nil {
t.Fatal(err)
}
// Blobs the fake registry can serve; the unselected child is deliberately absent.
blobs := map[string][]byte{
childDigest: childData,
configDigest: configData,
layerDigest: layerData,
}
// Fake registry: serves the parent list for the tag and the known blobs by digest.
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
switch {
case r.Method == http.MethodGet && r.URL.Path == "/v2/library/test/manifests/latest":
w.Header().Set("Content-Type", manifest.MediaTypeManifestList)
w.Header().Set("Content-Length", strconv.Itoa(len(parentData)))
_, _ = w.Write(parentData)
case (r.Method == http.MethodHead || r.Method == http.MethodGet) && strings.HasPrefix(r.URL.Path, "/v2/library/test/blobs/"):
digest := strings.TrimPrefix(r.URL.Path, "/v2/library/test/blobs/")
// Any request for the unselected child fails the test.
if digest == unselectedDigest {
t.Errorf("requested unselected child manifest %s", digest)
http.Error(w, "unselected child requested", http.StatusNotFound)
return
}
data, ok := blobs[digest]
if !ok {
http.NotFound(w, r)
return
}
// HEAD gets only the Content-Length header; GET also receives the body.
w.Header().Set("Content-Length", strconv.Itoa(len(data)))
if r.Method == http.MethodGet {
_, _ = w.Write(data)
}
default:
http.NotFound(w, r)
}
}))
defer ts.Close()
// Build a model name that points at the test server (scheme stripped).
name := strings.TrimPrefix(ts.URL, "http://") + "/library/test:latest"
if err := PullModel(t.Context(), name, &registryOptions{Insecure: true}, func(api.ProgressResponse) {}); err != nil {
t.Fatal(err)
}
n := model.ParseName(name)
// The manifest stored under the name must be the parent list, unmodified.
gotParentData, err := manifest.ReadManifestData(n)
if err != nil {
t.Fatal(err)
}
if !bytes.Equal(gotParentData, parentData) {
t.Fatal("named manifest does not contain the parent manifest list")
}
// Parsing the named manifest is expected to resolve to the GGML child.
m, err := manifest.ParseNamedManifest(n)
if err != nil {
t.Fatal(err)
}
if m.Runner != manifest.RunnerGGML {
t.Fatalf("runner = %q, want %q", m.Runner, manifest.RunnerGGML)
}
if m.Config.Digest != configDigest {
t.Fatalf("config digest = %q, want %q", m.Config.Digest, configDigest)
}
// Every blob belonging to the pulled child must be present locally.
for _, digest := range []string{childDigest, configDigest, layerDigest} {
path, err := manifest.BlobsPath(digest)
if err != nil {
t.Fatal(err)
}
if _, err := os.Stat(path); err != nil {
t.Fatalf("expected blob %s to exist: %v", digest, err)
}
}
// The unselected child's manifest blob must not have been written.
path, err := manifest.BlobsPath(unselectedDigest)
if err != nil {
t.Fatal(err)
}
if _, err := os.Stat(path); !os.IsNotExist(err) {
t.Fatalf("unselected child manifest blob exists: %v", err)
}
}
func TestModelCapabilities(t *testing.T) { func TestModelCapabilities(t *testing.T) {
// Create completion model (llama architecture without vision) // Create completion model (llama architecture without vision)
completionModelPath, _ := createBinFile(t, ggml.KV{ completionModelPath, _ := createBinFile(t, ggml.KV{

View File

@@ -116,6 +116,10 @@ func (s *Local) serveHTTP(rec *statusCodeRecorder, r *http.Request) {
proxied, err := func() (bool, error) { proxied, err := func() (bool, error) {
switch r.URL.Path { switch r.URL.Path {
case "/api/delete": case "/api/delete":
if s.Fallback != nil {
s.Fallback.ServeHTTP(rec, r)
return true, nil
}
return false, s.handleDelete(rec, r) return false, s.handleDelete(rec, r)
case "/api/pull": case "/api/pull":
return false, s.handlePull(rec, r) return false, s.handlePull(rec, r)

View File

@@ -141,14 +141,29 @@ func (s *Server) modelOptions(model *Model, requestOpts map[string]any) (api.Opt
return opts, nil return opts, nil
} }
// normalizeRunner canonicalizes a user-supplied runner name. Surrounding
// whitespace and letter case are ignored, and known aliases ("mlxrunner",
// "llama.cpp", "llama-cpp", "llama_cpp") map to their manifest runner
// constants. An empty or blank input yields "" with no error; any other
// unrecognized value is reported as an error quoting the original input.
func normalizeRunner(runner string) (string, error) {
	key := strings.ToLower(strings.TrimSpace(runner))
	if key == "" {
		return "", nil
	}

	switch key {
	case manifest.RunnerMLX, "mlxrunner":
		return manifest.RunnerMLX, nil
	case manifest.RunnerGGML:
		return manifest.RunnerGGML, nil
	case manifest.RunnerLlamaCPP, "llama.cpp", "llama-cpp", "llama_cpp":
		return manifest.RunnerLlamaCPP, nil
	}
	return "", fmt.Errorf("unknown runner %q", runner)
}
// scheduleRunner schedules a runner after validating inputs such as capabilities and model options. // scheduleRunner schedules a runner after validating inputs such as capabilities and model options.
// It returns the allocated runner, model instance, and consolidated options if successful and error otherwise. // It returns the allocated runner, model instance, and consolidated options if successful and error otherwise.
func (s *Server) scheduleRunner(ctx context.Context, name string, caps []model.Capability, requestOpts map[string]any, keepAlive *api.Duration) (llm.LlamaServer, *Model, *api.Options, error) { func (s *Server) scheduleRunner(ctx context.Context, name, selectedRunner string, caps []model.Capability, requestOpts map[string]any, keepAlive *api.Duration) (llm.LlamaServer, *Model, *api.Options, error) {
if name == "" { if name == "" {
return nil, nil, nil, fmt.Errorf("model %w", errRequired) return nil, nil, nil, fmt.Errorf("model %w", errRequired)
} }
model, err := GetModel(name) model, err := GetModelForRunner(name, selectedRunner)
if err != nil { if err != nil {
return nil, nil, nil, err return nil, nil, nil, err
} }
@@ -207,6 +222,13 @@ func (s *Server) GenerateHandler(c *gin.Context) {
return return
} }
if runner, err := normalizeRunner(req.Runner); err != nil {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
} else {
req.Runner = runner
}
modelRef, err := parseAndValidateModelRef(req.Model) modelRef, err := parseAndValidateModelRef(req.Model)
if err != nil { if err != nil {
writeModelRefParseError(c, err, http.StatusNotFound, fmt.Sprintf("model '%s' not found", req.Model)) writeModelRefParseError(c, err, http.StatusNotFound, fmt.Sprintf("model '%s' not found", req.Model))
@@ -231,11 +253,13 @@ func (s *Server) GenerateHandler(c *gin.Context) {
return return
} }
m, err := GetModel(name.String()) m, err := GetModelForRunner(name.String(), req.Runner)
if err != nil { if err != nil {
switch { switch {
case errors.Is(err, fs.ErrNotExist): case errors.Is(err, fs.ErrNotExist):
c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Model)}) c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Model)})
case errors.Is(err, manifest.ErrNoCompatibleManifest):
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
case err.Error() == errtypes.InvalidModelNameErrMsg: case err.Error() == errtypes.InvalidModelNameErrMsg:
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
default: default:
@@ -405,7 +429,7 @@ func (s *Server) GenerateHandler(c *gin.Context) {
} }
} }
r, m, opts, err := s.scheduleRunner(c.Request.Context(), name.String(), caps, req.Options, req.KeepAlive) r, m, opts, err := s.scheduleRunner(c.Request.Context(), name.String(), req.Runner, caps, req.Options, req.KeepAlive)
if errors.Is(err, errCapabilityCompletion) { if errors.Is(err, errCapabilityCompletion) {
c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support generate", req.Model)}) c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support generate", req.Model)})
return return
@@ -727,7 +751,7 @@ func (s *Server) EmbedHandler(c *gin.Context) {
return return
} }
r, m, opts, err := s.scheduleRunner(c.Request.Context(), name.String(), []model.Capability{}, req.Options, req.KeepAlive) r, m, opts, err := s.scheduleRunner(c.Request.Context(), name.String(), "", []model.Capability{}, req.Options, req.KeepAlive)
if err != nil { if err != nil {
handleScheduleError(c, req.Model, err) handleScheduleError(c, req.Model, err)
return return
@@ -882,7 +906,7 @@ func (s *Server) EmbeddingsHandler(c *gin.Context) {
name := modelRef.Name name := modelRef.Name
r, _, _, err := s.scheduleRunner(c.Request.Context(), name.String(), []model.Capability{}, req.Options, req.KeepAlive) r, _, _, err := s.scheduleRunner(c.Request.Context(), name.String(), "", []model.Capability{}, req.Options, req.KeepAlive)
if err != nil { if err != nil {
handleScheduleError(c, req.Model, err) handleScheduleError(c, req.Model, err)
return return
@@ -1081,8 +1105,7 @@ func (s *Server) DeleteHandler(c *gin.Context) {
return return
} }
m, err := manifest.ParseNamedManifest(n) if err := manifest.RemoveNamed(n); err != nil {
if err != nil {
switch { switch {
case os.IsNotExist(err): case os.IsNotExist(err):
c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", cmp.Or(r.Model, r.Name))}) c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", cmp.Or(r.Model, r.Name))})
@@ -1091,16 +1114,85 @@ func (s *Server) DeleteHandler(c *gin.Context) {
} }
return return
} }
}
if err := m.Remove(); err != nil { func writeShowError(c *gin.Context, model string, err error) {
var statusErr api.StatusError
switch {
case os.IsNotExist(err):
c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", model)})
case errors.Is(err, manifest.ErrNoCompatibleManifest):
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
case errors.As(err, &statusErr):
c.JSON(statusErr.StatusCode, gin.H{"error": statusErr.ErrorMessage})
case err.Error() == errtypes.InvalidModelNameErrMsg:
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
default:
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return }
}
func readBlobData(digest string) ([]byte, error) {
blobPath, err := manifest.BlobsPath(digest)
if err != nil {
return nil, err
} }
if err := m.RemoveLayers(); err != nil { return os.ReadFile(blobPath)
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) }
return
func resolveShowManifestChild(child manifest.Manifest) (*manifest.Manifest, error) {
if child.MediaType == manifest.MediaTypeManifestList {
return nil, errors.New("nested manifest lists are not supported")
} }
resolved := child
if resolved.Config.Digest == "" && len(resolved.Layers) == 0 && resolved.Digest() != "" {
data, err := readBlobData(resolved.BlobDigest())
if err != nil {
return nil, err
}
if err := json.Unmarshal(data, &resolved); err != nil {
return nil, err
}
if resolved.Runner == "" {
resolved.Runner = child.Runner
}
if resolved.Format == "" {
resolved.Format = child.Format
}
}
return &resolved, nil
}
func collectManifestLicenseText(children []manifest.Manifest) (string, error) {
seen := make(map[string]struct{})
var licenses []string
for _, child := range children {
for _, layer := range child.Layers {
if layer.MediaType != "application/vnd.ollama.image.license" || layer.Digest == "" {
continue
}
digest := layer.Digest
if _, ok := seen[digest]; ok {
continue
}
data, err := readBlobData(digest)
if err != nil {
return "", err
}
seen[digest] = struct{}{}
licenses = append(licenses, string(data))
}
}
return strings.Join(licenses, "\n"), nil
} }
func (s *Server) ShowHandler(c *gin.Context) { func (s *Server) ShowHandler(c *gin.Context) {
@@ -1124,6 +1216,15 @@ func (s *Server) ShowHandler(c *gin.Context) {
return return
} }
if req.Runner, err = normalizeRunner(req.Runner); err != nil {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
}
if req.AllManifests && req.Runner != "" {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "runner cannot be used with all_manifests"})
return
}
modelRef, err := parseAndValidateModelRef(req.Model) modelRef, err := parseAndValidateModelRef(req.Model)
if err != nil { if err != nil {
writeModelRefParseError(c, err, http.StatusBadRequest, err.Error()) writeModelRefParseError(c, err, http.StatusBadRequest, err.Error())
@@ -1138,19 +1239,20 @@ func (s *Server) ShowHandler(c *gin.Context) {
req.Model = modelRef.Base req.Model = modelRef.Base
if req.AllManifests {
resp, err := GetAllManifestsInfo(req)
if err != nil {
writeShowError(c, req.Model, err)
return
}
c.JSON(http.StatusOK, resp)
return
}
resp, err := GetModelInfo(req) resp, err := GetModelInfo(req)
if err != nil { if err != nil {
var statusErr api.StatusError writeShowError(c, req.Model, err)
switch {
case os.IsNotExist(err):
c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Model)})
case errors.As(err, &statusErr):
c.JSON(statusErr.StatusCode, gin.H{"error": statusErr.ErrorMessage})
case err.Error() == errtypes.InvalidModelNameErrMsg:
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
default:
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
}
return return
} }
@@ -1173,17 +1275,127 @@ func (s *Server) ShowHandler(c *gin.Context) {
c.JSON(http.StatusOK, resp) c.JSON(http.StatusOK, resp)
} }
func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) { func GetAllManifestsInfo(req api.ShowRequest) (*api.ShowManifestsResponse, error) {
runner, err := normalizeRunner(req.Runner)
if err != nil {
return nil, api.StatusError{
StatusCode: http.StatusBadRequest,
ErrorMessage: err.Error(),
}
}
req.Runner = runner
if req.Runner != "" {
return nil, api.StatusError{
StatusCode: http.StatusBadRequest,
ErrorMessage: "runner cannot be used with all_manifests",
}
}
name := model.ParseName(req.Model) name := model.ParseName(req.Model)
if !name.IsValid() { if !name.IsValid() {
return nil, model.Unqualified(name) return nil, model.Unqualified(name)
} }
name, err := getExistingName(name) name, err = getExistingName(name)
if err != nil {
return nil, err
}
req.Model = name.String()
data, err := manifest.ReadManifestData(name)
if err != nil { if err != nil {
return nil, err return nil, err
} }
m, err := GetModel(name.String()) var parent manifest.Manifest
if err := json.Unmarshal(data, &parent); err != nil {
return nil, err
}
if parent.MediaType != manifest.MediaTypeManifestList {
resp, err := GetModelInfo(req)
if err != nil {
return nil, err
}
mf, err := manifest.ParseNamedManifestForRunner(name, "")
if err != nil {
return nil, err
}
return &api.ShowManifestsResponse{
Manifests: []api.ShowManifest{{
Runner: mf.Runner,
ShowResponse: *resp,
}},
License: resp.License,
}, nil
}
resolvedChildren := make([]manifest.Manifest, 0, len(parent.Manifests))
resp := &api.ShowManifestsResponse{
Manifests: make([]api.ShowManifest, 0, len(parent.Manifests)),
}
for _, child := range parent.Manifests {
resolved, err := resolveShowManifestChild(child)
if err != nil {
return nil, err
}
if resolved.Runner == "" {
return nil, fmt.Errorf("manifest list child %q is missing runner metadata", resolved.BlobDigest())
}
runner, err := normalizeRunner(resolved.Runner)
if err != nil {
return nil, err
}
resolved.Runner = runner
resolvedChildren = append(resolvedChildren, *resolved)
childResp, err := GetModelInfo(api.ShowRequest{
Model: req.Model,
Runner: resolved.Runner,
System: req.System,
Verbose: req.Verbose,
Options: req.Options,
})
if err != nil {
return nil, err
}
resp.Manifests = append(resp.Manifests, api.ShowManifest{
Runner: resolved.Runner,
ShowResponse: *childResp,
})
}
resp.License, err = collectManifestLicenseText(resolvedChildren)
if err != nil {
return nil, err
}
return resp, nil
}
func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
runner, err := normalizeRunner(req.Runner)
if err != nil {
return nil, api.StatusError{
StatusCode: http.StatusBadRequest,
ErrorMessage: err.Error(),
}
}
req.Runner = runner
name := model.ParseName(req.Model)
if !name.IsValid() {
return nil, model.Unqualified(name)
}
name, err = getExistingName(name)
if err != nil {
return nil, err
}
m, err := GetModelForRunner(name.String(), req.Runner)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -1217,7 +1429,7 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
// For safetensors LLM models (experimental), populate details from config.json // For safetensors LLM models (experimental), populate details from config.json
if m.Config.ModelFormat == "safetensors" && slices.Contains(m.Config.Capabilities, "completion") { if m.Config.ModelFormat == "safetensors" && slices.Contains(m.Config.Capabilities, "completion") {
if info, err := xserver.GetSafetensorsLLMInfo(name); err == nil { if info, err := xserver.GetSafetensorsLLMInfoForRunner(name, req.Runner); err == nil {
if arch, ok := info["general.architecture"].(string); ok && arch != "" { if arch, ok := info["general.architecture"].(string); ok && arch != "" {
modelDetails.Family = arch modelDetails.Family = arch
} }
@@ -1227,7 +1439,7 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
} }
// Older manifests may not have file_type populated for safetensors models. // Older manifests may not have file_type populated for safetensors models.
if modelDetails.QuantizationLevel == "" { if modelDetails.QuantizationLevel == "" {
if dtype, err := xserver.GetSafetensorsDtype(name); err == nil && dtype != "" { if dtype, err := xserver.GetSafetensorsDtypeForRunner(name, req.Runner); err == nil && dtype != "" {
modelDetails.QuantizationLevel = dtype modelDetails.QuantizationLevel = dtype
} }
} }
@@ -1242,7 +1454,7 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
msgs[i] = api.Message{Role: msg.Role, Content: msg.Content} msgs[i] = api.Message{Role: msg.Role, Content: msg.Content}
} }
mf, err := manifest.ParseNamedManifest(name) mf, err := manifest.ParseNamedManifestForRunner(name, req.Runner)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -1327,25 +1539,19 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
} }
if slices.Contains(m.Capabilities(), model.CapabilityImage) { if slices.Contains(m.Capabilities(), model.CapabilityImage) {
// Populate tensor info if verbose if tensors, err := xserver.GetSafetensorsTensorInfoForRunner(name, req.Runner); err == nil {
if req.Verbose { resp.Tensors = tensors
if tensors, err := xserver.GetSafetensorsTensorInfo(name); err == nil {
resp.Tensors = tensors
}
} }
return resp, nil return resp, nil
} }
// For safetensors LLM models (experimental), populate ModelInfo from config.json // For safetensors LLM models (experimental), populate ModelInfo from config.json
if m.Config.ModelFormat == "safetensors" && slices.Contains(m.Config.Capabilities, "completion") { if m.Config.ModelFormat == "safetensors" && slices.Contains(m.Config.Capabilities, "completion") {
if info, err := xserver.GetSafetensorsLLMInfo(name); err == nil { if info, err := xserver.GetSafetensorsLLMInfoForRunner(name, req.Runner); err == nil {
resp.ModelInfo = info resp.ModelInfo = info
} }
// Populate tensor info if verbose if tensors, err := xserver.GetSafetensorsTensorInfoForRunner(name, req.Runner); err == nil {
if req.Verbose { resp.Tensors = tensors
if tensors, err := xserver.GetSafetensorsTensorInfo(name); err == nil {
resp.Tensors = tensors
}
} }
return resp, nil return resp, nil
} }
@@ -1409,6 +1615,11 @@ func (s *Server) ListHandler(c *gin.Context) {
models := []api.ListModelResponse{} models := []api.ListModelResponse{}
for n, m := range ms { for n, m := range ms {
var cf model.ConfigV2 var cf model.ConfigV2
size, err := manifest.TotalSizeForName(n)
if err != nil {
slog.Warn("bad manifest size", "name", n, "error", err)
size = m.Size()
}
if m.Config.Digest != "" { if m.Config.Digest != "" {
f, err := m.Config.Open() f, err := m.Config.Open()
@@ -1430,7 +1641,7 @@ func (s *Server) ListHandler(c *gin.Context) {
Name: n.DisplayShortest(), Name: n.DisplayShortest(),
RemoteModel: cf.RemoteModel, RemoteModel: cf.RemoteModel,
RemoteHost: cf.RemoteHost, RemoteHost: cf.RemoteHost,
Size: m.Size(), Size: size,
Digest: m.Digest(), Digest: m.Digest(),
ModifiedAt: m.FileInfo().ModTime(), ModifiedAt: m.FileInfo().ModTime(),
Details: api.ModelDetails{ Details: api.ModelDetails{
@@ -1770,13 +1981,15 @@ func Serve(ln net.Listener) error {
return err return err
} }
manifestsPath, err := manifest.Path() for _, rootFn := range []func() (string, error){manifest.Path, manifest.V2Path} {
if err != nil { manifestsPath, err := rootFn()
return err if err != nil {
} return err
}
if err := manifest.PruneDirectory(manifestsPath); err != nil { if err := manifest.PruneDirectory(manifestsPath); err != nil && !os.IsNotExist(err) {
return err return err
}
} }
} }
} }
@@ -2047,6 +2260,17 @@ func (s *Server) PsHandler(c *gin.Context) {
for _, v := range s.sched.loaded { for _, v := range s.sched.loaded {
model := v.model model := v.model
digest := model.ManifestDigest
if digest == "" {
digest = model.Digest
}
runner := v.runner
if runner == "" {
runner = model.Runner
}
if normalized, err := normalizeRunner(runner); err == nil && normalized != "" {
runner = normalized
}
modelDetails := api.ModelDetails{ modelDetails := api.ModelDetails{
Format: model.Config.ModelFormat, Format: model.Config.ModelFormat,
Family: model.Config.ModelFamily, Family: model.Config.ModelFamily,
@@ -2060,9 +2284,10 @@ func (s *Server) PsHandler(c *gin.Context) {
Name: model.ShortName, Name: model.ShortName,
Size: int64(v.totalSize), Size: int64(v.totalSize),
SizeVRAM: int64(v.vramSize), SizeVRAM: int64(v.vramSize),
Digest: model.Digest, Digest: digest,
Details: modelDetails, Details: modelDetails,
ExpiresAt: v.expiresAt, ExpiresAt: v.expiresAt,
Runner: runner,
} }
if v.llama != nil { if v.llama != nil {
mr.ContextLength = v.llama.ContextLength() mr.ContextLength = v.llama.ContextLength()
@@ -2115,6 +2340,13 @@ func (s *Server) ChatHandler(c *gin.Context) {
return return
} }
if runner, err := normalizeRunner(req.Runner); err != nil {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
} else {
req.Runner = runner
}
modelRef, err := parseAndValidateModelRef(req.Model) modelRef, err := parseAndValidateModelRef(req.Model)
if err != nil { if err != nil {
writeModelRefParseError(c, err, http.StatusBadRequest, "model is required") writeModelRefParseError(c, err, http.StatusBadRequest, "model is required")
@@ -2139,11 +2371,13 @@ func (s *Server) ChatHandler(c *gin.Context) {
return return
} }
m, err := GetModel(name.String()) m, err := GetModelForRunner(name.String(), req.Runner)
if err != nil { if err != nil {
switch { switch {
case os.IsNotExist(err): case os.IsNotExist(err):
c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Model)}) c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Model)})
case errors.Is(err, manifest.ErrNoCompatibleManifest):
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
case err.Error() == errtypes.InvalidModelNameErrMsg: case err.Error() == errtypes.InvalidModelNameErrMsg:
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
default: default:
@@ -2292,7 +2526,7 @@ func (s *Server) ChatHandler(c *gin.Context) {
} }
} }
r, m, opts, err := s.scheduleRunner(c.Request.Context(), name.String(), caps, req.Options, req.KeepAlive) r, m, opts, err := s.scheduleRunner(c.Request.Context(), name.String(), req.Runner, caps, req.Options, req.KeepAlive)
if errors.Is(err, errCapabilityCompletion) { if errors.Is(err, errCapabilityCompletion) {
c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support chat", req.Model)}) c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support chat", req.Model)})
return return
@@ -2631,6 +2865,8 @@ func handleScheduleError(c *gin.Context, name string, err error) {
switch { switch {
case errors.Is(err, errCapabilities), errors.Is(err, errRequired): case errors.Is(err, errCapabilities), errors.Is(err, errRequired):
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
case errors.Is(err, manifest.ErrNoCompatibleManifest):
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
case errors.Is(err, context.Canceled): case errors.Is(err, context.Canceled):
c.JSON(499, gin.H{"error": "request canceled"}) c.JSON(499, gin.H{"error": "request canceled"})
case errors.Is(err, ErrMaxQueue): case errors.Is(err, ErrMaxQueue):
@@ -2681,7 +2917,7 @@ func (s *Server) handleImageGenerate(c *gin.Context, req api.GenerateRequest, mo
} }
// Schedule the runner for image generation // Schedule the runner for image generation
runner, _, _, err := s.scheduleRunner(c.Request.Context(), modelName, []model.Capability{model.CapabilityImage}, nil, req.KeepAlive) runner, _, _, err := s.scheduleRunner(c.Request.Context(), modelName, req.Runner, []model.Capability{model.CapabilityImage}, nil, req.KeepAlive)
if err != nil { if err != nil {
handleScheduleError(c, req.Model, err) handleScheduleError(c, req.Model, err)
return return

View File

@@ -109,12 +109,44 @@ func checkFileExists(t *testing.T, p string, expect []string) {
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
if strings.HasSuffix(filepath.ToSlash(p), "/blobs/*") {
actual = slices.DeleteFunc(actual, isManifestBlobForTest)
}
if diff := gocmp.Diff(expect, actual, gocmpopts.SortSlices(strings.Compare), gocmpopts.EquateEmpty()); diff != "" { if diff := gocmp.Diff(expect, actual, gocmpopts.SortSlices(strings.Compare), gocmpopts.EquateEmpty()); diff != "" {
t.Errorf("file exists mismatch (-want +got):\n%s", diff) t.Errorf("file exists mismatch (-want +got):\n%s", diff)
} }
} }
// checkManifestFiles asserts that the manifest files found four directory
// levels below "manifests-v2" in the models directory are exactly the ones
// belonging to the given model names. Calling it with no names asserts that
// no manifest files exist.
func checkManifestFiles(t *testing.T, names ...string) {
	t.Helper()

	want := make([]string, 0, len(names))
	for _, n := range names {
		path, err := manifest.V2PathForName(model.ParseName(n))
		if err != nil {
			t.Fatal(err)
		}
		want = append(want, path)
	}

	pattern := filepath.Join(envconfig.Models(), "manifests-v2", "*", "*", "*", "*")
	checkFileExists(t, pattern, want)
}
// isManifestBlobForTest reports whether the file at path decodes as a JSON
// manifest: a non-zero schema version, a non-empty media type, and either a
// config digest or at least one layer. Files that cannot be read or are not
// valid JSON report false.
//
// NOTE(review): a manifest-list blob that carries only child references (no
// config and no layers) is not matched by this predicate; confirm that is
// intended.
func isManifestBlobForTest(path string) bool {
	raw, err := os.ReadFile(path)
	if err != nil {
		return false
	}

	var mf manifest.Manifest
	if json.Unmarshal(raw, &mf) != nil {
		return false
	}

	if mf.SchemaVersion == 0 || mf.MediaType == "" {
		return false
	}
	return mf.Config.Digest != "" || len(mf.Layers) > 0
}
func TestCreateFromBin(t *testing.T) { func TestCreateFromBin(t *testing.T) {
gin.SetMode(gin.TestMode) gin.SetMode(gin.TestMode)
@@ -136,9 +168,7 @@ func TestCreateFromBin(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code) t.Fatalf("expected status code 200, actual %d", w.Code)
} }
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{ checkManifestFiles(t, "test")
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test", "latest"),
})
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{ checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
filepath.Join(p, "blobs", "sha256-6bcdb8859d417753645538d7bbfbd7ca91a3f0c191aef5379c53c05e86b669dd"), filepath.Join(p, "blobs", "sha256-6bcdb8859d417753645538d7bbfbd7ca91a3f0c191aef5379c53c05e86b669dd"),
@@ -196,9 +226,7 @@ func TestCreateFromModel(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code) t.Fatalf("expected status code 200, actual %d", w.Code)
} }
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{ checkManifestFiles(t, "test")
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test", "latest"),
})
w = createRequest(t, s.CreateHandler, api.CreateRequest{ w = createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test2", Name: "test2",
@@ -210,10 +238,7 @@ func TestCreateFromModel(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code) t.Fatalf("expected status code 200, actual %d", w.Code)
} }
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{ checkManifestFiles(t, "test", "test2")
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test", "latest"),
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test2", "latest"),
})
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{ checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
filepath.Join(p, "blobs", "sha256-6bcdb8859d417753645538d7bbfbd7ca91a3f0c191aef5379c53c05e86b669dd"), filepath.Join(p, "blobs", "sha256-6bcdb8859d417753645538d7bbfbd7ca91a3f0c191aef5379c53c05e86b669dd"),
@@ -306,9 +331,7 @@ func TestCreateRemovesLayers(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code) t.Fatalf("expected status code 200, actual %d", w.Code)
} }
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{ checkManifestFiles(t, "test")
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test", "latest"),
})
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{ checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
filepath.Join(p, "blobs", "sha256-89a2116c3a82d6a97f59f748d86ed4417214353fd178ee54df418fde32495fad"), filepath.Join(p, "blobs", "sha256-89a2116c3a82d6a97f59f748d86ed4417214353fd178ee54df418fde32495fad"),
@@ -327,9 +350,7 @@ func TestCreateRemovesLayers(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code) t.Fatalf("expected status code 200, actual %d", w.Code)
} }
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{ checkManifestFiles(t, "test")
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test", "latest"),
})
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{ checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
filepath.Join(p, "blobs", "sha256-136bf7c76bac2ec09d6617885507d37829e04b41acc47687d45e512b544e893a"), filepath.Join(p, "blobs", "sha256-136bf7c76bac2ec09d6617885507d37829e04b41acc47687d45e512b544e893a"),
@@ -338,6 +359,113 @@ func TestCreateRemovesLayers(t *testing.T) {
}) })
} }
// writeManifestListVariant stores a minimal model under name for use as a
// manifest-list child in tests. The model consists of a config blob declaring
// modelFormat with the "completion" capability and a single layer whose
// content is name+" layer". Any failure aborts the test.
func writeManifestListVariant(t *testing.T, name, modelFormat string) {
	t.Helper()

	cfg := model.ConfigV2{
		ModelFormat:  modelFormat,
		Capabilities: []string{"completion"},
	}
	cfgJSON, err := json.Marshal(cfg)
	if err != nil {
		t.Fatal(err)
	}

	cfgLayer, err := manifest.NewLayer(bytes.NewReader(cfgJSON), "application/vnd.docker.container.image.v1+json")
	if err != nil {
		t.Fatal(err)
	}

	// The payload is stored with the license media type.
	contentLayer, err := manifest.NewLayer(strings.NewReader(name+" layer"), "application/vnd.ollama.image.license")
	if err != nil {
		t.Fatal(err)
	}

	layers := []manifest.Layer{contentLayer}
	if err := manifest.WriteManifest(model.ParseName(name), cfgLayer, layers); err != nil {
		t.Fatal(err)
	}
}
// TestCreateManifestList verifies that a create request carrying a List of
// existing models writes a manifest list whose children reference, rather
// than embed, the per-runner manifests, and that each referenced child blob
// resolves to a complete manifest.
func TestCreateManifestList(t *testing.T) {
	gin.SetMode(gin.TestMode)
	t.Setenv("OLLAMA_MODELS", t.TempDir())

	var s Server
	writeManifestListVariant(t, "test-gguf", manifest.FormatGGUF)
	writeManifestListVariant(t, "test-safetensors", manifest.FormatSafetensors)

	w := createRequest(t, s.CreateHandler, api.CreateRequest{
		Model:  "test-list",
		List:   []string{"test-gguf", "test-safetensors"},
		Stream: &stream,
	})
	if w.Code != http.StatusOK {
		t.Fatalf("expected status code 200, actual %d: %s", w.Code, w.Body.String())
	}

	// The named manifest itself must be a manifest list with both variants.
	data, err := manifest.ReadManifestData(model.ParseName("test-list"))
	if err != nil {
		t.Fatal(err)
	}
	var parent manifest.Manifest
	if err := json.Unmarshal(data, &parent); err != nil {
		t.Fatal(err)
	}
	if parent.MediaType != manifest.MediaTypeManifestList {
		t.Fatalf("mediaType = %q, want %q", parent.MediaType, manifest.MediaTypeManifestList)
	}
	if len(parent.Manifests) != 2 {
		t.Fatalf("manifest count = %d, want 2", len(parent.Manifests))
	}

	// Parsing the name without a runner must still select a concrete child.
	selected, err := manifest.ParseNamedManifest(model.ParseName("test-list"))
	if err != nil {
		t.Fatal(err)
	}
	if selected.Config.Digest == "" {
		t.Fatal("selected manifest is missing config")
	}

	// Asking for the MLX runner must select the safetensors variant.
	mlxInfo, err := GetModelInfo(api.ShowRequest{Model: "test-list", Runner: manifest.RunnerMLX})
	if err != nil {
		t.Fatal(err)
	}
	if mlxInfo.Details.Format != manifest.FormatSafetensors {
		t.Fatalf("mlx show format = %q, want %q", mlxInfo.Details.Format, manifest.FormatSafetensors)
	}

	want := map[string]string{
		manifest.RunnerGGML: manifest.FormatGGUF,
		manifest.RunnerMLX:  manifest.FormatSafetensors,
	}
	for _, child := range parent.Manifests {
		// Use the comma-ok form: a plain lookup yields "" for an unknown
		// runner, which would wrongly match a child with an empty format.
		wantFormat, ok := want[child.Runner]
		if !ok || child.Format != wantFormat {
			t.Fatalf("child runner/format = %q/%q, want one of %v", child.Runner, child.Format, want)
		}
		if child.BlobDigest() == "" {
			t.Fatal("child manifest reference is missing digest")
		}
		// List entries are references only; config and layers live in the
		// child manifest blob.
		if child.Config.Digest != "" || len(child.Layers) != 0 {
			t.Fatalf("child manifest reference embedded config/layers: config=%q layers=%d", child.Config.Digest, len(child.Layers))
		}

		childBlob, err := manifest.BlobsPath(child.BlobDigest())
		if err != nil {
			t.Fatal(err)
		}
		childData, err := os.ReadFile(childBlob)
		if err != nil {
			t.Fatalf("child manifest blob missing: %v", err)
		}
		var resolved manifest.Manifest
		if err := json.Unmarshal(childData, &resolved); err != nil {
			t.Fatal(err)
		}
		if resolved.Config.Digest == "" || len(resolved.Layers) == 0 {
			t.Fatalf("resolved child manifest missing config/layers: config=%q layers=%d", resolved.Config.Digest, len(resolved.Layers))
		}
	}
}
func TestCreateUnsetsSystem(t *testing.T) { func TestCreateUnsetsSystem(t *testing.T) {
gin.SetMode(gin.TestMode) gin.SetMode(gin.TestMode)
@@ -357,9 +485,7 @@ func TestCreateUnsetsSystem(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code) t.Fatalf("expected status code 200, actual %d", w.Code)
} }
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{ checkManifestFiles(t, "test")
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test", "latest"),
})
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{ checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
filepath.Join(p, "blobs", "sha256-0a666d113e8e0a3d27e9c7bd136a0bdfb6241037db50729d81568451ebfdbde8"), filepath.Join(p, "blobs", "sha256-0a666d113e8e0a3d27e9c7bd136a0bdfb6241037db50729d81568451ebfdbde8"),
@@ -378,9 +504,7 @@ func TestCreateUnsetsSystem(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code) t.Fatalf("expected status code 200, actual %d", w.Code)
} }
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{ checkManifestFiles(t, "test")
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test", "latest"),
})
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{ checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
filepath.Join(p, "blobs", "sha256-6bcdb8859d417753645538d7bbfbd7ca91a3f0c191aef5379c53c05e86b669dd"), filepath.Join(p, "blobs", "sha256-6bcdb8859d417753645538d7bbfbd7ca91a3f0c191aef5379c53c05e86b669dd"),
@@ -411,9 +535,7 @@ func TestCreateMergeParameters(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code) t.Fatalf("expected status code 200, actual %d", w.Code)
} }
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{ checkManifestFiles(t, "test")
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test", "latest"),
})
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{ checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
filepath.Join(p, "blobs", "sha256-1d0ad71299d48c2fb7ae2b98e683643e771f8a5b72be34942af90d97a91c1e37"), filepath.Join(p, "blobs", "sha256-1d0ad71299d48c2fb7ae2b98e683643e771f8a5b72be34942af90d97a91c1e37"),
@@ -436,10 +558,7 @@ func TestCreateMergeParameters(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code) t.Fatalf("expected status code 200, actual %d", w.Code)
} }
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{ checkManifestFiles(t, "test", "test2")
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test", "latest"),
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test2", "latest"),
})
// Display contents of each blob in the directory // Display contents of each blob in the directory
blobDir := filepath.Join(p, "blobs") blobDir := filepath.Join(p, "blobs")
@@ -495,10 +614,7 @@ func TestCreateMergeParameters(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code) t.Fatalf("expected status code 200, actual %d", w.Code)
} }
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{ checkManifestFiles(t, "test", "test2")
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test", "latest"),
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test2", "latest"),
})
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{ checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
filepath.Join(p, "blobs", "sha256-12f58bb75cb3042d69a7e013ab87fb3c3c7088f50ddc62f0c77bd332f0d44d35"), filepath.Join(p, "blobs", "sha256-12f58bb75cb3042d69a7e013ab87fb3c3c7088f50ddc62f0c77bd332f0d44d35"),
@@ -555,9 +671,7 @@ func TestCreateReplacesMessages(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code) t.Fatalf("expected status code 200, actual %d", w.Code)
} }
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{ checkManifestFiles(t, "test")
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test", "latest"),
})
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{ checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
filepath.Join(p, "blobs", "sha256-298baeaf6928a60cf666d88d64a1ba606feb43a2865687c39e40652e407bffc4"), filepath.Join(p, "blobs", "sha256-298baeaf6928a60cf666d88d64a1ba606feb43a2865687c39e40652e407bffc4"),
@@ -589,10 +703,7 @@ func TestCreateReplacesMessages(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code) t.Fatalf("expected status code 200, actual %d", w.Code)
} }
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{ checkManifestFiles(t, "test", "test2")
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test", "latest"),
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test2", "latest"),
})
// Old layers will not have been pruned // Old layers will not have been pruned
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{ checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
@@ -650,9 +761,7 @@ func TestCreateTemplateSystem(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code) t.Fatalf("expected status code 200, actual %d", w.Code)
} }
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{ checkManifestFiles(t, "test")
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test", "latest"),
})
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{ checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
filepath.Join(p, "blobs", "sha256-0a04d979734167da3b80811a1874d734697f366a689f3912589b99d2e86e7ad1"), filepath.Join(p, "blobs", "sha256-0a04d979734167da3b80811a1874d734697f366a689f3912589b99d2e86e7ad1"),
@@ -850,9 +959,7 @@ func TestCreateLicenses(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code) t.Fatalf("expected status code 200, actual %d", w.Code)
} }
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{ checkManifestFiles(t, "test")
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test", "latest"),
})
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{ checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
filepath.Join(p, "blobs", "sha256-2af71558e438db0b73a20beab92dc278a94e1bbe974c00c1a33e3ab62d53a608"), filepath.Join(p, "blobs", "sha256-2af71558e438db0b73a20beab92dc278a94e1bbe974c00c1a33e3ab62d53a608"),

View File

@@ -42,10 +42,7 @@ func TestDelete(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code) t.Fatalf("expected status code 200, actual %d", w.Code)
} }
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{ checkManifestFiles(t, "test", "test2")
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test", "latest"),
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test2", "latest"),
})
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{ checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
filepath.Join(p, "blobs", "sha256-136bf7c76bac2ec09d6617885507d37829e04b41acc47687d45e512b544e893a"), filepath.Join(p, "blobs", "sha256-136bf7c76bac2ec09d6617885507d37829e04b41acc47687d45e512b544e893a"),
@@ -60,9 +57,7 @@ func TestDelete(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code) t.Fatalf("expected status code 200, actual %d", w.Code)
} }
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{ checkManifestFiles(t, "test2")
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test2", "latest"),
})
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{ checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
filepath.Join(p, "blobs", "sha256-136bf7c76bac2ec09d6617885507d37829e04b41acc47687d45e512b544e893a"), filepath.Join(p, "blobs", "sha256-136bf7c76bac2ec09d6617885507d37829e04b41acc47687d45e512b544e893a"),
@@ -76,7 +71,7 @@ func TestDelete(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code) t.Fatalf("expected status code 200, actual %d", w.Code)
} }
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{}) checkManifestFiles(t)
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{}) checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{})
} }
@@ -109,7 +104,7 @@ func TestDeleteDuplicateLayers(t *testing.T) {
t.Errorf("expected status code 200, actual %d", w.Code) t.Errorf("expected status code 200, actual %d", w.Code)
} }
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{}) checkManifestFiles(t)
} }
func TestDeleteCloudSourceNormalizesToLegacyName(t *testing.T) { func TestDeleteCloudSourceNormalizesToLegacyName(t *testing.T) {
@@ -129,14 +124,12 @@ func TestDeleteCloudSourceNormalizesToLegacyName(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code) t.Fatalf("expected status code 200, actual %d", w.Code)
} }
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{ checkManifestFiles(t, "gpt-oss:20b-cloud")
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "gpt-oss", "20b-cloud"),
})
w = createRequest(t, s.DeleteHandler, api.DeleteRequest{Name: "gpt-oss:20b:cloud"}) w = createRequest(t, s.DeleteHandler, api.DeleteRequest{Name: "gpt-oss:20b:cloud"})
if w.Code != http.StatusOK { if w.Code != http.StatusOK {
t.Fatalf("expected status code 200, actual %d (%s)", w.Code, w.Body.String()) t.Fatalf("expected status code 200, actual %d (%s)", w.Code, w.Body.String())
} }
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{}) checkManifestFiles(t)
} }

View File

@@ -1,6 +1,7 @@
package server package server
import ( import (
"bytes"
"encoding/json" "encoding/json"
"net/http" "net/http"
"slices" "slices"
@@ -9,6 +10,8 @@ import (
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
"github.com/ollama/ollama/api" "github.com/ollama/ollama/api"
"github.com/ollama/ollama/manifest"
"github.com/ollama/ollama/types/model"
) )
func TestList(t *testing.T) { func TestList(t *testing.T) {
@@ -64,3 +67,118 @@ func TestList(t *testing.T) {
t.Fatalf("expected slices to be equal %v", actualNames) t.Fatalf("expected slices to be equal %v", actualNames)
} }
} }
// TestListIncludesAllManifestListChildrenInSize builds a manifest list with a
// GGUF child and an MLX child that reference one common blob digest, then
// checks that the size reported by the list handler equals the sum of both
// configs and all three blobs, with the common blob counted a single time.
func TestListIncludesAllManifestListChildrenInSize(t *testing.T) {
	gin.SetMode(gin.TestMode)
	t.Setenv("OLLAMA_MODELS", t.TempDir())

	// newLayer wraps manifest.NewLayer and fails the test on error.
	newLayer := func(t *testing.T, content []byte, mediaType string) manifest.Layer {
		t.Helper()
		l, err := manifest.NewLayer(bytes.NewReader(content), mediaType)
		if err != nil {
			t.Fatal(err)
		}
		return l
	}
	// newConfig produces a config layer whose only populated field is the
	// model format.
	newConfig := func(t *testing.T, format string) manifest.Layer {
		t.Helper()
		raw, err := json.Marshal(model.ConfigV2{ModelFormat: format})
		if err != nil {
			t.Fatal(err)
		}
		return newLayer(t, raw, "application/vnd.docker.container.image.v1+json")
	}

	const modelMediaType = "application/vnd.ollama.image.model"

	ggufConfig := newConfig(t, manifest.FormatGGUF)
	mlxConfig := newConfig(t, manifest.FormatSafetensors)
	sharedBlob := newLayer(t, []byte("shared-weights"), modelMediaType)
	ggufBlob := newLayer(t, []byte("gguf-weights"), modelMediaType)
	mlxBlob := newLayer(t, []byte("mlx-weights"), manifest.MediaTypeImageTensor)

	// GGUF child: the common blob plus its own weights.
	err := manifest.WriteManifestWithMetadata(
		model.ParseName("test-gguf"),
		ggufConfig,
		[]manifest.Layer{sharedBlob, ggufBlob},
		manifest.RunnerGGML,
		manifest.FormatGGUF,
	)
	if err != nil {
		t.Fatal(err)
	}

	// MLX child: the same digest as the common blob, but declared with the
	// tensor media type, plus its own weights.
	sharedAsTensor := manifest.Layer{
		MediaType: manifest.MediaTypeImageTensor,
		Digest:    sharedBlob.Digest,
		Size:      sharedBlob.Size,
	}
	err = manifest.WriteManifestWithMetadata(
		model.ParseName("test-mlx"),
		mlxConfig,
		[]manifest.Layer{sharedAsTensor, mlxBlob},
		manifest.RunnerMLX,
		manifest.FormatSafetensors,
	)
	if err != nil {
		t.Fatal(err)
	}

	ggufManifest, err := manifest.ParseNamedManifest(model.ParseName("test-gguf"))
	if err != nil {
		t.Fatal(err)
	}
	mlxManifest, err := manifest.ParseNamedManifestForRunner(model.ParseName("test-mlx"), manifest.RunnerMLX)
	if err != nil {
		t.Fatal(err)
	}

	ggufRef, err := manifest.NewManifestReference(ggufManifest.BlobDigest(), manifest.RunnerGGML, manifest.FormatGGUF)
	if err != nil {
		t.Fatal(err)
	}
	mlxRef, err := manifest.NewManifestReference(mlxManifest.BlobDigest(), manifest.RunnerMLX, manifest.FormatSafetensors)
	if err != nil {
		t.Fatal(err)
	}

	// Parent manifest list pointing at both children.
	parent := manifest.Manifest{
		SchemaVersion: 2,
		MediaType:     manifest.MediaTypeManifestList,
		Manifests:     []manifest.Manifest{ggufRef, mlxRef},
	}
	parentData, err := json.Marshal(parent)
	if err != nil {
		t.Fatal(err)
	}
	if err := manifest.WriteManifestData(model.ParseName("test-list"), parentData); err != nil {
		t.Fatal(err)
	}

	var s Server
	w := createRequest(t, s.ListHandler, nil)
	if w.Code != http.StatusOK {
		t.Fatalf("expected status code 200, actual %d", w.Code)
	}

	var resp api.ListResponse
	if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
		t.Fatal(err)
	}

	idx := slices.IndexFunc(resp.Models, func(m api.ListModelResponse) bool {
		return m.Name == "test-list:latest"
	})
	if idx < 0 {
		t.Fatal("test-list:latest not found in list response")
	}

	want := ggufConfig.Size + sharedBlob.Size + ggufBlob.Size + mlxConfig.Size + mlxBlob.Size
	if got := resp.Models[idx].Size; got != want {
		t.Fatalf("size = %d, want %d", got, want)
	}
}

View File

@@ -20,6 +20,7 @@ import (
"sort" "sort"
"strings" "strings"
"testing" "testing"
"time"
"unicode" "unicode"
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
@@ -33,6 +34,58 @@ import (
"github.com/ollama/ollama/version" "github.com/ollama/ollama/version"
) )
// TestPsHandlerUsesRunningManifestAndRunner seeds the scheduler with a single
// loaded runner whose model carries distinct Digest and ManifestDigest values
// and checks that the ps response reports the ManifestDigest and the MLX
// runner name.
func TestPsHandlerUsesRunningManifestAndRunner(t *testing.T) {
	gin.SetMode(gin.TestMode)

	childDigest := strings.Repeat("a", 64)

	loadedModel := &Model{
		ShortName:      "test-model:latest",
		Digest:         strings.Repeat("b", 64),
		ManifestDigest: childDigest,
		Runner:         manifest.RunnerMLX,
		Config: model.ConfigV2{
			ModelFormat: manifest.FormatSafetensors,
		},
	}
	loadedRunner := &runnerRef{
		model:           loadedModel,
		runner:          manifest.RunnerMLX,
		totalSize:       1024,
		vramSize:        1024,
		expiresAt:       time.Now().Add(time.Hour),
		sessionDuration: time.Hour,
	}
	s := Server{
		sched: &Scheduler{
			loaded: map[string]*runnerRef{"test": loadedRunner},
		},
	}

	// Drive the handler directly with a recorder-backed gin context.
	rec := httptest.NewRecorder()
	ginCtx, _ := gin.CreateTestContext(rec)
	ginCtx.Request = httptest.NewRequest(http.MethodGet, "/api/ps", nil)
	s.PsHandler(ginCtx)

	if rec.Code != http.StatusOK {
		t.Fatalf("status = %d, want %d: %s", rec.Code, http.StatusOK, rec.Body.String())
	}

	var resp api.ProcessResponse
	if err := json.Unmarshal(rec.Body.Bytes(), &resp); err != nil {
		t.Fatal(err)
	}
	if n := len(resp.Models); n != 1 {
		t.Fatalf("model count = %d, want 1", n)
	}

	if got := resp.Models[0].Digest; got != childDigest {
		t.Fatalf("digest = %q, want child digest %q", got, childDigest)
	}
	if got := resp.Models[0].Runner; got != manifest.RunnerMLX {
		t.Fatalf("runner = %q, want %q", got, manifest.RunnerMLX)
	}
}
func createTestFile(t *testing.T, name string) (string, string) { func createTestFile(t *testing.T, name string) (string, string) {
t.Helper() t.Helper()
@@ -658,11 +711,14 @@ func TestManifestCaseSensitivity(t *testing.T) {
checkManifestList := func() { checkManifestList := func() {
t.Helper() t.Helper()
mandir := filepath.Join(os.Getenv("OLLAMA_MODELS"), "manifests/") mandir, err := manifest.V2Path()
if err != nil {
t.Fatalf("failed to resolve v2 manifest path: %v", err)
}
var entries []string var entries []string
t.Logf("dir entries:") t.Logf("dir entries:")
fsys := os.DirFS(mandir) fsys := os.DirFS(mandir)
err := fs.WalkDir(fsys, ".", func(path string, info fs.DirEntry, err error) error { err = fs.WalkDir(fsys, ".", func(path string, info fs.DirEntry, err error) error {
if err != nil { if err != nil {
return err return err
} }
@@ -685,7 +741,14 @@ func TestManifestCaseSensitivity(t *testing.T) {
g := entries[0] // raw path g := entries[0] // raw path
g = filepath.ToSlash(g) g = filepath.ToSlash(g)
w := model.ParseName(wantStableName).Filepath() wp, err := manifest.V2PathForName(model.ParseName(wantStableName))
if err != nil {
t.Fatalf("failed to resolve expected manifest path: %v", err)
}
w, err := filepath.Rel(mandir, wp)
if err != nil {
t.Fatalf("failed to make expected manifest path relative: %v", err)
}
w = filepath.ToSlash(w) w = filepath.ToSlash(w)
if g != w { if g != w {
t.Errorf("\ngot: %s\nwant: %s", g, w) t.Errorf("\ngot: %s\nwant: %s", g, w)
@@ -789,6 +852,212 @@ func TestShow(t *testing.T) {
} }
} }
// createShowSafetensorsLayer returns a tensor layer whose content is a
// little-endian uint64 header length followed by a JSON header describing one
// F32 tensor named tensorName with the given shape. No tensor data follows the
// header. The returned layer's Name is set to tensorName.
func createShowSafetensorsLayer(t *testing.T, tensorName string, shape []int64) manifest.Layer {
	t.Helper()

	tensorMeta := map[string]any{
		"dtype":        "F32",
		"shape":        shape,
		"data_offsets": []int64{0, 16},
	}
	headerJSON, err := json.Marshal(map[string]any{tensorName: tensorMeta})
	if err != nil {
		t.Fatal(err)
	}

	// 8-byte length prefix, then the JSON header itself.
	content := make([]byte, 0, 8+len(headerJSON))
	content = binary.LittleEndian.AppendUint64(content, uint64(len(headerJSON)))
	content = append(content, headerJSON...)

	tensorLayer, err := manifest.NewLayer(bytes.NewReader(content), manifest.MediaTypeImageTensor)
	if err != nil {
		t.Fatal(err)
	}
	tensorLayer.Name = tensorName
	return tensorLayer
}
// writeShowManifestVariant writes a manifest under name for the given runner
// and format. The config layer is the JSON encoding of cfg. A GGUF variant
// gets a model layer built from kv via createBinFile; a safetensors variant
// gets one 2x2 tensor layer named "<name>.weight". Any other format gets no
// model layer. extraLayers are appended after the model layer.
func writeShowManifestVariant(t *testing.T, name, runner, format string, cfg model.ConfigV2, kv map[string]any, extraLayers ...manifest.Layer) {
	t.Helper()

	rawConfig, err := json.Marshal(cfg)
	if err != nil {
		t.Fatal(err)
	}
	configLayer, err := manifest.NewLayer(bytes.NewReader(rawConfig), "application/vnd.docker.container.image.v1+json")
	if err != nil {
		t.Fatal(err)
	}

	layers := make([]manifest.Layer, 0, len(extraLayers)+1)
	if format == manifest.FormatGGUF {
		_, digest := createBinFile(t, kv, nil)
		ggufLayer, err := manifest.NewLayerFromLayer(digest, "application/vnd.ollama.image.model", name)
		if err != nil {
			t.Fatal(err)
		}
		layers = append(layers, ggufLayer)
	} else if format == manifest.FormatSafetensors {
		tensorLayer := createShowSafetensorsLayer(t, name+".weight", []int64{2, 2})
		layers = append(layers, tensorLayer)
	}
	layers = append(layers, extraLayers...)

	err = manifest.WriteManifestWithMetadata(model.ParseName(name), configLayer, layers, runner, format)
	if err != nil {
		t.Fatal(err)
	}
}
// TestShowAllManifestsNonListReturnsSingleManifest creates a plain GGUF model
// and checks that a show request with AllManifests set returns exactly one
// manifest entry carrying the GGML runner, the GGUF format and the model's
// architecture key.
func TestShowAllManifestsNonListReturnsSingleManifest(t *testing.T) {
	t.Setenv("OLLAMA_MODELS", t.TempDir())

	var s Server

	_, digest := createBinFile(t, ggml.KV{"general.architecture": "test"}, nil)
	createReq := api.CreateRequest{
		Name:  "show-model",
		Files: map[string]string{"model.gguf": digest},
	}
	createRequest(t, s.CreateHandler, createReq)

	showReq := api.ShowRequest{
		Model:        "show-model",
		AllManifests: true,
	}
	w := createRequest(t, s.ShowHandler, showReq)
	if w.Code != http.StatusOK {
		t.Fatalf("expected status code 200, actual %d: %s", w.Code, w.Body.String())
	}

	var resp api.ShowManifestsResponse
	if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
		t.Fatal(err)
	}
	if n := len(resp.Manifests); n != 1 {
		t.Fatalf("manifest count = %d, want 1", n)
	}

	if got := resp.Manifests[0].Runner; got != manifest.RunnerGGML {
		t.Fatalf("runner = %q, want %q", got, manifest.RunnerGGML)
	}
	if got := resp.Manifests[0].Details.Format; got != manifest.FormatGGUF {
		t.Fatalf("format = %q, want %q", got, manifest.FormatGGUF)
	}
	if arch := resp.Manifests[0].ModelInfo["general.architecture"]; arch != "test" {
		t.Fatalf("architecture = %v, want %q", arch, "test")
	}
}
// TestShowAllManifestsManifestListDedupesLicenses writes an MLX child and a
// GGML child that both carry the same license layer, groups them under a
// manifest list, and checks the AllManifests show response: children come
// back in list order, the top-level license is the single license text, each
// child still reports the license, and the MLX child exposes its Requires
// value and its one tensor.
func TestShowAllManifestsManifestListDedupesLicenses(t *testing.T) {
	t.Setenv("OLLAMA_MODELS", t.TempDir())

	const licenseText = "Apache-2.0"
	sharedCaps := func() []string {
		return []string{"completion", "vision", "thinking", "tools"}
	}

	licenseLayer, err := manifest.NewLayer(bytes.NewReader([]byte(licenseText)), "application/vnd.ollama.image.license")
	if err != nil {
		t.Fatal(err)
	}

	mlxConfig := model.ConfigV2{
		ModelFormat:  manifest.FormatSafetensors,
		ModelFamily:  "qwen3_5_moe",
		ModelType:    "35.1B",
		FileType:     "nvfp4",
		Requires:     "0.19.0",
		Capabilities: sharedCaps(),
	}
	writeShowManifestVariant(t, "show-mlx", manifest.RunnerMLX, manifest.FormatSafetensors, mlxConfig, nil, licenseLayer)

	ggmlConfig := model.ConfigV2{
		ModelFormat:  manifest.FormatGGUF,
		ModelFamily:  "qwen35moe",
		ModelType:    "36.0B",
		FileType:     "Q4_K_M",
		Capabilities: sharedCaps(),
	}
	writeShowManifestVariant(t, "show-ggml", manifest.RunnerGGML, manifest.FormatGGUF, ggmlConfig, ggml.KV{"general.architecture": "qwen35moe"}, licenseLayer)

	mlxManifest, err := manifest.ParseNamedManifestForRunner(model.ParseName("show-mlx"), manifest.RunnerMLX)
	if err != nil {
		t.Fatal(err)
	}
	ggmlManifest, err := manifest.ParseNamedManifestForRunner(model.ParseName("show-ggml"), manifest.RunnerGGML)
	if err != nil {
		t.Fatal(err)
	}

	mlxRef, err := manifest.NewManifestReference(mlxManifest.BlobDigest(), manifest.RunnerMLX, manifest.FormatSafetensors)
	if err != nil {
		t.Fatal(err)
	}
	ggmlRef, err := manifest.NewManifestReference(ggmlManifest.BlobDigest(), manifest.RunnerGGML, manifest.FormatGGUF)
	if err != nil {
		t.Fatal(err)
	}

	// The MLX reference is listed first; the response order is asserted below.
	parent := manifest.Manifest{
		SchemaVersion: 2,
		MediaType:     manifest.MediaTypeManifestList,
		Manifests:     []manifest.Manifest{mlxRef, ggmlRef},
	}
	parentData, err := json.Marshal(parent)
	if err != nil {
		t.Fatal(err)
	}
	if err := manifest.WriteManifestData(model.ParseName("show-list"), parentData); err != nil {
		t.Fatal(err)
	}

	var s Server
	w := createRequest(t, s.ShowHandler, api.ShowRequest{
		Model:        "show-list",
		AllManifests: true,
	})
	if w.Code != http.StatusOK {
		t.Fatalf("expected status code 200, actual %d: %s", w.Code, w.Body.String())
	}

	var resp api.ShowManifestsResponse
	if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
		t.Fatal(err)
	}
	if n := len(resp.Manifests); n != 2 {
		t.Fatalf("manifest count = %d, want 2", n)
	}

	firstRunner, secondRunner := resp.Manifests[0].Runner, resp.Manifests[1].Runner
	if !(firstRunner == manifest.RunnerMLX && secondRunner == manifest.RunnerGGML) {
		t.Fatalf("runner order = [%q %q], want [%q %q]", firstRunner, secondRunner, manifest.RunnerMLX, manifest.RunnerGGML)
	}

	if resp.License != licenseText {
		t.Fatalf("license = %q, want %q", resp.License, "Apache-2.0")
	}
	firstLicense, secondLicense := resp.Manifests[0].License, resp.Manifests[1].License
	if !(firstLicense == licenseText && secondLicense == licenseText) {
		t.Fatalf("child licenses = [%q %q], want both Apache-2.0", firstLicense, secondLicense)
	}

	if got := resp.Manifests[0].Requires; got != "0.19.0" {
		t.Fatalf("mlx requires = %q, want %q", got, "0.19.0")
	}
	if n := len(resp.Manifests[0].Tensors); n != 1 {
		t.Fatalf("mlx tensor count = %d, want 1", n)
	}
	if got := resp.Manifests[0].Tensors[0].Name; got != "show-mlx.weight" {
		t.Fatalf("mlx tensor name = %q, want %q", got, "show-mlx.weight")
	}
}
// TestShowAllManifestsRejectsRunnerSelection checks that a show request which
// sets both Runner and AllManifests is answered with 400 and the exact JSON
// error body.
func TestShowAllManifestsRejectsRunnerSelection(t *testing.T) {
	const wantBody = `{"error":"runner cannot be used with all_manifests"}`

	var s Server
	req := api.ShowRequest{
		Model:        "show-model",
		Runner:       manifest.RunnerMLX,
		AllManifests: true,
	}

	w := createRequest(t, s.ShowHandler, req)
	if w.Code != http.StatusBadRequest {
		t.Fatalf("expected status code 400, actual %d: %s", w.Code, w.Body.String())
	}

	got := strings.TrimSpace(w.Body.String())
	if got != wantBody {
		t.Fatalf("response = %s", got)
	}
}
func TestShowCopilotUserAgentOverwritesExistingBasename(t *testing.T) { func TestShowCopilotUserAgentOverwritesExistingBasename(t *testing.T) {
t.Setenv("OLLAMA_MODELS", t.TempDir()) t.Setenv("OLLAMA_MODELS", t.TempDir())

View File

@@ -84,7 +84,8 @@ func InitScheduler(ctx context.Context) *Scheduler {
// schedulerModelKey returns the scheduler map key for a model. // schedulerModelKey returns the scheduler map key for a model.
// GGUF-backed models use ModelPath; safetensors/image models without a // GGUF-backed models use ModelPath; safetensors/image models without a
// ModelPath use manifest digest so distinct models don't collide. // ModelPath use the selected manifest digest so distinct child manifests don't
// collide.
func schedulerModelKey(m *Model) string { func schedulerModelKey(m *Model) string {
if m == nil { if m == nil {
return "" return ""
@@ -92,6 +93,9 @@ func schedulerModelKey(m *Model) string {
if m.ModelPath != "" { if m.ModelPath != "" {
return m.ModelPath return m.ModelPath
} }
if m.ManifestDigest != "" {
return "manifest:" + m.ManifestDigest
}
if m.Digest != "" { if m.Digest != "" {
return "digest:" + m.Digest return "digest:" + m.Digest
} }
@@ -530,6 +534,12 @@ iGPUScan:
} }
totalSize, vramSize := llama.MemorySize() totalSize, vramSize := llama.MemorySize()
runnerName := req.model.Runner
if req.model.IsMLX() && runnerName == "" {
runnerName = "mlx"
} else if name := llm.RunnerName(llama); name != "" {
runnerName = name
}
runner := &runnerRef{ runner := &runnerRef{
model: req.model, model: req.model,
modelPath: req.model.ModelPath, modelPath: req.model.ModelPath,
@@ -540,6 +550,7 @@ iGPUScan:
gpus: gpuIDs, gpus: gpuIDs,
discreteGPUs: discreteGPUs, discreteGPUs: discreteGPUs,
isImagegen: slices.Contains(req.model.Config.Capabilities, "image"), isImagegen: slices.Contains(req.model.Config.Capabilities, "image"),
runner: runnerName,
totalSize: totalSize, totalSize: totalSize,
vramSize: vramSize, vramSize: vramSize,
loading: true, loading: true,
@@ -640,6 +651,7 @@ type runnerRef struct {
gpus []ml.DeviceID // Recorded at time of provisioning gpus []ml.DeviceID // Recorded at time of provisioning
discreteGPUs bool // True if all devices are discrete GPUs - used to skip VRAM recovery check for iGPUs discreteGPUs bool // True if all devices are discrete GPUs - used to skip VRAM recovery check for iGPUs
isImagegen bool // True if loaded via imagegen runner (vs mlxrunner) isImagegen bool // True if loaded via imagegen runner (vs mlxrunner)
runner string
vramSize uint64 vramSize uint64
totalSize uint64 totalSize uint64

View File

@@ -499,6 +499,35 @@ func TestSchedGetRunnerUsesDigestKeyWhenModelPathEmpty(t *testing.T) {
require.Len(t, s.pendingReqCh, 1) require.Len(t, s.pendingReqCh, 1)
} }
// TestSchedGetRunnerUsesManifestDigestKeyWhenModelPathEmpty registers a loaded
// runner for a model with ManifestDigest "child-a" and then requests the same
// name and Digest with ManifestDigest "child-b". Neither result channel may
// have a value and exactly one request must be pending, i.e. the loaded
// runner was not handed back for the other child manifest.
func TestSchedGetRunnerUsesManifestDigestKeyWhenModelPathEmpty(t *testing.T) {
	ctx, done := context.WithTimeout(t.Context(), 100*time.Millisecond)
	defer done()

	s := InitScheduler(ctx)

	opts := api.DefaultOptions()
	opts.NumCtx = 4

	// Neither model sets ModelPath; they differ only in ManifestDigest.
	childA := &Model{Name: "list", Digest: "parent", ManifestDigest: "child-a"}
	childB := &Model{Name: "list", Digest: "parent", ManifestDigest: "child-b"}

	existing := &runnerRef{
		model:       childA,
		modelKey:    schedulerModelKey(childA),
		llama:       &mockLlm{vramByGPU: map[ml.DeviceID]uint64{}},
		Options:     &opts,
		numParallel: 1,
	}

	s.loadedMu.Lock()
	s.loaded[existing.modelKey] = existing
	s.loadedMu.Unlock()

	successCh, errCh := s.GetRunner(ctx, childB, opts, nil)

	require.Empty(t, successCh)
	require.Empty(t, errCh)
	require.Len(t, s.pendingReqCh, 1)
}
func TestSchedGetRunnerReusesSameDigestWhenModelPathEmpty(t *testing.T) { func TestSchedGetRunnerReusesSameDigestWhenModelPathEmpty(t *testing.T) {
ctx, done := context.WithTimeout(t.Context(), 100*time.Millisecond) ctx, done := context.WithTimeout(t.Context(), 100*time.Millisecond)
defer done() defer done()

View File

@@ -142,6 +142,7 @@ func waitForOllamaSignin(ctx context.Context) error {
// RunOptions contains options for running an interactive agent session. // RunOptions contains options for running an interactive agent session.
type RunOptions struct { type RunOptions struct {
Model string Model string
Runner string
Messages []api.Message Messages []api.Message
WordWrap bool WordWrap bool
Format string Format string
@@ -260,6 +261,7 @@ func Chat(ctx context.Context, opts RunOptions) (*api.Message, error) {
for { for {
req := &api.ChatRequest{ req := &api.ChatRequest{
Model: opts.Model, Model: opts.Model,
Runner: opts.Runner,
Messages: messages, Messages: messages,
Format: json.RawMessage(opts.Format), Format: json.RawMessage(opts.Format),
Options: opts.Options, Options: opts.Options,
@@ -638,13 +640,13 @@ func renderToolCalls(toolCalls []api.ToolCall, plainText bool) string {
} }
// checkModelCapabilities checks if the model supports tools. // checkModelCapabilities checks if the model supports tools.
func checkModelCapabilities(ctx context.Context, modelName string) (supportsTools bool, err error) { func checkModelCapabilities(ctx context.Context, modelName, runner string) (supportsTools bool, err error) {
client, err := api.ClientFromEnvironment() client, err := api.ClientFromEnvironment()
if err != nil { if err != nil {
return false, err return false, err
} }
resp, err := client.Show(ctx, &api.ShowRequest{Model: modelName}) resp, err := client.Show(ctx, &api.ShowRequest{Model: modelName, Runner: runner})
if err != nil { if err != nil {
return false, err return false, err
} }
@@ -662,7 +664,7 @@ func checkModelCapabilities(ctx context.Context, modelName string) (supportsTool
// This is called from cmd.go when --experimental flag is set. // This is called from cmd.go when --experimental flag is set.
// If yoloMode is true, all tool approvals are skipped. // If yoloMode is true, all tool approvals are skipped.
// If enableWebsearch is true, the web search tool is registered. // If enableWebsearch is true, the web search tool is registered.
func GenerateInteractive(cmd *cobra.Command, modelName string, wordWrap bool, options map[string]any, think *api.ThinkValue, hideThinking bool, keepAlive *api.Duration, yoloMode bool, enableWebsearch bool) error { func GenerateInteractive(cmd *cobra.Command, modelName, runner string, wordWrap bool, options map[string]any, think *api.ThinkValue, hideThinking bool, keepAlive *api.Duration, yoloMode bool, enableWebsearch bool) error {
scanner, err := readline.New(readline.Prompt{ scanner, err := readline.New(readline.Prompt{
Prompt: ">>> ", Prompt: ">>> ",
AltPrompt: "... ", AltPrompt: "... ",
@@ -677,7 +679,7 @@ func GenerateInteractive(cmd *cobra.Command, modelName string, wordWrap bool, op
defer fmt.Printf(readline.EndBracketedPaste) defer fmt.Printf(readline.EndBracketedPaste)
// Check if model supports tools // Check if model supports tools
supportsTools, err := checkModelCapabilities(cmd.Context(), modelName) supportsTools, err := checkModelCapabilities(cmd.Context(), modelName, runner)
if err != nil { if err != nil {
fmt.Fprintf(os.Stderr, "\033[1mwarning:\033[0m could not check model capabilities: %v\n", err) fmt.Fprintf(os.Stderr, "\033[1mwarning:\033[0m could not check model capabilities: %v\n", err)
supportsTools = false supportsTools = false
@@ -807,7 +809,7 @@ func GenerateInteractive(cmd *cobra.Command, modelName string, wordWrap bool, op
think = &thinkValue think = &thinkValue
// Check if model supports thinking // Check if model supports thinking
if client, err := api.ClientFromEnvironment(); err == nil { if client, err := api.ClientFromEnvironment(); err == nil {
if resp, err := client.Show(cmd.Context(), &api.ShowRequest{Model: modelName}); err == nil { if resp, err := client.Show(cmd.Context(), &api.ShowRequest{Model: modelName, Runner: runner}); err == nil {
if !slices.Contains(resp.Capabilities, model.CapabilityThinking) { if !slices.Contains(resp.Capabilities, model.CapabilityThinking) {
fmt.Fprintf(os.Stderr, "warning: model %q does not support thinking output\n", modelName) fmt.Fprintf(os.Stderr, "warning: model %q does not support thinking output\n", modelName)
} }
@@ -822,7 +824,7 @@ func GenerateInteractive(cmd *cobra.Command, modelName string, wordWrap bool, op
think = &api.ThinkValue{Value: false} think = &api.ThinkValue{Value: false}
// Check if model supports thinking // Check if model supports thinking
if client, err := api.ClientFromEnvironment(); err == nil { if client, err := api.ClientFromEnvironment(); err == nil {
if resp, err := client.Show(cmd.Context(), &api.ShowRequest{Model: modelName}); err == nil { if resp, err := client.Show(cmd.Context(), &api.ShowRequest{Model: modelName, Runner: runner}); err == nil {
if !slices.Contains(resp.Capabilities, model.CapabilityThinking) { if !slices.Contains(resp.Capabilities, model.CapabilityThinking) {
fmt.Fprintf(os.Stderr, "warning: model %q does not support thinking output\n", modelName) fmt.Fprintf(os.Stderr, "warning: model %q does not support thinking output\n", modelName)
} }
@@ -884,6 +886,7 @@ func GenerateInteractive(cmd *cobra.Command, modelName string, wordWrap bool, op
} }
req := &api.ShowRequest{ req := &api.ShowRequest{
Name: modelName, Name: modelName,
Runner: runner,
Options: options, Options: options,
} }
resp, err := client.Show(cmd.Context(), req) resp, err := client.Show(cmd.Context(), req)
@@ -981,7 +984,7 @@ func GenerateInteractive(cmd *cobra.Command, modelName string, wordWrap bool, op
} }
// Check if model exists and get its info // Check if model exists and get its info
info, err := client.Show(cmd.Context(), &api.ShowRequest{Model: newModelName}) info, err := client.Show(cmd.Context(), &api.ShowRequest{Model: newModelName, Runner: runner})
if err != nil { if err != nil {
p.StopAndClear() p.StopAndClear()
if strings.Contains(err.Error(), "not found") { if strings.Contains(err.Error(), "not found") {
@@ -996,8 +999,9 @@ func GenerateInteractive(cmd *cobra.Command, modelName string, wordWrap bool, op
if info.RemoteHost == "" { if info.RemoteHost == "" {
// Preload the model by sending an empty generate request // Preload the model by sending an empty generate request
req := &api.GenerateRequest{ req := &api.GenerateRequest{
Model: newModelName, Model: newModelName,
Think: think, Runner: runner,
Think: think,
} }
err = client.Generate(cmd.Context(), req, func(r api.GenerateResponse) error { err = client.Generate(cmd.Context(), req, func(r api.GenerateResponse) error {
return nil return nil
@@ -1059,6 +1063,7 @@ func GenerateInteractive(cmd *cobra.Command, modelName string, wordWrap bool, op
verbose, _ := cmd.Flags().GetBool("verbose") verbose, _ := cmd.Flags().GetBool("verbose")
opts := RunOptions{ opts := RunOptions{
Model: modelName, Model: modelName,
Runner: runner,
Messages: messages, Messages: messages,
WordWrap: wordWrap, WordWrap: wordWrap,
Format: format, Format: format,

View File

@@ -389,7 +389,7 @@ func newManifestWriter(opts CreateOptions, capabilities []string, parserName, re
manifestLayers = append(manifestLayers, modelfileLayers...) manifestLayers = append(manifestLayers, modelfileLayers...)
} }
return manifest.WriteManifest(name, configLayer, manifestLayers) return manifest.WriteManifestWithMetadata(name, configLayer, manifestLayers, manifest.RunnerMLX, manifest.FormatSafetensors)
} }
} }

View File

@@ -11,6 +11,8 @@ import (
"strings" "strings"
"github.com/ollama/ollama/envconfig" "github.com/ollama/ollama/envconfig"
rootmanifest "github.com/ollama/ollama/manifest"
"github.com/ollama/ollama/types/model"
) )
// ManifestLayer represents a layer in the manifest. // ManifestLayer represents a layer in the manifest.
@@ -49,9 +51,7 @@ func DefaultManifestDir() string {
// LoadManifest loads a manifest for the given model name. // LoadManifest loads a manifest for the given model name.
// Model name format: "modelname" or "modelname:tag" or "host/namespace/name:tag" // Model name format: "modelname" or "modelname:tag" or "host/namespace/name:tag"
func LoadManifest(modelName string) (*ModelManifest, error) { func LoadManifest(modelName string) (*ModelManifest, error) {
manifestPath := resolveManifestPath(modelName) data, err := rootmanifest.ReadSelectedManifestData(model.ParseName(modelName))
data, err := os.ReadFile(manifestPath)
if err != nil { if err != nil {
return nil, fmt.Errorf("read manifest: %w", err) return nil, fmt.Errorf("read manifest: %w", err)
} }
@@ -67,36 +67,6 @@ func LoadManifest(modelName string) (*ModelManifest, error) {
}, nil }, nil
} }
// resolveManifestPath converts a model name to a manifest file path.
//
// Accepted forms are "name", "namespace/name" and "host/namespace/name", each
// optionally followed by ":tag". Components that are not given default to
// host "registry.ollama.ai", namespace "library" and tag "latest". A name
// with any other number of slash-separated components is used as-is for the
// name component.
//
// The returned path is DefaultManifestDir()/host/namespace/name/tag.
func resolveManifestPath(modelName string) string {
	// Defaults: registry.ollama.ai/library/<name>/latest
	host := "registry.ollama.ai"
	namespace := "library"
	name := modelName
	tag := "latest"

	// A colon separates the tag only when nothing after it contains a slash.
	// Without this check the port in "host:5000/namespace/name" would be
	// taken for the tag separator and the rest of the path for the tag.
	if idx := strings.LastIndex(name, ":"); idx != -1 && !strings.Contains(name[idx+1:], "/") {
		tag = name[idx+1:]
		name = name[:idx]
	}

	// Split off an explicit host and/or namespace.
	parts := strings.Split(name, "/")
	switch len(parts) {
	case 3:
		host = parts[0]
		namespace = parts[1]
		name = parts[2]
	case 2:
		namespace = parts[0]
		name = parts[1]
	}

	return filepath.Join(DefaultManifestDir(), host, namespace, name, tag)
}
// BlobPath returns the full path to a blob given its digest. // BlobPath returns the full path to a blob given its digest.
func (m *ModelManifest) BlobPath(digest string) string { func (m *ModelManifest) BlobPath(digest string) string {
// Convert "sha256:abc123" to "sha256-abc123" // Convert "sha256:abc123" to "sha256-abc123"

View File

@@ -1,8 +1,12 @@
package manifest package manifest
import ( import (
"os"
"path/filepath" "path/filepath"
"testing" "testing"
rootmanifest "github.com/ollama/ollama/manifest"
"github.com/ollama/ollama/types/model"
) )
func TestTotalTensorSize(t *testing.T) { func TestTotalTensorSize(t *testing.T) {
@@ -55,3 +59,39 @@ func TestManifestAndBlobDirsRespectOLLAMAModels(t *testing.T) {
t.Fatalf("DefaultBlobDir() = %q, want %q", got, wantBlobs) t.Fatalf("DefaultBlobDir() = %q, want %q", got, wantBlobs)
} }
} }
// TestLoadManifestPrefersV2 writes a manifest for the same name at both the
// legacy path and the v2 path, each with a different mediaType, and checks
// that LoadManifest returns the one stored at the v2 path.
func TestLoadManifestPrefersV2(t *testing.T) {
	t.Setenv("OLLAMA_MODELS", t.TempDir())

	name := model.ParseName("example")

	// writeFile creates the parent directory of path and writes body to it.
	writeFile := func(path, body string) {
		t.Helper()
		if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
			t.Fatal(err)
		}
		if err := os.WriteFile(path, []byte(body), 0o644); err != nil {
			t.Fatal(err)
		}
	}

	legacyPath, err := rootmanifest.PathForName(name)
	if err != nil {
		t.Fatal(err)
	}
	writeFile(legacyPath, `{"schemaVersion":2,"mediaType":"legacy"}`)

	v2Path, err := rootmanifest.V2PathForName(name)
	if err != nil {
		t.Fatal(err)
	}
	writeFile(v2Path, `{"schemaVersion":2,"mediaType":"v2"}`)

	loaded, err := LoadManifest(name.String())
	if err != nil {
		t.Fatal(err)
	}
	if got := loaded.Manifest.MediaType; got != "v2" {
		t.Fatalf("media type = %q, want %q", got, "v2")
	}
}

View File

@@ -43,7 +43,13 @@ type modelConfig struct {
// GetSafetensorsLLMInfo extracts model information from safetensors LLM models. // GetSafetensorsLLMInfo extracts model information from safetensors LLM models.
// It reads the config.json layer and returns a map compatible with GGML's KV format. // It reads the config.json layer and returns a map compatible with GGML's KV format.
func GetSafetensorsLLMInfo(name model.Name) (map[string]any, error) { func GetSafetensorsLLMInfo(name model.Name) (map[string]any, error) {
mf, err := manifest.ParseNamedManifest(name) return GetSafetensorsLLMInfoForRunner(name, "")
}
// GetSafetensorsLLMInfoForRunner extracts model information from the
// safetensors manifest selected for runner.
func GetSafetensorsLLMInfoForRunner(name model.Name, runner string) (map[string]any, error) {
mf, err := manifest.ParseNamedManifestForRunner(name, runner)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to load manifest: %w", err) return nil, fmt.Errorf("failed to load manifest: %w", err)
} }
@@ -212,7 +218,13 @@ func getParameterCountFromManifest(mf *manifest.Manifest) (int64, error) {
// GetSafetensorsTensorInfo extracts tensor information from safetensors model layers. // GetSafetensorsTensorInfo extracts tensor information from safetensors model layers.
// Each tensor is stored as a minimal safetensors file with an 88-byte header containing metadata. // Each tensor is stored as a minimal safetensors file with an 88-byte header containing metadata.
func GetSafetensorsTensorInfo(name model.Name) ([]api.Tensor, error) { func GetSafetensorsTensorInfo(name model.Name) ([]api.Tensor, error) {
mf, err := manifest.ParseNamedManifest(name) return GetSafetensorsTensorInfoForRunner(name, "")
}
// GetSafetensorsTensorInfoForRunner extracts tensor information from the
// safetensors manifest selected for runner.
func GetSafetensorsTensorInfoForRunner(name model.Name, runner string) ([]api.Tensor, error) {
mf, err := manifest.ParseNamedManifestForRunner(name, runner)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to load manifest: %w", err) return nil, fmt.Errorf("failed to load manifest: %w", err)
} }
@@ -309,7 +321,13 @@ func getTensorInfoFromManifest(mf *manifest.Manifest) ([]api.Tensor, error) {
// Reads quant_type from the first tensor blob's __metadata__. // Reads quant_type from the first tensor blob's __metadata__.
// Falls back to torch_dtype from config.json if no quant metadata. // Falls back to torch_dtype from config.json if no quant metadata.
func GetSafetensorsDtype(name model.Name) (string, error) { func GetSafetensorsDtype(name model.Name) (string, error) {
mf, err := manifest.ParseNamedManifest(name) return GetSafetensorsDtypeForRunner(name, "")
}
// GetSafetensorsDtypeForRunner returns the quantization type from the
// safetensors manifest selected for runner.
func GetSafetensorsDtypeForRunner(name model.Name, runner string) (string, error) {
mf, err := manifest.ParseNamedManifestForRunner(name, runner)
if err != nil { if err != nil {
return "", fmt.Errorf("failed to load manifest: %w", err) return "", fmt.Errorf("failed to load manifest: %w", err)
} }

View File

@@ -9,6 +9,7 @@ import (
"testing" "testing"
"github.com/ollama/ollama/manifest" "github.com/ollama/ollama/manifest"
"github.com/ollama/ollama/types/model"
) )
func TestBuildModelInfo(t *testing.T) { func TestBuildModelInfo(t *testing.T) {
@@ -714,6 +715,99 @@ func TestGetTensorInfoFromManifest_Quantized(t *testing.T) {
} }
} }
// createSafetensorsManifestForRunner writes a safetensors manifest under name
// for the given runner and returns it as read back through
// manifest.ParseNamedManifestForRunner. The manifest has an empty JSON object
// as its config and one tensor layer named tensorName, whose content is a
// little-endian uint64 header length followed by a JSON header for a 2x3 F32
// tensor.
func createSafetensorsManifestForRunner(t *testing.T, name, runner, tensorName string) manifest.Manifest {
	t.Helper()

	configLayer, err := manifest.NewLayer(bytes.NewReader([]byte("{}")), "application/vnd.docker.container.image.v1+json")
	if err != nil {
		t.Fatal(err)
	}

	tensorMeta := map[string]any{
		"dtype":        "F32",
		"shape":        []int64{2, 3},
		"data_offsets": []int64{0, 24},
	}
	headerJSON, err := json.Marshal(map[string]any{tensorName: tensorMeta})
	if err != nil {
		t.Fatal(err)
	}

	// 8-byte length prefix followed by the header bytes.
	content := make([]byte, 8, 8+len(headerJSON))
	binary.LittleEndian.PutUint64(content, uint64(len(headerJSON)))
	content = append(content, headerJSON...)

	tensorLayer, err := manifest.NewLayer(bytes.NewReader(content), manifest.MediaTypeImageTensor)
	if err != nil {
		t.Fatal(err)
	}
	tensorLayer.Name = tensorName

	parsedName := model.ParseName(name)
	err = manifest.WriteManifestWithMetadata(parsedName, configLayer, []manifest.Layer{tensorLayer}, runner, manifest.FormatSafetensors)
	if err != nil {
		t.Fatal(err)
	}

	written, err := manifest.ParseNamedManifestForRunner(model.ParseName(name), runner)
	if err != nil {
		t.Fatal(err)
	}
	return *written
}
// TestGetSafetensorsTensorInfoForRunnerSelectsChildManifest puts an MLX child
// and a GGML child, each with a differently named tensor, under one manifest
// list and checks that GetSafetensorsTensorInfoForRunner returns the tensor of
// the child matching the requested runner.
func TestGetSafetensorsTensorInfoForRunnerSelectsChildManifest(t *testing.T) {
	t.Setenv("OLLAMA_MODELS", t.TempDir())

	mlxManifest := createSafetensorsManifestForRunner(t, "runner-mlx", manifest.RunnerMLX, "mlx.weight")
	ggmlManifest := createSafetensorsManifestForRunner(t, "runner-ggml", manifest.RunnerGGML, "ggml.weight")

	mlxRef, err := manifest.NewManifestReference(mlxManifest.BlobDigest(), manifest.RunnerMLX, manifest.FormatSafetensors)
	if err != nil {
		t.Fatal(err)
	}
	ggmlRef, err := manifest.NewManifestReference(ggmlManifest.BlobDigest(), manifest.RunnerGGML, manifest.FormatSafetensors)
	if err != nil {
		t.Fatal(err)
	}

	// The GGML reference is listed first, ahead of the MLX one.
	parent := manifest.Manifest{
		SchemaVersion: 2,
		MediaType:     manifest.MediaTypeManifestList,
		Manifests:     []manifest.Manifest{ggmlRef, mlxRef},
	}
	parentData, err := json.Marshal(parent)
	if err != nil {
		t.Fatal(err)
	}
	if err := manifest.WriteManifestData(model.ParseName("runner-list"), parentData); err != nil {
		t.Fatal(err)
	}

	cases := []struct {
		runner string
		want   string
	}{
		{runner: manifest.RunnerMLX, want: "mlx.weight"},
		{runner: manifest.RunnerGGML, want: "ggml.weight"},
	}
	for _, tc := range cases {
		t.Run(tc.runner, func(t *testing.T) {
			tensors, err := GetSafetensorsTensorInfoForRunner(model.ParseName("runner-list"), tc.runner)
			if err != nil {
				t.Fatal(err)
			}
			if n := len(tensors); n != 1 {
				t.Fatalf("tensor count = %d, want 1", n)
			}
			if got := tensors[0].Name; got != tc.want {
				t.Fatalf("tensor name = %q, want %q", got, tc.want)
			}
		})
	}
}
func TestGetParameterCountFromManifest(t *testing.T) { func TestGetParameterCountFromManifest(t *testing.T) {
// Create a temp directory for blobs and set OLLAMA_MODELS // Create a temp directory for blobs and set OLLAMA_MODELS
tempDir := t.TempDir() tempDir := t.TempDir()