Compare commits

...

10 Commits

Author SHA1 Message Date
Patrick Devine
81b9cb7fa9 ollama pull manifest list support 2026-04-24 12:40:12 -07:00
Patrick Devine
7fd96eba96 ollama push w/ manifest lists 2026-04-24 08:41:11 -07:00
Patrick Devine
2dcc80204d hide the --runner flag in ollama run 2026-04-23 17:39:46 -07:00
Patrick Devine
0d863c8cf4 add ollama show cli 2026-04-23 17:37:27 -07:00
Patrick Devine
f636014ac7 add manifest list support to /api/show 2026-04-23 17:03:03 -07:00
Patrick Devine
00188139f1 manifest lists: fix size calculation in ollama ls 2026-04-23 11:07:54 -07:00
Patrick Devine
9658029516 more manifest list stuff 2026-04-22 18:51:45 -07:00
Patrick Devine
961ae1b10c introduce manifest lists 2026-04-21 18:28:14 -07:00
Patrick Devine
7bcdb250b9 fix failing client2 unit tests 2026-04-21 13:56:39 -07:00
Patrick Devine
7bbcd2e6be server: add v2 manifest path
This change adds a new manifest-v2/ path for new models created with the
create/pull/copy commands. Under manifest-v2, manifests are stored as blobs that
are content-addressable, just like tensor and config files. Each named tag is
instead a symlink, a hard link, or a copy of the manifest blob, depending on what the file system supports.

Downgrades to older versions of ollama are still possible, but any create/pull/copy
done with the newer version will potentially have its blobs pruned by the older
version.

manifest-v2 also changes the default registry name to `ollama.com` instead of
`registry.ollama.ai`.
2026-04-21 12:05:54 -07:00
28 changed files with 4291 additions and 441 deletions

View File

@@ -403,6 +403,21 @@ func (c *Client) Show(ctx context.Context, req *ShowRequest) (*ShowResponse, err
return &resp, nil
}
// ShowManifests obtains model information for all manifests in a manifest list.
//
// The caller's request is copied rather than modified, and the copy is always
// sent with AllManifests set to true. A nil req is treated as an empty request.
// The request is POSTed to /api/show; on failure the error from the transport
// helper is returned as-is together with a nil response.
func (c *Client) ShowManifests(ctx context.Context, req *ShowRequest) (*ShowManifestsResponse, error) {
	var payload ShowRequest
	if req != nil {
		payload = *req
	}
	// Forced on regardless of what the caller supplied.
	payload.AllManifests = true

	result := new(ShowManifestsResponse)
	if err := c.do(ctx, http.MethodPost, "/api/show", &payload, result); err != nil {
		return nil, err
	}
	return result, nil
}
// Heartbeat checks if the server has started and is responsive; if yes, it
// returns nil, otherwise an error.
func (c *Client) Heartbeat(ctx context.Context) error {

View File

@@ -64,6 +64,9 @@ type GenerateRequest struct {
// the library at https://ollama.com/library
Model string `json:"model"`
// Runner selects a runner variant from a manifest list.
Runner string `json:"runner,omitempty"`
// Prompt is the textual prompt to send to the model.
Prompt string `json:"prompt"`
@@ -148,6 +151,9 @@ type ChatRequest struct {
// Model is the model name, as in [GenerateRequest].
Model string `json:"model"`
// Runner selects a runner variant from a manifest list.
Runner string `json:"runner,omitempty"`
// Messages is the messages of the chat - can be used to keep a chat memory.
Messages []Message `json:"messages"`
@@ -675,6 +681,9 @@ type CreateRequest struct {
// From is the name of the model or file to use as the source.
From string `json:"from,omitempty"`
// List is the list of local model tags to include in a manifest list.
List []string `json:"list,omitempty"`
// RemoteHost is the URL of the upstream ollama API for the model (if any).
RemoteHost string `json:"remote_host,omitempty"`
@@ -724,8 +733,10 @@ type DeleteRequest struct {
// ShowRequest is the request passed to [Client.Show].
type ShowRequest struct {
Model string `json:"model"`
System string `json:"system"`
Model string `json:"model"`
Runner string `json:"runner,omitempty"`
AllManifests bool `json:"all_manifests,omitempty"`
System string `json:"system"`
// Template is deprecated
Template string `json:"template"`
@@ -758,6 +769,18 @@ type ShowResponse struct {
Requires string `json:"requires,omitempty"`
}
// ShowManifest is a single manifest summary returned from [Client.ShowManifests].
// It combines the runner name of one manifest with the same information a
// plain show request returns.
type ShowManifest struct {
// Runner is the runner name for this manifest. It is omitted from the JSON
// encoding when empty.
Runner string `json:"runner,omitempty"`
// ShowResponse is embedded without a JSON tag, so encoding/json encodes its
// fields inline next to "runner" rather than under a nested key.
ShowResponse
}
// ShowManifestsResponse is the response returned from [Client.ShowManifests].
type ShowManifestsResponse struct {
// Manifests holds one summary per manifest. The key is always present in the
// JSON encoding (no omitempty).
Manifests []ShowManifest `json:"manifests"`
// License is a single license text for the response as a whole; omitted from
// the JSON encoding when empty.
// NOTE(review): presumably shared by every manifest in the list — confirm
// against the server handler.
License string `json:"license,omitempty"`
}
// CopyRequest is the request passed to [Client.Copy].
type CopyRequest struct {
Source string `json:"source"`
@@ -829,6 +852,7 @@ type ProcessModelResponse struct {
ExpiresAt time.Time `json:"expires_at"`
SizeVRAM int64 `json:"size_vram"`
ContextLength int `json:"context_length"`
Runner string `json:"runner,omitempty"`
}
type TokenResponse struct {

View File

@@ -98,11 +98,11 @@ func init() {
const ConnectInstructions = "If your browser did not open, navigate to:\n %s\n\n"
// ensureThinkingSupport emits a warning if the model does not advertise thinking support
func ensureThinkingSupport(ctx context.Context, client *api.Client, name string) {
func ensureThinkingSupport(ctx context.Context, client *api.Client, name, runner string) {
if name == "" {
return
}
resp, err := client.Show(ctx, &api.ShowRequest{Model: name})
resp, err := client.Show(ctx, &api.ShowRequest{Model: name, Runner: runner})
if err != nil {
return
}
@@ -156,6 +156,45 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
return fmt.Errorf("invalid model name: %s", modelName)
}
list, _ := cmd.Flags().GetStringSlice("combine")
if len(list) > 0 {
if experimental, _ := cmd.Flags().GetBool("experimental"); experimental {
return errors.New("--combine cannot be used with --experimental")
}
if quantize, _ := cmd.Flags().GetString("quantize"); quantize != "" {
return errors.New("--combine cannot be used with --quantize")
}
if cmd.Flags().Changed("file") {
return errors.New("--combine cannot be used with --file")
}
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
req := &api.CreateRequest{
Model: modelName,
List: list,
}
status := "creating manifest list"
spinner := progress.NewSpinner(status)
p.Add(status, spinner)
fn := func(resp api.ProgressResponse) error {
if status != resp.Status {
spinner.Stop()
status = resp.Status
spinner = progress.NewSpinner(status)
p.Add(status, spinner)
}
return nil
}
return client.Create(cmd.Context(), req, fn)
}
// Check for --experimental flag for safetensors model creation
// This gates both safetensors LLM and imagegen model creation
experimental, _ := cmd.Flags().GetBool("experimental")
@@ -399,7 +438,7 @@ func loadOrUnloadModel(cmd *cobra.Command, opts *runOptions) error {
requestedCloud := modelref.HasExplicitCloudSource(opts.Model)
if info, err := client.Show(cmd.Context(), &api.ShowRequest{Model: opts.Model}); err != nil {
if info, err := client.Show(cmd.Context(), &api.ShowRequest{Model: opts.Model, Runner: opts.Runner}); err != nil {
return err
} else if info.RemoteHost != "" || requestedCloud {
// Cloud model, no need to load/unload
@@ -431,6 +470,7 @@ func loadOrUnloadModel(cmd *cobra.Command, opts *runOptions) error {
req := &api.GenerateRequest{
Model: opts.Model,
Runner: opts.Runner,
KeepAlive: opts.KeepAlive,
// pass Think here so we fail before getting to the chat prompt if the model doesn't support it
@@ -562,6 +602,14 @@ func RunHandler(cmd *cobra.Command, args []string) error {
ShowConnect: true,
}
if flag := cmd.Flags().Lookup("runner"); flag != nil {
runner, err := cmd.Flags().GetString("runner")
if err != nil {
return err
}
opts.Runner = runner
}
format, err := cmd.Flags().GetString("format")
if err != nil {
return err
@@ -651,7 +699,7 @@ func RunHandler(cmd *cobra.Command, args []string) error {
requestedCloud := modelref.HasExplicitCloudSource(name)
info, err := func() (*api.ShowResponse, error) {
showReq := &api.ShowRequest{Name: name}
showReq := &api.ShowRequest{Name: name, Runner: opts.Runner}
info, err := client.Show(cmd.Context(), showReq)
var se api.StatusError
if errors.As(err, &se) && se.StatusCode == http.StatusNotFound {
@@ -661,7 +709,7 @@ func RunHandler(cmd *cobra.Command, args []string) error {
if err := PullHandler(cmd, []string{name}); err != nil {
return nil, err
}
return client.Show(cmd.Context(), &api.ShowRequest{Name: name})
return client.Show(cmd.Context(), &api.ShowRequest{Name: name, Runner: opts.Runner})
}
return info, err
}()
@@ -761,7 +809,7 @@ func RunHandler(cmd *cobra.Command, args []string) error {
// Use experimental agent loop with tools
if isExperimental {
return xcmd.GenerateInteractive(cmd, opts.Model, opts.WordWrap, opts.Options, opts.Think, opts.HideThinking, opts.KeepAlive, yoloMode, enableWebsearch)
return xcmd.GenerateInteractive(cmd, opts.Model, opts.Runner, opts.WordWrap, opts.Options, opts.Think, opts.HideThinking, opts.KeepAlive, yoloMode, enableWebsearch)
}
return generateInteractive(cmd, opts)
@@ -1000,12 +1048,12 @@ func ListRunningHandler(cmd *cobra.Command, args []string) error {
until = format.HumanTime(m.ExpiresAt, "Never")
}
ctxStr := strconv.Itoa(m.ContextLength)
data = append(data, []string{m.Name, m.Digest[:12], format.HumanBytes(m.Size), procStr, ctxStr, until})
data = append(data, []string{m.Name, m.Digest[:12], format.HumanBytes(m.Size), procStr, ctxStr, m.Runner, until})
}
}
table := tablewriter.NewWriter(os.Stdout)
table.SetHeader([]string{"NAME", "ID", "SIZE", "PROCESSOR", "CONTEXT", "UNTIL"})
table.SetHeader([]string{"NAME", "ID", "SIZE", "PROCESSOR", "CONTEXT", "RUNNER", "UNTIL"})
table.SetHeaderAlignment(tablewriter.ALIGN_LEFT)
table.SetAlignment(tablewriter.ALIGN_LEFT)
table.SetHeaderLine(false)
@@ -1095,6 +1143,21 @@ func ShowHandler(cmd *cobra.Command, args []string) error {
}
req := api.ShowRequest{Name: args[0], Verbose: verbose}
if flagsSet == 0 && !verbose {
resp, err := client.ShowManifests(cmd.Context(), &req)
if err != nil {
return err
}
if len(resp.Manifests) > 1 {
return showManifestListInfo(resp, os.Stdout)
}
if len(resp.Manifests) == 1 {
return showInfo(&resp.Manifests[0].ShowResponse, verbose, os.Stdout)
}
return nil
}
resp, err := client.Show(cmd.Context(), &req)
if err != nil {
return err
@@ -1120,6 +1183,211 @@ func ShowHandler(cmd *cobra.Command, args []string) error {
return showInfo(resp, verbose, os.Stdout)
}
// showManifestListInfo writes a side-by-side comparison of every manifest in
// resp to w, one column per manifest.
//
// Sections are emitted in this order: "Model" (always), "Capabilities" and
// "Parameters" (only when at least one manifest has any), and "License" (only
// when resp.License is non-empty, truncated to its first two non-blank lines).
// Each column is headed by the manifest's runner name, or "manifest N"
// (1-based) when the runner is empty. It always returns nil.
func showManifestListInfo(resp *api.ShowManifestsResponse, w io.Writer) error {
	// renderSection prints a section title, a borderless left-aligned table
	// with the given rows, and a trailing blank line.
	renderSection := func(title string, rows [][]string) {
		fmt.Fprintln(w, " ", title)

		tbl := tablewriter.NewWriter(w)
		tbl.SetAlignment(tablewriter.ALIGN_LEFT)
		tbl.SetBorder(false)
		tbl.SetNoWhiteSpace(true)
		tbl.SetTablePadding(" ")
		// Only the license section gets an explicit column width.
		if title == "License" {
			tbl.SetColWidth(100)
		}
		tbl.AppendBulk(rows)
		tbl.Render()
		fmt.Fprintln(w)
	}

	// One column label per manifest.
	labels := make([]string, 0, len(resp.Manifests))
	for idx, m := range resp.Manifests {
		label := m.Runner
		if label == "" {
			label = fmt.Sprintf("manifest %d", idx+1)
		}
		labels = append(labels, label)
	}

	// heading builds the label row: one empty indent cell, an optional empty
	// cell above the field-name column, then the manifest labels.
	heading := func(labelColumn bool) []string {
		lead := 1
		if labelColumn {
			lead = 2
		}
		row := make([]string, lead, lead+len(labels))
		return append(row, labels...)
	}

	type modelField struct {
		label string
		get   func(api.ShowResponse) string
	}
	modelFields := []modelField{
		{"architecture", showArchitecture},
		{"parameters", showParameterSize},
		{"context length", func(r api.ShowResponse) string { return showModelInfoNumber(r, "context_length") }},
		{"embedding length", func(r api.ShowResponse) string { return showModelInfoNumber(r, "embedding_length") }},
		{"quantization", func(r api.ShowResponse) string { return r.Details.QuantizationLevel }},
		{"requires", func(r api.ShowResponse) string { return r.Requires }},
	}

	modelRows := [][]string{heading(true)}
	for _, f := range modelFields {
		row := []string{"", f.label}
		populated := false
		for _, m := range resp.Manifests {
			cell := f.get(m.ShowResponse)
			populated = populated || cell != ""
			row = append(row, cell)
		}
		// A field that is blank for every manifest is left out entirely.
		if populated {
			modelRows = append(modelRows, row)
		}
	}
	renderSection("Model", modelRows)

	if caps := showCapabilities(resp.Manifests); len(caps) > 0 {
		capRows := [][]string{heading(false)}
		for _, c := range caps {
			row := []string{""}
			for _, m := range resp.Manifests {
				cell := ""
				if slices.Contains(m.Capabilities, c) {
					cell = c.String()
				}
				row = append(row, cell)
			}
			capRows = append(capRows, row)
		}
		renderSection("Capabilities", capRows)
	}

	if paramKeys, paramValues := showParameterValues(resp.Manifests); len(paramKeys) > 0 {
		paramRows := [][]string{heading(true)}
		for _, key := range paramKeys {
			row := []string{"", key}
			for _, perManifest := range paramValues {
				// A missing key yields an empty cell.
				row = append(row, perManifest[key])
			}
			paramRows = append(paramRows, row)
		}
		renderSection("Parameters", paramRows)
	}

	if resp.License != "" {
		renderSection("License", showHeadRows(resp.License, 2))
	}

	return nil
}
// showCapabilities returns the union of the capabilities of all manifests,
// without duplicates, in the order each capability is first encountered. The
// result is nil when no manifest lists any capability.
func showCapabilities(manifests []api.ShowManifest) []model.Capability {
	var ordered []model.Capability
	known := make(map[model.Capability]bool)
	for i := range manifests {
		for _, c := range manifests[i].Capabilities {
			if known[c] {
				continue
			}
			known[c] = true
			ordered = append(ordered, c)
		}
	}
	return ordered
}
// showArchitecture returns the non-empty string stored under
// "general.architecture" in resp.ModelInfo, falling back to
// resp.Details.Family when that entry is missing, empty, or not a string.
func showArchitecture(resp api.ShowResponse) string {
	// Indexing a nil map is safe and yields a nil value, which fails the
	// type assertion, so no explicit nil check is needed.
	if name, ok := resp.ModelInfo["general.architecture"].(string); ok && name != "" {
		return name
	}
	return resp.Details.Family
}
// showParameterSize returns resp.Details.ParameterSize when it is set.
// Otherwise, if resp.ModelInfo holds a float64 under
// "general.parameter_count", that count is formatted with format.HumanNumber.
// It returns "" when neither source is available.
func showParameterSize(resp api.ShowResponse) string {
	if size := resp.Details.ParameterSize; size != "" {
		return size
	}

	// A nil map, a missing key and a non-float64 value all fail the assertion.
	count, ok := resp.ModelInfo["general.parameter_count"].(float64)
	if !ok {
		return ""
	}
	return format.HumanNumber(uint64(count))
}
// showModelInfoNumber looks up the architecture-scoped entry "<arch>.<key>" in
// resp.ModelInfo, where <arch> is the string stored under
// "general.architecture", and formats it as a decimal with the minimum number
// of digits. It returns "" when the architecture is unknown or the entry is
// missing or not a float64.
func showModelInfoNumber(resp api.ShowResponse, key string) string {
	// Reading from a nil map is safe; the assertion then yields "".
	arch, _ := resp.ModelInfo["general.architecture"].(string)
	if arch == "" {
		return ""
	}

	n, ok := resp.ModelInfo[arch+"."+key].(float64)
	if !ok {
		return ""
	}
	return strconv.FormatFloat(n, 'f', -1, 64)
}
// showParameterValues parses the Parameters text of every manifest, one
// whitespace-separated "key value..." pair per line.
//
// It returns the distinct keys in first-seen order and, index-aligned with
// manifests, a map from key to value for each manifest. Blank lines are
// skipped, runs of whitespace inside a value collapse to single spaces, and a
// key repeated within one manifest keeps its last value.
func showParameterValues(manifests []api.ShowManifest) ([]string, []map[string]string) {
	var order []string
	known := make(map[string]bool)
	perManifest := make([]map[string]string, len(manifests))

	for idx := range manifests {
		params := make(map[string]string)
		perManifest[idx] = params

		lines := bufio.NewScanner(strings.NewReader(manifests[idx].Parameters))
		for lines.Scan() {
			words := strings.Fields(lines.Text())
			if len(words) == 0 {
				continue
			}

			name := words[0]
			params[name] = strings.Join(words[1:], " ")
			if !known[name] {
				known[name] = true
				order = append(order, name)
			}
		}
	}

	return order, perManifest
}
// showHeadRows turns the first n non-blank lines of s into table rows.
//
// Each row is {"", text}, where text is the line with surrounding whitespace
// removed; blank lines are skipped and do not count towards n. When s contains
// more than n non-blank lines, a final {"", "..."} row marks the truncation. A
// negative n disables the limit. The result is nil when s has no non-blank
// lines.
//
// Lines are split with strings.Cut rather than bufio.Scanner so that a very
// long line (a single-line license or system prompt over the scanner's 64 KiB
// token limit) is not silently dropped along with everything after it.
func showHeadRows(s string, n int) (rows [][]string) {
	count := 0
	for rest := s; rest != ""; {
		var line string
		line, rest, _ = strings.Cut(rest, "\n")

		text := strings.TrimSpace(line)
		if text == "" {
			continue
		}

		count++
		if n >= 0 && count > n {
			// One line past the limit is enough to know the text was
			// truncated; there is no need to look at the remainder.
			rows = append(rows, []string{"", "..."})
			break
		}
		rows = append(rows, []string{"", text})
	}
	return rows
}
func showInfo(resp *api.ShowResponse, verbose bool, w io.Writer) error {
tableRender := func(header string, rows func() [][]string) {
fmt.Fprintln(w, " ", header)
@@ -1285,34 +1553,15 @@ func showInfo(resp *api.ShowResponse, verbose bool, w io.Writer) error {
})
}
head := func(s string, n int) (rows [][]string) {
scanner := bufio.NewScanner(strings.NewReader(s))
count := 0
for scanner.Scan() {
text := strings.TrimSpace(scanner.Text())
if text == "" {
continue
}
count++
if n < 0 || count <= n {
rows = append(rows, []string{"", text})
}
}
if n >= 0 && count > n {
rows = append(rows, []string{"", "..."})
}
return
}
if resp.System != "" {
tableRender("System", func() [][]string {
return head(resp.System, 2)
return showHeadRows(resp.System, 2)
})
}
if resp.License != "" {
tableRender("License", func() [][]string {
return head(resp.License, 2)
return showHeadRows(resp.License, 2)
})
}
@@ -1412,6 +1661,7 @@ type generateContextKey string
type runOptions struct {
Model string
Runner string
ParentModel string
LoadedMessages []api.Message
Prompt string
@@ -1463,6 +1713,7 @@ func (r runOptions) Copy() runOptions {
return runOptions{
Model: r.Model,
Runner: r.Runner,
ParentModel: r.ParentModel,
LoadedMessages: loadedMessages,
Prompt: r.Prompt,
@@ -1646,6 +1897,7 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
req := &api.ChatRequest{
Model: opts.Model,
Runner: opts.Runner,
Messages: opts.Messages,
Format: json.RawMessage(opts.Format),
Options: opts.Options,
@@ -1778,6 +2030,7 @@ func generate(cmd *cobra.Command, opts runOptions) error {
request := api.GenerateRequest{
Model: opts.Model,
Runner: opts.Runner,
Prompt: opts.Prompt,
Context: generateContext,
Images: opts.Images,
@@ -2121,6 +2374,7 @@ func NewCLI() *cobra.Command {
}
createCmd.Flags().StringP("file", "f", "", "Name of the Modelfile (default \"Modelfile\")")
createCmd.Flags().StringSlice("combine", nil, "Create a manifest list from comma-separated local models")
createCmd.Flags().StringP("quantize", "q", "", "Quantize model to this level (e.g. q4_K_M)")
createCmd.Flags().Bool("experimental", false, "Enable experimental safetensors model creation")
@@ -2152,6 +2406,8 @@ func NewCLI() *cobra.Command {
runCmd.Flags().Bool("insecure", false, "Use an insecure registry")
runCmd.Flags().Bool("nowordwrap", false, "Don't wrap words to the next line automatically")
runCmd.Flags().String("format", "", "Response format (e.g. json)")
runCmd.Flags().String("runner", "", "Runner to use for manifest list selection (mlx, ggml, llamacpp)")
runCmd.Flags().MarkHidden("runner")
runCmd.Flags().String("think", "", "Enable thinking mode: true/false or high/medium/low for supported models")
runCmd.Flags().Lookup("think").NoOptDefVal = "true"
runCmd.Flags().Bool("hidethinking", false, "Hide thinking output (if provided)")

View File

@@ -326,6 +326,93 @@ Weigh anchor!
})
}
// TestShowManifestListInfo renders a two-manifest list ("mlx" and "ggml") with
// showManifestListInfo and compares the text against the expected side-by-side
// tables: Model, Capabilities, Parameters, and a License section truncated to
// its first two lines followed by "...".
func TestShowManifestListInfo(t *testing.T) {
var b bytes.Buffer
if err := showManifestListInfo(&api.ShowManifestsResponse{
Manifests: []api.ShowManifest{
{
Runner: "mlx",
ShowResponse: api.ShowResponse{
ModelInfo: map[string]any{
"general.architecture": "qwen3_5_moe",
"general.parameter_count": float64(35_100_000_000),
"qwen3_5_moe.context_length": float64(262144),
"qwen3_5_moe.embedding_length": float64(2048),
},
Details: api.ModelDetails{
ParameterSize: "35.1B",
QuantizationLevel: "nvfp4",
},
Requires: "0.19.0",
Capabilities: []model.Capability{model.CapabilityCompletion, model.CapabilityVision, model.CapabilityThinking, model.CapabilityTools},
Parameters: "min_p 0\npresence_penalty 1.5\nrepeat_penalty 1\ntemperature 1\ntop_k 20\ntop_p 0.95\n",
},
},
{
Runner: "ggml",
ShowResponse: api.ShowResponse{
ModelInfo: map[string]any{
"general.architecture": "qwen35moe",
"qwen35moe.context_length": float64(262144),
"qwen35moe.embedding_length": float64(2048),
},
Details: api.ModelDetails{
ParameterSize: "36.0B",
QuantizationLevel: "Q4_K_M",
},
// Thinking and Tools are listed in the opposite order from the first
// manifest; the expected rows below follow the first manifest's order.
Capabilities: []model.Capability{model.CapabilityCompletion, model.CapabilityVision, model.CapabilityTools, model.CapabilityThinking},
Parameters: "min_p 0\npresence_penalty 1.5\nrepeat_penalty 1\ntemperature 1\ntop_k 20\ntop_p 0.95\n",
},
},
},
License: "Apache License\nVersion 2.0, January 2004\nterms",
}, &b); err != nil {
t.Fatal(err)
}
expect := ` Model
mlx ggml
architecture qwen3_5_moe qwen35moe
parameters 35.1B 36.0B
context length 262144 262144
embedding length 2048 2048
quantization nvfp4 Q4_K_M
requires 0.19.0
Capabilities
mlx ggml
completion completion
vision vision
thinking thinking
tools tools
Parameters
mlx ggml
min_p 0 0
presence_penalty 1.5 1.5
repeat_penalty 1 1
temperature 1 1
top_k 20 20
top_p 0.95 0.95
License
Apache License
Version 2.0, January 2004
...
`
// trimLinePadding strips trailing spaces, tabs and carriage returns from every
// line, so the comparison ignores any right-hand padding in the rendered table.
trimLinePadding := func(s string) string {
lines := strings.Split(s, "\n")
for i, line := range lines {
lines[i] = strings.TrimRight(line, " \t\r")
}
return strings.Join(lines, "\n")
}
if diff := cmp.Diff(trimLinePadding(expect), trimLinePadding(b.String())); diff != "" {
t.Errorf("unexpected output (-want +got):\n%s", diff)
}
}
func TestDeleteHandler(t *testing.T) {
stopped := false
mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
@@ -479,6 +566,143 @@ func TestRunEmbeddingModel(t *testing.T) {
}
}
// TestListRunningHandlerShowsRunner checks that ListRunningHandler prints a
// RUNNER column containing the runner reported by /api/ps.
//
// The handler writes to os.Stdout, so stdout is temporarily replaced by a pipe.
// The swap is undone in a deferred function so that a panic inside the handler
// cannot leave the process-wide os.Stdout pointing at a closed pipe.
func TestListRunningHandlerShowsRunner(t *testing.T) {
	mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path != "/api/ps" || r.Method != http.MethodGet {
			http.NotFound(w, r)
			return
		}

		if err := json.NewEncoder(w).Encode(api.ProcessResponse{
			Models: []api.ProcessModelResponse{
				{
					Name:          "test-model:latest",
					Model:         "test-model:latest",
					Digest:        "abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890",
					Size:          1024,
					SizeVRAM:      1024,
					ContextLength: 4096,
					Runner:        "mlx",
					ExpiresAt:     time.Now().Add(time.Hour),
				},
			},
		}); err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
		}
	}))
	t.Setenv("OLLAMA_HOST", mockServer.URL)
	t.Cleanup(mockServer.Close)

	cmd := &cobra.Command{}
	cmd.SetContext(t.Context())

	r, w, err := os.Pipe()
	if err != nil {
		t.Fatal(err)
	}
	defer r.Close()

	oldStdout := os.Stdout
	os.Stdout = w
	err = func() error {
		// Close the write end (so ReadAll below sees EOF) and restore stdout
		// however the handler exits.
		defer func() {
			w.Close()
			os.Stdout = oldStdout
		}()
		return ListRunningHandler(cmd, nil)
	}()
	if err != nil {
		t.Fatal(err)
	}

	out, err := io.ReadAll(r)
	if err != nil {
		t.Fatal(err)
	}

	got := string(out)
	// "abcdef123456" is the first 12 characters of the digest served above.
	for _, want := range []string{"CONTEXT", "RUNNER", "abcdef123456", "mlx"} {
		if !strings.Contains(got, want) {
			t.Fatalf("output missing %q:\n%s", want, got)
		}
	}
}
// TestRunHandlerRunnerFlag checks that the value of the --runner flag is
// forwarded by RunHandler in both the /api/show and the /api/generate request.
//
// The mock server records the first request seen on each endpoint. The sends
// are non-blocking: with a one-slot channel, a blocking send would hang the
// HTTP handler (and with it the whole test) if an endpoint were hit twice.
func TestRunHandlerRunnerFlag(t *testing.T) {
	showReqCh := make(chan api.ShowRequest, 1)
	generateReqCh := make(chan api.GenerateRequest, 1)

	mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		switch {
		case r.URL.Path == "/api/show" && r.Method == http.MethodPost:
			var req api.ShowRequest
			if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
				http.Error(w, err.Error(), http.StatusBadRequest)
				return
			}
			select {
			case showReqCh <- req:
			default: // already recorded one; never block the handler
			}

			w.Header().Set("Content-Type", "application/json")
			if err := json.NewEncoder(w).Encode(api.ShowResponse{
				Capabilities: []model.Capability{model.CapabilityCompletion},
				ModelInfo:    map[string]any{},
			}); err != nil {
				http.Error(w, err.Error(), http.StatusInternalServerError)
			}
		case r.URL.Path == "/api/generate" && r.Method == http.MethodPost:
			var req api.GenerateRequest
			if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
				http.Error(w, err.Error(), http.StatusBadRequest)
				return
			}
			select {
			case generateReqCh <- req:
			default: // already recorded one; never block the handler
			}

			w.Header().Set("Content-Type", "application/x-ndjson")
			if err := json.NewEncoder(w).Encode(api.GenerateResponse{
				Model: "test-model",
				Done:  true,
			}); err != nil {
				http.Error(w, err.Error(), http.StatusInternalServerError)
			}
		default:
			http.NotFound(w, r)
		}
	}))
	t.Setenv("OLLAMA_HOST", mockServer.URL)
	t.Cleanup(mockServer.Close)

	cmd := &cobra.Command{}
	cmd.SetContext(t.Context())
	cmd.Flags().String("keepalive", "", "")
	cmd.Flags().Bool("verbose", false, "")
	cmd.Flags().Bool("insecure", false, "")
	cmd.Flags().Bool("nowordwrap", false, "")
	cmd.Flags().String("format", "", "")
	cmd.Flags().String("runner", "", "")
	cmd.Flags().String("think", "", "")
	cmd.Flags().Bool("hidethinking", false, "")
	if err := cmd.Flags().Set("runner", "llamacpp"); err != nil {
		t.Fatal(err)
	}

	r, w, err := os.Pipe()
	if err != nil {
		t.Fatal(err)
	}
	defer r.Close()

	oldStdout := os.Stdout
	os.Stdout = w
	err = func() error {
		// Restore stdout and close the write end even if the handler panics.
		defer func() {
			w.Close()
			os.Stdout = oldStdout
		}()
		return RunHandler(cmd, []string{"test-model", "hello"})
	}()
	// Drain the captured output before looking at the handler's error.
	if _, readErr := io.ReadAll(r); readErr != nil {
		t.Fatal(readErr)
	}
	if err != nil {
		t.Fatal(err)
	}

	select {
	case req := <-showReqCh:
		if req.Runner != "llamacpp" {
			t.Fatalf("show runner = %q, want %q", req.Runner, "llamacpp")
		}
	default:
		t.Fatal("server did not receive show request")
	}

	select {
	case req := <-generateReqCh:
		if req.Runner != "llamacpp" {
			t.Fatalf("generate runner = %q, want %q", req.Runner, "llamacpp")
		}
	default:
		t.Fatal("server did not receive generate request")
	}
}
func TestRunEmbeddingModelWithFlags(t *testing.T) {
reqCh := make(chan api.EmbedRequest, 1)
mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
@@ -1524,6 +1748,66 @@ func TestCreateHandler(t *testing.T) {
}
}
// TestCreateHandlerManifestList checks that CreateHandler, when invoked with
// --combine, sends a single POST to /api/create carrying the target model name
// and the combined tag list, and no From or Files fields.
//
// os.Stderr is redirected to a pipe for the duration of the call; the redirect
// is undone in a deferred function so a panic inside the handler cannot leave
// the process-wide os.Stderr pointing at a closed pipe.
func TestCreateHandlerManifestList(t *testing.T) {
	mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path != "/api/create" {
			t.Errorf("unexpected request to %s", r.URL.Path)
			http.Error(w, "not found", http.StatusNotFound)
			return
		}
		if r.Method != http.MethodPost {
			t.Errorf("expected POST request, got %s", r.Method)
		}

		var req api.CreateRequest
		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}
		if req.Model != "parent" {
			t.Errorf("model = %q, want %q", req.Model, "parent")
		}
		if !cmp.Equal(req.List, []string{"gguf", "safetensors"}) {
			t.Errorf("list = %#v, want %#v", req.List, []string{"gguf", "safetensors"})
		}
		if req.From != "" || len(req.Files) > 0 {
			t.Errorf("manifest list create sent normal create fields: from=%q files=%v", req.From, req.Files)
		}

		if err := json.NewEncoder(w).Encode(api.ProgressResponse{Status: "success"}); err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
			return
		}
		// Checked assertion: a writer that cannot flush must not panic the
		// handler goroutine.
		if f, ok := w.(http.Flusher); ok {
			f.Flush()
		}
	}))
	t.Setenv("OLLAMA_HOST", mockServer.URL)
	t.Cleanup(mockServer.Close)

	cmd := &cobra.Command{}
	cmd.Flags().String("file", "", "")
	cmd.Flags().String("quantize", "", "")
	cmd.Flags().Bool("experimental", false, "")
	cmd.Flags().StringSlice("combine", nil, "")
	cmd.SetContext(t.Context())
	if err := cmd.Flags().Set("combine", "gguf,safetensors"); err != nil {
		t.Fatal(err)
	}

	r, w, err := os.Pipe()
	if err != nil {
		t.Fatal(err)
	}
	defer r.Close()

	oldStderr := os.Stderr
	os.Stderr = w
	err = func() error {
		// Restore stderr and close the write end even if the handler panics.
		defer func() {
			w.Close()
			os.Stderr = oldStderr
		}()
		return CreateHandler(cmd, []string{"parent"})
	}()
	// Drain the captured output before looking at the handler's error.
	if _, readErr := io.ReadAll(r); readErr != nil {
		t.Fatal(readErr)
	}
	if err != nil {
		t.Fatal(err)
	}
}
func TestNewCreateRequest(t *testing.T) {
tests := []struct {
name string

View File

@@ -224,7 +224,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
opts.Messages = []api.Message{}
opts.LoadedMessages = nil
fmt.Printf("Loading model '%s'\n", opts.Model)
info, err := client.Show(cmd.Context(), &api.ShowRequest{Model: opts.Model})
info, err := client.Show(cmd.Context(), &api.ShowRequest{Model: opts.Model, Runner: opts.Runner})
if err != nil {
if strings.Contains(err.Error(), "not found") {
fmt.Printf("Couldn't find model '%s'\n", opts.Model)
@@ -323,7 +323,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
opts.Think = &thinkValue
thinkExplicitlySet = true
if client, err := api.ClientFromEnvironment(); err == nil {
ensureThinkingSupport(cmd.Context(), client, opts.Model)
ensureThinkingSupport(cmd.Context(), client, opts.Model, opts.Runner)
}
if maybeLevel != "" {
fmt.Printf("Set 'think' mode to '%s'.\n", maybeLevel)
@@ -334,7 +334,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
opts.Think = &api.ThinkValue{Value: false}
thinkExplicitlySet = true
if client, err := api.ClientFromEnvironment(); err == nil {
ensureThinkingSupport(cmd.Context(), client, opts.Model)
ensureThinkingSupport(cmd.Context(), client, opts.Model, opts.Runner)
}
fmt.Println("Set 'nothink' mode.")
case "format":
@@ -414,6 +414,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
}
req := &api.ShowRequest{
Name: opts.Model,
Runner: opts.Runner,
System: opts.System,
Options: opts.Options,
}

View File

@@ -47,7 +47,7 @@ func TestWarnMissingThinking(t *testing.T) {
oldStderr := os.Stderr
r, w, _ := os.Pipe()
os.Stderr = w
ensureThinkingSupport(t.Context(), client, "m")
ensureThinkingSupport(t.Context(), client, "m", "")
w.Close()
os.Stderr = oldStderr
out, _ := io.ReadAll(r)

View File

@@ -120,6 +120,18 @@ type ollamaServer struct {
tokenizer tokenizer.Tokenizer // tokenizer handles text encoding/decoding
}
// RunnerName returns the runner implementation name for a LlamaServer: "ggml"
// when s is an *ollamaServer, "llamacpp" when it is a *llamaServer, and the
// empty string for any other implementation (including a nil interface).
func RunnerName(s LlamaServer) string {
	if _, ok := s.(*ollamaServer); ok {
		return "ggml"
	}
	if _, ok := s.(*llamaServer); ok {
		return "llamacpp"
	}
	return ""
}
// LoadModel will load a model from disk. The model must be in the GGML format.
//
// It collects array values for arrays with a size less than or equal to

View File

@@ -123,25 +123,6 @@ func (l *Layer) Remove() error {
return nil
}
// Ignore corrupt manifests to avoid blocking deletion of layers that are freshly orphaned
ms, err := Manifests(true)
if err != nil {
return err
}
for _, m := range ms {
for _, layer := range append(m.Layers, m.Config) {
if layer.Digest == l.Digest {
// something is using this layer
return nil
}
}
}
blob, err := BlobsPath(l.Digest)
if err != nil {
return err
}
return os.Remove(blob)
_, err := RemoveUnreferencedBlobs(l.Digest)
return err
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,19 +1,24 @@
package manifest
import (
"bytes"
"crypto/sha256"
"encoding/json"
"errors"
"fmt"
"os"
"path/filepath"
"slices"
"strings"
"testing"
"github.com/ollama/ollama/types/model"
)
func createManifest(t *testing.T, path, name string) {
func createManifestAtRoot(t *testing.T, path, root, name string) {
t.Helper()
p := filepath.Join(path, "manifests", name)
p := filepath.Join(path, root, name)
if err := os.MkdirAll(filepath.Dir(p), 0o755); err != nil {
t.Fatal(err)
}
@@ -29,6 +34,735 @@ func createManifest(t *testing.T, path, name string) {
}
}
// createManifest creates a test manifest called name under the "manifests"
// directory of path by delegating to createManifestAtRoot.
func createManifest(t *testing.T, path, name string) {
	t.Helper()

	const root = "manifests"
	createManifestAtRoot(t, path, root, name)
}
// createManifestForTest builds an in-memory schema-version-2 GGUF manifest for
// the given runner, with a config layer (size 12) identified by configDigest
// and a single model layer (size 34) identified by layerDigest.
func createManifestForTest(configDigest, layerDigest, runner string) Manifest {
	configLayer := Layer{
		MediaType: "application/vnd.docker.container.image.v1+json",
		Digest:    configDigest,
		Size:      12,
	}
	modelLayer := Layer{
		MediaType: "application/vnd.ollama.image.model",
		Digest:    layerDigest,
		Size:      34,
	}

	var m Manifest
	m.SchemaVersion = 2
	m.MediaType = MediaTypeManifest
	m.Runner = runner
	m.Format = FormatGGUF
	m.Config = configLayer
	m.Layers = []Layer{modelLayer}
	return m
}
// createManifestListData returns the JSON encoding of a schema-version-2
// manifest list that contains the given manifests, failing the test if
// encoding fails.
func createManifestListData(t *testing.T, manifests ...Manifest) []byte {
	t.Helper()

	encoded, err := json.Marshal(Manifest{
		SchemaVersion: 2,
		MediaType:     MediaTypeManifestList,
		Manifests:     manifests,
	})
	if err != nil {
		t.Fatal(err)
	}
	return encoded
}
// writeManifestBlobForTest passes data to writeManifestBlob and returns the
// digest it reports, failing the test immediately on error.
func writeManifestBlobForTest(t *testing.T, data []byte) string {
	t.Helper()

	blobDigest, writeErr := writeManifestBlob(data)
	if writeErr != nil {
		t.Fatal(writeErr)
	}
	return blobDigest
}
// TestWriteManifestStoresManifestAsBlob checks that WriteManifest leaves the
// same bytes at the named v2 manifest path and at the blob path derived from
// their SHA-256, and that the parsed manifest reports that hash from both
// Digest (bare hex) and BlobDigest ("sha256:"-prefixed).
func TestWriteManifestStoresManifestAsBlob(t *testing.T) {
	t.Setenv("OLLAMA_MODELS", t.TempDir())

	// mustRead accepts the (path, error) pair of a path lookup directly and
	// returns the file contents, failing the test on either error.
	mustRead := func(path string, err error) []byte {
		t.Helper()
		if err != nil {
			t.Fatal(err)
		}
		contents, err := os.ReadFile(path)
		if err != nil {
			t.Fatal(err)
		}
		return contents
	}

	name := model.ParseName("example")
	if err := WriteManifest(name, Layer{
		MediaType: "application/vnd.docker.container.image.v1+json",
		Digest:    "sha256:" + strings.Repeat("a", 64),
		Size:      12,
	}, nil); err != nil {
		t.Fatal(err)
	}

	tagData := mustRead(V2PathForName(name))
	sum := sha256.Sum256(tagData)
	wantBlobDigest := fmt.Sprintf("sha256:%x", sum)

	if blobData := mustRead(BlobsPath(wantBlobDigest)); !bytes.Equal(blobData, tagData) {
		t.Fatal("manifest path and blob content differ")
	}

	parsed, err := ParseNamedManifest(name)
	if err != nil {
		t.Fatal(err)
	}
	if got := parsed.Digest(); got != fmt.Sprintf("%x", sum) {
		t.Fatalf("digest = %q, want %x", got, sum)
	}
	if got := parsed.BlobDigest(); got != wantBlobDigest {
		t.Fatalf("blob digest = %q, want %q", got, wantBlobDigest)
	}
}
// TestSelectManifestUsesRunnerPreference checks that selection follows the
// order of the preference list rather than the order of the manifests: the
// llamacpp manifest is listed second but preferred first, so it must win.
func TestSelectManifestUsesRunnerPreference(t *testing.T) {
	digestOf := func(ch string) string { return "sha256:" + strings.Repeat(ch, 64) }

	candidates := []Manifest{
		createManifestForTest(digestOf("a"), digestOf("b"), RunnerGGML),
		createManifestForTest(digestOf("c"), digestOf("d"), RunnerLlamaCPP),
	}
	preferences := []string{RunnerLlamaCPP, RunnerGGML}

	selected, err := selectManifestWithPreferences(candidates, preferences)
	if err != nil {
		t.Fatal(err)
	}
	if selected.Runner != RunnerLlamaCPP {
		t.Fatalf("runner = %q, want %q", selected.Runner, RunnerLlamaCPP)
	}
}
// TestSelectManifestReferenceDoesNotResolveBlob selects a manifest reference
// inside an empty models directory, where no blob was written for the
// referenced digest, and checks that selection still succeeds and reports the
// reference's own digest.
func TestSelectManifestReferenceDoesNotResolveBlob(t *testing.T) {
	t.Setenv("OLLAMA_MODELS", t.TempDir())

	wantDigest := "sha256:" + strings.Repeat("a", 64)
	ref, err := NewManifestReference(wantDigest, RunnerGGML, FormatGGUF)
	if err != nil {
		t.Fatal(err)
	}

	selected, err := selectManifestReferenceWithPreferences([]Manifest{ref}, []string{RunnerGGML})
	if err != nil {
		t.Fatal(err)
	}
	if got := selected.BlobDigest(); got != wantDigest {
		t.Fatalf("blob digest = %q, want selected reference digest", got)
	}
}
func TestSelectManifestRejectsOldOllamaRunner(t *testing.T) {
_, err := selectManifestWithPreferences([]Manifest{
createManifestForTest("sha256:"+strings.Repeat("a", 64), "sha256:"+strings.Repeat("b", 64), "ollama"),
}, []string{RunnerGGML})
if !errors.Is(err, ErrNoCompatibleManifest) {
t.Fatalf("err = %v, want %v", err, ErrNoCompatibleManifest)
}
}
// TestParseNamedManifestResolvesManifestList writes a manifest list with a
// llama.cpp child and a ggml child, then verifies that:
//   - ParseNamedManifest reports the parent digest but exposes the ggml child
//     as the selected manifest,
//   - ParseNamedManifestForRunner selects the llama.cpp child on request,
//   - both child manifest blobs are reported as referenced, and
//   - ReadManifestData / ReadSelectedManifestData return the parent and the
//     selected child bytes respectively.
func TestParseNamedManifestResolvesManifestList(t *testing.T) {
	t.Setenv("OLLAMA_MODELS", t.TempDir())
	name := model.ParseName("example")

	fakeDigest := func(ch string) string { return "sha256:" + strings.Repeat(ch, 64) }
	ggmlConfigDigest := fakeDigest("a")
	llamacppConfigDigest := fakeDigest("c")

	ggml := createManifestForTest(ggmlConfigDigest, fakeDigest("b"), RunnerGGML)
	ggmlData, err := json.Marshal(ggml)
	if err != nil {
		t.Fatal(err)
	}
	ggmlDigest := writeManifestBlobForTest(t, ggmlData)

	llamacpp := createManifestForTest(llamacppConfigDigest, fakeDigest("d"), RunnerLlamaCPP)
	llamacppData, err := json.Marshal(llamacpp)
	if err != nil {
		t.Fatal(err)
	}
	llamacppDigest := writeManifestBlobForTest(t, llamacppData)

	parentData := createManifestListData(t, llamacpp, ggml)
	if err := WriteManifestData(name, parentData); err != nil {
		t.Fatal(err)
	}
	parentSum := sha256.Sum256(parentData)

	// Default selection.
	m, err := ParseNamedManifest(name)
	if err != nil {
		t.Fatal(err)
	}
	if got := m.Digest(); got != fmt.Sprintf("%x", parentSum) {
		t.Fatalf("digest = %q, want %x", got, parentSum)
	}
	if got := m.BlobDigest(); got != fmt.Sprintf("sha256:%x", parentSum) {
		t.Fatalf("blob digest = %q, want sha256:%x", got, parentSum)
	}
	wantSelected := strings.TrimPrefix(ggmlDigest, "sha256:")
	if got := m.SelectedDigest(); got != wantSelected {
		t.Fatalf("selected digest = %q, want %q", got, wantSelected)
	}
	if m.Runner != RunnerGGML {
		t.Fatalf("runner = %q, want %q", m.Runner, RunnerGGML)
	}
	if m.Format != FormatGGUF {
		t.Fatalf("format = %q, want %q", m.Format, FormatGGUF)
	}
	if m.Config.Digest != ggmlConfigDigest {
		t.Fatalf("config digest = %q, want selected child config", m.Config.Digest)
	}

	// Explicit runner selection.
	m, err = ParseNamedManifestForRunner(name, RunnerLlamaCPP)
	if err != nil {
		t.Fatal(err)
	}
	if m.Runner != RunnerLlamaCPP {
		t.Fatalf("runner = %q, want %q", m.Runner, RunnerLlamaCPP)
	}
	wantSelected = strings.TrimPrefix(llamacppDigest, "sha256:")
	if got := m.SelectedDigest(); got != wantSelected {
		t.Fatalf("selected digest = %q, want %q", got, wantSelected)
	}
	if m.Config.Digest != llamacppConfigDigest {
		t.Fatalf("config digest = %q, want selected child config", m.Config.Digest)
	}

	// Both child manifests must be tracked as referenced blobs.
	referenced, err := ReferencedBlobDigestsForName(name)
	if err != nil {
		t.Fatal(err)
	}
	for _, childDigest := range []string{llamacppDigest, ggmlDigest} {
		if slices.Contains(referenced, childDigest) {
			continue
		}
		t.Fatalf("referenced blob digests missing child manifest %s", childDigest)
	}

	// Raw reads: parent list versus selected child.
	raw, err := ReadManifestData(name)
	if err != nil {
		t.Fatal(err)
	}
	if !bytes.Equal(raw, parentData) {
		t.Fatal("ReadManifestData did not return the parent manifest list")
	}

	selected, err := ReadSelectedManifestData(name)
	if err != nil {
		t.Fatal(err)
	}
	if !bytes.Equal(selected, ggmlData) {
		t.Fatal("ReadSelectedManifestData did not return the selected child manifest")
	}
}
// TestTotalSizeForNameIncludesAllManifestListChildren builds a manifest list
// whose GGUF and MLX children share one layer and expects TotalSizeForName to
// sum every child's config and layers while counting the shared layer once.
func TestTotalSizeForNameIncludesAllManifestListChildren(t *testing.T) {
	t.Setenv("OLLAMA_MODELS", t.TempDir())
	name := model.ParseName("example")

	// stored pairs a written blob's digest with its payload size.
	type stored struct {
		digest string
		size   int64
	}
	store := func(payload string) stored {
		return stored{
			digest: writeManifestBlobForTest(t, []byte(payload)),
			size:   int64(len(payload)),
		}
	}
	layerOf := func(mediaType string, b stored) Layer {
		return Layer{MediaType: mediaType, Digest: b.digest, Size: b.size}
	}

	shared := store("shared layer")
	ggufConfig := store("gguf config")
	ggufLayer := store("gguf layer")
	mlxConfig := store("mlx config")
	mlxLayer := store("mlx layer")

	const (
		configMediaType = "application/vnd.docker.container.image.v1+json"
		modelMediaType  = "application/vnd.ollama.image.model"
	)

	gguf := Manifest{
		SchemaVersion: 2,
		MediaType:     MediaTypeManifest,
		Runner:        RunnerGGML,
		Format:        FormatGGUF,
		Config:        layerOf(configMediaType, ggufConfig),
		Layers: []Layer{
			layerOf(modelMediaType, shared),
			layerOf(modelMediaType, ggufLayer),
		},
	}
	ggufData, err := json.Marshal(gguf)
	if err != nil {
		t.Fatal(err)
	}
	ggufManifestDigest := writeManifestBlobForTest(t, ggufData)
	ggufRef, err := NewManifestReference(ggufManifestDigest, gguf.Runner, gguf.Format)
	if err != nil {
		t.Fatal(err)
	}

	mlx := Manifest{
		SchemaVersion: 2,
		MediaType:     MediaTypeManifest,
		Runner:        RunnerMLX,
		Format:        FormatSafetensors,
		Config:        layerOf(configMediaType, mlxConfig),
		Layers: []Layer{
			layerOf(MediaTypeImageTensor, shared),
			layerOf(MediaTypeImageTensor, mlxLayer),
		},
	}
	mlxData, err := json.Marshal(mlx)
	if err != nil {
		t.Fatal(err)
	}
	mlxManifestDigest := writeManifestBlobForTest(t, mlxData)
	mlxRef, err := NewManifestReference(mlxManifestDigest, mlx.Runner, mlx.Format)
	if err != nil {
		t.Fatal(err)
	}

	listData := createManifestListData(t, ggufRef, mlxRef)
	if err := WriteManifestData(name, listData); err != nil {
		t.Fatal(err)
	}

	size, err := TotalSizeForName(name)
	if err != nil {
		t.Fatal(err)
	}

	// The shared layer appears in both children but contributes only once.
	want := ggufConfig.size + shared.size + ggufLayer.size + mlxConfig.size + mlxLayer.size
	if size != want {
		t.Fatalf("size = %d, want %d", size, want)
	}
}
// TestPartialManifestListTracksPresentAndMissingChildren writes a manifest
// list in which one child manifest exists locally and the other is only a
// reference to a digest that was never stored. It expects every digest
// (parent, both children, and the present child's config and layer) to be
// reported as referenced, and the total size to cover only the present
// child's config and layer.
func TestPartialManifestListTracksPresentAndMissingChildren(t *testing.T) {
	t.Setenv("OLLAMA_MODELS", t.TempDir())
	name := model.ParseName("example")

	configData, layerData := []byte("gguf config"), []byte("gguf layer")
	configDigest := writeManifestBlobForTest(t, configData)
	layerDigest := writeManifestBlobForTest(t, layerData)

	present := Manifest{
		SchemaVersion: 2,
		MediaType:     MediaTypeManifest,
		Runner:        RunnerGGML,
		Format:        FormatGGUF,
		Config: Layer{
			MediaType: "application/vnd.docker.container.image.v1+json",
			Digest:    configDigest,
			Size:      int64(len(configData)),
		},
		Layers: []Layer{{
			MediaType: "application/vnd.ollama.image.model",
			Digest:    layerDigest,
			Size:      int64(len(layerData)),
		}},
	}
	presentData, err := json.Marshal(present)
	if err != nil {
		t.Fatal(err)
	}
	presentDigest := writeManifestBlobForTest(t, presentData)
	presentRef, err := NewManifestReference(presentDigest, present.Runner, present.Format)
	if err != nil {
		t.Fatal(err)
	}

	// The second child is referenced by digest only; no blob is written for it.
	missingDigest := "sha256:" + strings.Repeat("e", 64)
	missingRef, err := NewManifestReference(missingDigest, RunnerMLX, FormatSafetensors)
	if err != nil {
		t.Fatal(err)
	}

	parentData := createManifestListData(t, presentRef, missingRef)
	if err := WriteManifestData(name, parentData); err != nil {
		t.Fatal(err)
	}
	parentDigest := fmt.Sprintf("sha256:%x", sha256.Sum256(parentData))

	referenced, err := ReferencedBlobDigestsForName(name)
	if err != nil {
		t.Fatal(err)
	}
	expected := []string{parentDigest, presentDigest, missingDigest, configDigest, layerDigest}
	for _, digest := range expected {
		if slices.Contains(referenced, digest) {
			continue
		}
		t.Fatalf("referenced blob digests missing %s: %#v", digest, referenced)
	}

	size, err := TotalSizeForName(name)
	if err != nil {
		t.Fatal(err)
	}
	if want := int64(len(configData) + len(layerData)); size != want {
		t.Fatalf("size = %d, want %d", size, want)
	}
}
// TestParseNamedManifestLeavesLegacyManifestInPlace verifies that reading a
// manifest stored under the legacy path has no side effects: the file is not
// replaced by a symlink and no blob holding its content is created.
func TestParseNamedManifestLeavesLegacyManifestInPlace(t *testing.T) {
	models := t.TempDir()
	t.Setenv("OLLAMA_MODELS", models)

	name := model.ParseName("example")
	createManifest(t, models, name.Filepath())

	legacyPath, err := PathForName(name)
	if err != nil {
		t.Fatal(err)
	}

	if _, err := ParseNamedManifest(name); err != nil {
		t.Fatal(err)
	}

	// Lstat inspects the entry itself, so a symlink would be detected here.
	info, err := os.Lstat(legacyPath)
	if err != nil {
		t.Fatal(err)
	}
	if isSymlink := info.Mode()&os.ModeSymlink != 0; isSymlink {
		t.Fatal("legacy manifest was converted to a symlink while reading")
	}

	contents, err := os.ReadFile(legacyPath)
	if err != nil {
		t.Fatal(err)
	}
	blobPath, err := BlobsPath(fmt.Sprintf("sha256:%x", sha256.Sum256(contents)))
	if err != nil {
		t.Fatal(err)
	}
	_, statErr := os.Stat(blobPath)
	if !os.IsNotExist(statErr) {
		t.Fatalf("legacy manifest read created blob: %v", statErr)
	}
}
// TestMigrateManifestLinks migrates one legacy manifest and checks that the
// v2 path and the content-addressed blob hold identical bytes, that the
// legacy file is gone, and that running the migration again reports zero
// migrations and leaves the v2 manifest content unchanged.
func TestMigrateManifestLinks(t *testing.T) {
	models := t.TempDir()
	t.Setenv("OLLAMA_MODELS", models)

	name := model.ParseName("example")
	createManifest(t, models, name.Filepath())

	count, err := MigrateManifestLinks()
	if err != nil {
		t.Fatal(err)
	}
	if count != 1 {
		t.Fatalf("migrated = %d, want 1", count)
	}

	v2Path, err := V2PathForName(name)
	if err != nil {
		t.Fatal(err)
	}
	before, err := os.ReadFile(v2Path)
	if err != nil {
		t.Fatal(err)
	}

	blobPath, err := BlobsPath(fmt.Sprintf("sha256:%x", sha256.Sum256(before)))
	if err != nil {
		t.Fatal(err)
	}
	blobContents, err := os.ReadFile(blobPath)
	if err != nil {
		t.Fatal(err)
	}
	if !bytes.Equal(blobContents, before) {
		t.Fatal("migrated manifest path and blob content differ")
	}

	legacyPath, err := PathForName(name)
	if err != nil {
		t.Fatal(err)
	}
	if _, statErr := os.Stat(legacyPath); !os.IsNotExist(statErr) {
		t.Fatalf("legacy manifest still exists: %v", statErr)
	}

	// Re-running must be a no-op.
	count, err = MigrateManifestLinks()
	if err != nil {
		t.Fatal(err)
	}
	if count != 0 {
		t.Fatalf("migrated on second run = %d, want 0", count)
	}
	if _, err := MigrateManifestLinks(); err != nil {
		t.Fatal(err)
	}

	after, err := os.ReadFile(v2Path)
	if err != nil {
		t.Fatal(err)
	}
	if !bytes.Equal(after, before) {
		t.Fatal("second migration changed manifest content")
	}
}
// TestRemoveNamedRemovesUnreferencedManifestBlob writes a manifest, confirms
// its manifest blob exists, removes the name, and expects the blob to be gone.
func TestRemoveNamedRemovesUnreferencedManifestBlob(t *testing.T) {
	t.Setenv("OLLAMA_MODELS", t.TempDir())

	name := model.ParseName("example")
	if err := WriteManifest(name, Layer{}, nil); err != nil {
		t.Fatal(err)
	}

	parsed, err := ParseNamedManifest(name)
	if err != nil {
		t.Fatal(err)
	}
	manifestBlob, err := BlobsPath(parsed.BlobDigest())
	if err != nil {
		t.Fatal(err)
	}

	// Precondition: the blob is present before removal.
	if _, err := os.Stat(manifestBlob); err != nil {
		t.Fatal(err)
	}

	if err := RemoveNamed(name); err != nil {
		t.Fatal(err)
	}

	_, statErr := os.Stat(manifestBlob)
	if !os.IsNotExist(statErr) {
		t.Fatalf("manifest blob still exists: %v", statErr)
	}
}
// TestRemoveNamedTracksManifestListChildBlobs stores the same manifest list
// under two names. Removing the first name must keep the parent list blob and
// every blob it references; removing the second name must delete them all.
func TestRemoveNamedTracksManifestListChildBlobs(t *testing.T) {
	t.Setenv("OLLAMA_MODELS", t.TempDir())

	ggmlConfigDigest := writeManifestBlobForTest(t, []byte("ggml config"))
	ggmlLayerDigest := writeManifestBlobForTest(t, []byte("ggml layer"))
	ggml := createManifestForTest(ggmlConfigDigest, ggmlLayerDigest, RunnerGGML)
	ggmlData, err := json.Marshal(ggml)
	if err != nil {
		t.Fatal(err)
	}
	writeManifestBlobForTest(t, ggmlData)

	llamacppConfigDigest := writeManifestBlobForTest(t, []byte("llamacpp config"))
	llamacppLayerDigest := writeManifestBlobForTest(t, []byte("llamacpp layer"))
	llamacpp := createManifestForTest(llamacppConfigDigest, llamacppLayerDigest, RunnerLlamaCPP)
	llamacppData, err := json.Marshal(llamacpp)
	if err != nil {
		t.Fatal(err)
	}
	writeManifestBlobForTest(t, llamacppData)

	parentData := createManifestListData(t, ggml, llamacpp)
	first, second := model.ParseName("example-a"), model.ParseName("example-b")
	for _, n := range []model.Name{first, second} {
		if err := WriteManifestData(n, parentData); err != nil {
			t.Fatal(err)
		}
	}

	parentPath, err := BlobsPath(fmt.Sprintf("sha256:%x", sha256.Sum256(parentData)))
	if err != nil {
		t.Fatal(err)
	}
	referenced, err := ReferencedBlobDigestsForName(first)
	if err != nil {
		t.Fatal(err)
	}

	// statBlob resolves digest to its blob path and stats it.
	statBlob := func(digest string) error {
		blobPath, err := BlobsPath(digest)
		if err != nil {
			t.Fatal(err)
		}
		_, statErr := os.Stat(blobPath)
		return statErr
	}

	// First removal: the other name still references everything.
	if err := RemoveNamed(first); err != nil {
		t.Fatal(err)
	}
	if _, err := os.Stat(parentPath); err != nil {
		t.Fatalf("parent list blob was removed while another model uses it: %v", err)
	}
	for _, digest := range referenced {
		if err := statBlob(digest); err != nil {
			t.Fatalf("referenced blob %s was removed while another model uses it: %v", digest, err)
		}
	}

	// Final removal: nothing references the blobs any more.
	if err := RemoveNamed(second); err != nil {
		t.Fatal(err)
	}
	if _, err := os.Stat(parentPath); !os.IsNotExist(err) {
		t.Fatalf("parent list blob still exists after final remove: %v", err)
	}
	for _, digest := range referenced {
		if err := statBlob(digest); !os.IsNotExist(err) {
			t.Fatalf("referenced blob %s still exists after final remove: %v", digest, err)
		}
	}
}
// TestParseNamedManifestRejectsUnsafeSymlinks points the legacy manifest path
// at symlink targets outside the blob store and expects ParseNamedManifest to
// refuse them, both when the target name is not a sha256 blob name and when
// it is a correctly named blob that lives elsewhere. Subtests are skipped
// when symlinks cannot be created.
func TestParseNamedManifestRejectsUnsafeSymlinks(t *testing.T) {
	t.Setenv("OLLAMA_MODELS", t.TempDir())

	name := model.ParseName("example")
	manifestPath, err := PathForName(name)
	if err != nil {
		t.Fatal(err)
	}
	if err := os.MkdirAll(filepath.Dir(manifestPath), 0o755); err != nil {
		t.Fatal(err)
	}

	// relink replaces whatever is at manifestPath with a symlink to target.
	relink := func(t *testing.T, target string) {
		if err := os.Remove(manifestPath); err != nil && !os.IsNotExist(err) {
			t.Fatal(err)
		}
		if err := os.Symlink(target, manifestPath); err != nil {
			t.Skipf("symlink unavailable: %v", err)
		}
	}

	t.Run("non blob basename", func(t *testing.T) {
		target := filepath.Join(t.TempDir(), "not-a-blob")
		if err := os.WriteFile(target, []byte(`{"schemaVersion":2}`), 0o644); err != nil {
			t.Fatal(err)
		}
		relink(t, target)

		_, err := ParseNamedManifest(name)
		if err == nil || !strings.Contains(err.Error(), "not a sha256 blob") {
			t.Fatalf("err = %v, want not a sha256 blob", err)
		}
	})

	t.Run("blob basename outside blob store", func(t *testing.T) {
		payload := []byte(`{"schemaVersion":2,"mediaType":"application/vnd.docker.distribution.manifest.v2+json"}`)
		blobName := fmt.Sprintf("sha256-%x", sha256.Sum256(payload))
		target := filepath.Join(t.TempDir(), blobName)
		if err := os.WriteFile(target, payload, 0o644); err != nil {
			t.Fatal(err)
		}
		relink(t, target)

		_, err := ParseNamedManifest(name)
		if err == nil || !strings.Contains(err.Error(), "does not match blob") {
			t.Fatalf("err = %v, want does not match blob", err)
		}
	})
}
// TestParseNamedManifestPrefersV2 stores different manifests for the same
// name under the legacy path and via WriteManifestData, then expects
// ParseNamedManifest to return the one written by WriteManifestData.
func TestParseNamedManifestPrefersV2(t *testing.T) {
	t.Setenv("OLLAMA_MODELS", t.TempDir())
	name := model.ParseName("example")

	legacyPath, err := PathForName(name)
	if err != nil {
		t.Fatal(err)
	}
	if err := os.MkdirAll(filepath.Dir(legacyPath), 0o755); err != nil {
		t.Fatal(err)
	}
	legacyJSON := []byte(`{"schemaVersion":2,"mediaType":"legacy"}`)
	if err := os.WriteFile(legacyPath, legacyJSON, 0o644); err != nil {
		t.Fatal(err)
	}

	v2JSON := []byte(`{"schemaVersion":2,"mediaType":"v2"}`)
	if err := WriteManifestData(name, v2JSON); err != nil {
		t.Fatal(err)
	}

	parsed, err := ParseNamedManifest(name)
	if err != nil {
		t.Fatal(err)
	}
	if parsed.MediaType == "v2" {
		return
	}
	t.Fatalf("media type = %q, want %q", parsed.MediaType, "v2")
}
// TestManifestsV2ShadowsLegacy creates a legacy manifest and a v2 manifest
// for the same name and expects Manifests to list the name exactly once,
// carrying the v2 content.
func TestManifestsV2ShadowsLegacy(t *testing.T) {
	models := t.TempDir()
	t.Setenv("OLLAMA_MODELS", models)

	name := model.ParseName("example")
	createManifest(t, models, name.Filepath())
	if err := WriteManifestData(name, []byte(`{"schemaVersion":2,"mediaType":"v2"}`)); err != nil {
		t.Fatal(err)
	}

	listed, err := Manifests(true)
	if err != nil {
		t.Fatal(err)
	}
	if n := len(listed); n != 1 {
		t.Fatalf("manifest count = %d, want 1", n)
	}

	// Look the entry up case-insensitively rather than by exact map key.
	var found *Manifest
	want := model.ParseName("example")
	for key, entry := range listed {
		if !key.EqualFold(want) {
			continue
		}
		found = entry
		break
	}

	switch {
	case found == nil:
		t.Fatalf("missing v2 manifest for %s", name)
	case found.MediaType != "v2":
		t.Fatalf("media type = %q, want %q", found.MediaType, "v2")
	}
}
func TestManifests(t *testing.T) {
cases := map[string]struct {
ps []string

View File

@@ -14,8 +14,23 @@ import (
var ErrInvalidDigestFormat = errors.New("invalid digest format")
// Directory names under the models directory and the registry hosts that are
// treated as the default public registry.
const (
// legacyDirName is the directory returned by Path.
legacyDirName = "manifests"
// v2DirName is the directory returned by V2Path.
v2DirName = "manifests-v2"
// defaultPublicHost is the legacy default host; normalizeLogicalName maps
// public hosts to it.
defaultPublicHost = "registry.ollama.ai"
// v2CanonicalHost is the host canonicalV2Name uses for manifests-v2 paths.
v2CanonicalHost = "ollama.com"
)
func Path() (string, error) {
path := filepath.Join(envconfig.Models(), "manifests")
return manifestPath(legacyDirName)
}
// V2Path returns the manifests-v2 directory by calling manifestPath with
// v2DirName.
func V2Path() (string, error) {
return manifestPath(v2DirName)
}
func manifestPath(dir string) (string, error) {
path := filepath.Join(envconfig.Models(), dir)
if err := os.MkdirAll(path, 0o755); err != nil {
return "", fmt.Errorf("%w: ensure path elements are traversable", err)
}
@@ -25,6 +40,10 @@ func Path() (string, error) {
// PathForName returns the path to the manifest file for a specific model name.
// It is an alias for LegacyPathForName and therefore always yields a legacy
// path; use V2PathForName or ResolvePathForName for the other lookups.
func PathForName(n model.Name) (string, error) {
return LegacyPathForName(n)
}
func LegacyPathForName(n model.Name) (string, error) {
if !n.IsValid() {
return "", os.ErrNotExist
}
@@ -37,6 +56,162 @@ func PathForName(n model.Name) (string, error) {
return filepath.Join(manifests, n.Filepath()), nil
}
// V2PathForName returns the manifest path for n inside the manifests-v2
// directory, with public hosts rewritten by canonicalV2Name. An invalid name
// yields os.ErrNotExist.
func V2PathForName(n model.Name) (string, error) {
	if !n.IsValid() {
		return "", os.ErrNotExist
	}

	root, err := V2Path()
	if err != nil {
		return "", err
	}

	relative := canonicalV2Name(n).Filepath()
	return filepath.Join(root, relative), nil
}
// ResolvePathForName returns the manifest path found by resolveManifestPath
// for n, discarding the manifest root that helper also reports.
func ResolvePathForName(n model.Name) (string, error) {
path, _, err := resolveManifestPath(n)
return path, err
}
// resolveManifestPath locates the on-disk manifest entry for n and returns
// the entry's path together with the manifest root directory it lives under.
//
// The manifests-v2 location is checked first; if nothing exists there, each
// legacy name candidate (see legacyNameCandidates) is tried in order under
// the legacy root. os.Lstat is used so that an entry which is itself a
// symlink counts as present without following it.
//
// It returns os.ErrNotExist when n is invalid or no entry is found. On any
// error both returned paths are empty.
func resolveManifestPath(n model.Name) (string, string, error) {
	if !n.IsValid() {
		return "", "", os.ErrNotExist
	}

	v2Path, err := V2PathForName(n)
	if err != nil {
		return "", "", err
	}
	if _, err := os.Lstat(v2Path); err == nil {
		root, err := V2Path()
		if err != nil {
			// Do not hand back a path alongside an error.
			return "", "", err
		}
		return v2Path, root, nil
	} else if !os.IsNotExist(err) {
		return "", "", err
	}

	legacyRoot, err := Path()
	if err != nil {
		return "", "", err
	}
	for _, legacyName := range legacyNameCandidates(n) {
		legacyPath := filepath.Join(legacyRoot, legacyName.Filepath())
		_, err := os.Lstat(legacyPath)
		if err == nil {
			return legacyPath, legacyRoot, nil
		}
		if !os.IsNotExist(err) {
			return "", "", err
		}
	}

	return "", "", os.ErrNotExist
}
// removeNamedManifestPaths deletes the manifest entry for n from the
// manifests-v2 location and from every legacy name candidate, ignoring
// entries that do not exist, and then prunes both manifest roots. All paths
// are computed before anything is removed.
func removeNamedManifestPaths(n model.Name) error {
	legacyNames := legacyNameCandidates(n)

	v2Path, err := V2PathForName(n)
	if err != nil {
		return err
	}

	targets := append(make([]string, 0, len(legacyNames)+1), v2Path)
	for _, candidate := range legacyNames {
		p, err := LegacyPathForName(candidate)
		if err != nil {
			return err
		}
		targets = append(targets, p)
	}

	for _, target := range targets {
		err := os.Remove(target)
		if err == nil || os.IsNotExist(err) {
			continue
		}
		return err
	}

	return pruneManifestRoots()
}
// removeLegacyManifestPaths deletes the legacy manifest entry for every name
// candidate of n, ignoring entries that do not exist, and then prunes the
// legacy manifest root. The manifests-v2 entry is left untouched.
func removeLegacyManifestPaths(n model.Name) error {
	for _, candidate := range legacyNameCandidates(n) {
		p, err := LegacyPathForName(candidate)
		if err != nil {
			return err
		}
		switch err := os.Remove(p); {
		case err == nil, os.IsNotExist(err):
			// removed, or nothing to remove
		default:
			return err
		}
	}

	root, err := Path()
	if err != nil {
		return err
	}
	pruneErr := PruneDirectory(root)
	if pruneErr == nil || os.IsNotExist(pruneErr) {
		return nil
	}
	return pruneErr
}
// pruneManifestRoots runs PruneDirectory over the legacy manifest root and
// then the manifests-v2 root, treating a missing directory as success.
func pruneManifestRoots() error {
	for _, resolveRoot := range []func() (string, error){Path, V2Path} {
		dir, err := resolveRoot()
		if err != nil {
			return err
		}
		err = PruneDirectory(dir)
		if err == nil || os.IsNotExist(err) {
			continue
		}
		return err
	}
	return nil
}
// normalizeLogicalName rewrites either public host to the legacy default
// host, giving map keys one identity no matter which host is used on disk.
// Names on any other host are returned unchanged.
func normalizeLogicalName(n model.Name) model.Name {
	if !isDefaultPublicHost(n.Host) {
		return n
	}
	n.Host = defaultPublicHost
	return n
}
// canonicalV2Name rewrites either public host to the v2 canonical host, which
// is the form used for on-disk paths under manifests-v2/. Names on any other
// host are returned unchanged.
func canonicalV2Name(n model.Name) model.Name {
	if !isDefaultPublicHost(n.Host) {
		return n
	}
	n.Host = v2CanonicalHost
	return n
}
// legacyNameCandidates lists the names under which a legacy manifest for n
// may be stored. The result always starts with n itself; when n is on one of
// the public hosts, the same name on the other public host follows.
func legacyNameCandidates(n model.Name) []model.Name {
	if !isDefaultPublicHost(n.Host) {
		return []model.Name{n}
	}

	other := n
	if strings.EqualFold(n.Host, defaultPublicHost) {
		other.Host = v2CanonicalHost
	} else {
		other.Host = defaultPublicHost
	}
	return []model.Name{n, other}
}
// isDefaultPublicHost reports whether host is, ignoring case, either the
// legacy default host or the v2 canonical host.
func isDefaultPublicHost(host string) bool {
	for _, known := range [...]string{defaultPublicHost, v2CanonicalHost} {
		if strings.EqualFold(host, known) {
			return true
		}
	}
	return false
}
func BlobsPath(digest string) (string, error) {
// only accept actual sha256 digests
pattern := "^sha256[:-][0-9a-fA-F]{64}$"

View File

@@ -102,7 +102,24 @@ func (s *Server) CreateHandler(c *gin.Context) {
ch <- resp
}
oldManifest, _ := manifest.ParseNamedManifest(name)
oldManifestDigests, _ := manifest.ReferencedBlobDigestsForName(name)
if len(r.List) > 0 {
if err := createManifestList(r, name, fn); err != nil {
ch <- gin.H{"error": err.Error()}
return
}
if !envconfig.NoPrune() && len(oldManifestDigests) > 0 {
if _, err := manifest.RemoveUnreferencedBlobs(oldManifestDigests...); err != nil {
ch <- gin.H{"error": err.Error()}
return
}
}
ch <- api.ProgressResponse{Status: "success"}
return
}
var baseLayers []*layerGGML
var err error
@@ -265,8 +282,8 @@ func (s *Server) CreateHandler(c *gin.Context) {
return
}
if !envconfig.NoPrune() && oldManifest != nil {
if err := oldManifest.RemoveLayers(); err != nil {
if !envconfig.NoPrune() && len(oldManifestDigests) > 0 {
if _, err := manifest.RemoveUnreferencedBlobs(oldManifestDigests...); err != nil {
ch <- gin.H{"error": err.Error()}
}
}
@@ -599,13 +616,162 @@ func createModel(r api.CreateRequest, name model.Name, baseLayers []*layerGGML,
}
fn(api.ProgressResponse{Status: "writing manifest"})
if err := manifest.WriteManifest(name, *configLayer, layers); err != nil {
runner, format := manifestMetadataForConfig(*config)
if err := manifest.WriteManifestWithMetadata(name, *configLayer, layers, runner, format); err != nil {
return err
}
return nil
}
// createManifestList builds a manifest list named name from the local models
// in r.List and writes it with manifest.WriteManifestData.
//
// The request is validated first (see validateCreateManifestListRequest).
// Each entry is trimmed; an empty entry is an error. For every entry a
// "reading manifest" progress update is sent through fn, and the entry is
// turned into a manifest reference by manifestListChildReference. A final
// "writing manifest list" update is sent before the parent is written.
//
// Errors from decoding, encoding, or storing a child manifest are wrapped
// with the entry they belong to so a failing entry can be identified.
func createManifestList(r api.CreateRequest, name model.Name, fn func(resp api.ProgressResponse)) error {
	if err := validateCreateManifestListRequest(r); err != nil {
		return err
	}

	manifests := make([]manifest.Manifest, 0, len(r.List))
	for _, ref := range r.List {
		ref = strings.TrimSpace(ref)
		if ref == "" {
			return errors.New("manifest list contains an empty model")
		}

		fn(api.ProgressResponse{Status: fmt.Sprintf("reading manifest %s", ref)})
		childRef, err := manifestListChildReference(ref)
		if err != nil {
			return err
		}
		manifests = append(manifests, childRef)
	}

	parent := manifest.Manifest{
		SchemaVersion: 2,
		MediaType:     manifest.MediaTypeManifestList,
		Manifests:     manifests,
	}
	data, err := json.Marshal(parent)
	if err != nil {
		return fmt.Errorf("encode manifest list: %w", err)
	}

	fn(api.ProgressResponse{Status: "writing manifest list"})
	return manifest.WriteManifestData(name, data)
}

// manifestListChildReference resolves one manifest list entry to a manifest
// reference. ref must name an existing local model that is not itself a
// manifest list. The child's manifest is read, its runner/format metadata is
// filled in when absent, the resulting manifest is stored as a blob, and a
// reference to that blob is returned.
func manifestListChildReference(ref string) (manifest.Manifest, error) {
	modelRef, err := parseAndValidateModelRef(ref)
	if err != nil {
		return manifest.Manifest{}, err
	}
	if modelRef.Source == modelSourceCloud {
		return manifest.Manifest{}, fmt.Errorf("manifest list entries must be local models: %s", ref)
	}

	childName, err := getExistingName(modelRef.Name)
	if err != nil {
		return manifest.Manifest{}, err
	}

	data, err := manifest.ReadManifestData(childName)
	if err != nil {
		return manifest.Manifest{}, fmt.Errorf("read manifest %s: %w", ref, err)
	}

	var child manifest.Manifest
	if err := json.Unmarshal(data, &child); err != nil {
		return manifest.Manifest{}, fmt.Errorf("parse manifest %s: %w", ref, err)
	}
	if child.MediaType == manifest.MediaTypeManifestList {
		return manifest.Manifest{}, fmt.Errorf("manifest list entry %s is already a manifest list", ref)
	}
	if err := fillManifestMetadata(&child); err != nil {
		return manifest.Manifest{}, fmt.Errorf("manifest list entry %s: %w", ref, err)
	}

	// Re-encode so the stored child blob carries the filled-in metadata.
	childData, err := json.Marshal(child)
	if err != nil {
		return manifest.Manifest{}, fmt.Errorf("encode manifest %s: %w", ref, err)
	}
	childDigest, err := manifest.WriteManifestBlob(childData)
	if err != nil {
		return manifest.Manifest{}, fmt.Errorf("write manifest blob for %s: %w", ref, err)
	}

	childRef, err := manifest.NewManifestReference(childDigest, child.Runner, child.Format)
	if err != nil {
		return manifest.Manifest{}, fmt.Errorf("manifest list entry %s: %w", ref, err)
	}
	return childRef, nil
}
// validateCreateManifestListRequest checks that r describes a manifest list
// and nothing else: r.List must be non-empty, and none of the model creation
// fields (source, files, adapters, template, system prompt, license,
// parameters, messages, renderer, parser, requirements, info, quantization)
// may be set.
func validateCreateManifestListRequest(r api.CreateRequest) error {
	if len(r.List) == 0 {
		return errors.New("manifest list must contain at least one model")
	}

	hasSource := r.From != "" || r.RemoteHost != "" || len(r.Files) > 0 || len(r.Adapters) > 0
	hasPrompting := r.Template != "" || r.System != "" || r.License != nil || len(r.Parameters) > 0 || len(r.Messages) > 0
	hasExtras := r.Renderer != "" || r.Parser != "" || r.Requires != "" || len(r.Info) > 0
	hasQuantization := r.Quantize != "" || r.Quantization != ""

	if hasSource || hasPrompting || hasExtras || hasQuantization {
		return errors.New("manifest list creation cannot be combined with model creation options")
	}
	return nil
}
// fillManifestMetadata makes sure m has both a runner and a format. Values
// already present on m are kept; missing ones are derived from the config
// blob that m.Config.Digest points to. An error is returned when the config
// cannot be read or when either value is still empty afterwards.
func fillManifestMetadata(m *manifest.Manifest) error {
	needRunner, needFormat := m.Runner == "", m.Format == ""
	if !needRunner && !needFormat {
		return nil
	}

	config, err := readManifestConfig(m.Config.Digest)
	if err != nil {
		return err
	}

	derivedRunner, derivedFormat := manifestMetadataForConfig(config)
	if needRunner {
		m.Runner = derivedRunner
	}
	if needFormat {
		m.Format = derivedFormat
	}

	if m.Runner != "" && m.Format != "" {
		return nil
	}
	return errors.New("manifest is missing runner or format metadata")
}
// readManifestConfig loads and JSON-decodes the config blob identified by
// digest. It returns an error when digest is empty, when the blob path cannot
// be resolved or opened, or when the blob is not valid JSON. On any error the
// returned config is the zero value.
func readManifestConfig(digest string) (model.ConfigV2, error) {
	if digest == "" {
		return model.ConfigV2{}, errors.New("manifest is missing config digest")
	}

	configPath, err := manifest.BlobsPath(digest)
	if err != nil {
		return model.ConfigV2{}, err
	}

	configFile, err := os.Open(configPath)
	if err != nil {
		return model.ConfigV2{}, err
	}
	defer configFile.Close()

	// Decode in its own statement: in `return config, Decode(&config)` the
	// language does not specify whether config is read before or after the
	// call that fills it in.
	var config model.ConfigV2
	if err := json.NewDecoder(configFile).Decode(&config); err != nil {
		return model.ConfigV2{}, err
	}
	return config, nil
}
// manifestMetadataForConfig derives manifest runner and format values from
// config.ModelFormat, compared case-insensitively:
//   - safetensors maps to the MLX runner with the safetensors format,
//   - gguf and "ggml" map to the GGML runner with the GGUF format,
//   - anything else yields an empty runner and the lowercased model format.
func manifestMetadataForConfig(config model.ConfigV2) (runner, format string) {
	modelFormat := strings.ToLower(config.ModelFormat)

	if modelFormat == manifest.FormatSafetensors {
		return manifest.RunnerMLX, manifest.FormatSafetensors
	}
	if modelFormat == manifest.FormatGGUF || modelFormat == "ggml" {
		return manifest.RunnerGGML, manifest.FormatGGUF
	}
	return "", modelFormat
}
func quantizeLayer(layer *layerGGML, quantizeType string, fn func(resp api.ProgressResponse)) (*layerGGML, error) {
ft := layer.GGML.KV().FileType()
var doneBytes atomic.Uint64

View File

@@ -71,6 +71,8 @@ type Model struct {
System string
License []string
Digest string
ManifestDigest string
Runner string
Options map[string]any
Messages []api.Message
@@ -300,17 +302,30 @@ func (m *Model) String() string {
}
// GetModel returns model metadata for name without requesting a particular
// runner; it calls GetModelForRunner with an empty runner.
func GetModel(name string) (*Model, error) {
return GetModelForRunner(name, "")
}
// GetModelForRunner returns model metadata for name, selecting runner from a
// manifest list when one is specified.
func GetModelForRunner(name, runner string) (*Model, error) {
n := model.ParseName(name)
mf, err := manifest.ParseNamedManifest(n)
mf, err := manifest.ParseNamedManifestForRunner(n, runner)
if err != nil {
return nil, err
}
manifestDigest := mf.SelectedDigest()
if manifestDigest == "" {
manifestDigest = mf.Digest()
}
m := &Model{
Name: n.String(),
ShortName: n.DisplayShortest(),
Digest: mf.Digest(),
Template: template.DefaultTemplate,
Name: n.String(),
ShortName: n.DisplayShortest(),
Digest: mf.Digest(),
ManifestDigest: manifestDigest,
Runner: mf.Runner,
Template: template.DefaultTemplate,
}
if mf.Config.Digest != "" {
@@ -411,66 +426,16 @@ func CopyModel(src, dst model.Name) error {
return nil
}
manifests, err := manifest.Path()
data, err := manifest.ReadManifestData(src)
if err != nil {
return err
}
dstpath := filepath.Join(manifests, dst.Filepath())
if err := os.MkdirAll(filepath.Dir(dstpath), 0o755); err != nil {
return err
}
srcpath := filepath.Join(manifests, src.Filepath())
srcfile, err := os.Open(srcpath)
if err != nil {
return err
}
defer srcfile.Close()
dstfile, err := os.Create(dstpath)
if err != nil {
return err
}
defer dstfile.Close()
_, err = io.Copy(dstfile, srcfile)
return err
}
func deleteUnusedLayers(deleteMap map[string]struct{}) error {
// Ignore corrupt manifests to avoid blocking deletion of layers that are freshly orphaned
manifests, err := manifest.Manifests(true)
if err != nil {
return err
}
for _, manifest := range manifests {
for _, layer := range manifest.Layers {
delete(deleteMap, layer.Digest)
}
delete(deleteMap, manifest.Config.Digest)
}
// only delete the files which are still in the deleteMap
for k := range deleteMap {
fp, err := manifest.BlobsPath(k)
if err != nil {
slog.Info(fmt.Sprintf("couldn't get file path for '%s': %v", k, err))
continue
}
if err := os.Remove(fp); err != nil {
slog.Info(fmt.Sprintf("couldn't remove file '%s': %v", fp, err))
continue
}
}
return nil
return manifest.WriteManifestData(dst, data)
}
func PruneLayers() error {
deleteMap := make(map[string]struct{})
var candidates []string
p, err := manifest.BlobsPath("")
if err != nil {
return err
@@ -511,17 +476,18 @@ func PruneLayers() error {
continue
}
deleteMap[name] = struct{}{}
candidates = append(candidates, name)
}
slog.Info(fmt.Sprintf("total blobs: %d", len(deleteMap)))
slog.Info(fmt.Sprintf("total blobs: %d", len(candidates)))
if err := deleteUnusedLayers(deleteMap); err != nil {
removed, err := manifest.RemoveUnreferencedBlobs(candidates...)
if err != nil {
slog.Error(fmt.Sprintf("couldn't remove unused layers: %v", err))
return nil
}
slog.Info(fmt.Sprintf("total unused blobs removed: %d", len(deleteMap)))
slog.Info(fmt.Sprintf("total unused blobs removed: %d", removed))
return nil
}
@@ -534,29 +500,49 @@ func PushModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
return errInsecureProtocol
}
mf, err := manifest.ParseNamedManifest(n)
manifestJSON, err := manifest.ReadManifestData(n)
if err != nil {
fn(api.ProgressResponse{Status: "couldn't retrieve manifest"})
return err
}
var stored manifest.Manifest
if err := json.Unmarshal(manifestJSON, &stored); err != nil {
fn(api.ProgressResponse{Status: "couldn't retrieve manifest"})
return err
}
var layers []manifest.Layer
layers = append(layers, mf.Layers...)
if mf.Config.Digest != "" {
layers = append(layers, mf.Config)
manifestMediaType := manifest.MediaTypeManifest
if stored.MediaType == manifest.MediaTypeManifestList {
layers, err = pushLayersForManifestList(stored)
if err != nil {
return err
}
manifestMediaType = manifest.MediaTypeManifestList
} else {
mf, err := manifest.ParseNamedManifest(n)
if err != nil {
fn(api.ProgressResponse{Status: "couldn't retrieve manifest"})
return err
}
layers = append(layers, mf.Layers...)
if mf.Config.Digest != "" {
layers = append(layers, mf.Config)
}
if !hasTensorLayers(layers) {
manifestJSON, err = json.Marshal(mf)
if err != nil {
return err
}
}
}
// Use fast transfer for models with tensor layers (many small blobs)
if hasTensorLayers(layers) {
// Read raw manifest JSON to preserve tensor metadata fields
manifestPath, err := manifest.PathForName(n)
if err != nil {
return err
}
manifestJSON, err := os.ReadFile(manifestPath)
if err != nil {
return err
}
if err := pushWithTransfer(ctx, n, layers, manifestJSON, regOpts, fn); err != nil {
return err
}
@@ -575,13 +561,8 @@ func PushModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
requestURL := n.BaseURL()
requestURL = requestURL.JoinPath("v2", n.DisplayNamespaceModel(), "manifests", n.Tag)
manifestJSON, err := json.Marshal(mf)
if err != nil {
return err
}
headers := make(http.Header)
headers.Set("Content-Type", "application/vnd.docker.distribution.manifest.v2+json")
headers.Set("Content-Type", manifestMediaType)
resp, err := makeRequestWithRetry(ctx, http.MethodPut, requestURL, headers, bytes.NewReader(manifestJSON), regOpts)
if err != nil {
return err
@@ -593,22 +574,79 @@ func PushModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
return nil
}
// pushLayersForManifestList returns the blobs that must be pushed for the
// manifest list parent: for each child, the child manifest blob itself, then
// the resolved child's layers, then its config. Blobs are returned in
// first-seen order with duplicate digests dropped, and entries with an empty
// digest are skipped. When an entry carries no size, the size is taken from
// the blob file on disk.
func pushLayersForManifestList(parent manifest.Manifest) ([]manifest.Layer, error) {
	var collected []manifest.Layer
	visited := make(map[string]struct{})

	// collect records blob unless its digest is empty or already recorded.
	collect := func(blob manifest.Layer) error {
		if blob.Digest == "" {
			return nil
		}
		if _, dup := visited[blob.Digest]; dup {
			return nil
		}

		if blob.Size == 0 {
			blobPath, err := manifest.BlobsPath(blob.Digest)
			if err != nil {
				return err
			}
			info, err := os.Stat(blobPath)
			if err != nil {
				return err
			}
			blob.Size = info.Size()
		}

		visited[blob.Digest] = struct{}{}
		collected = append(collected, blob)
		return nil
	}

	for _, child := range parent.Manifests {
		digest := child.BlobDigest()
		if digest == "" {
			return nil, errors.New("manifest list child is missing digest")
		}

		// The child manifest is pushed as a blob in its own right.
		manifestBlob := manifest.Layer{MediaType: manifest.MediaTypeManifest, Digest: digest}
		if err := collect(manifestBlob); err != nil {
			return nil, err
		}

		resolved, err := resolveShowManifestChild(child)
		if err != nil {
			return nil, err
		}

		contents := make([]manifest.Layer, 0, len(resolved.Layers)+1)
		contents = append(contents, resolved.Layers...)
		contents = append(contents, resolved.Config)
		for _, blob := range contents {
			if err := collect(blob); err != nil {
				return nil, err
			}
		}
	}

	return collected, nil
}
func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn func(api.ProgressResponse)) error {
n := model.ParseName(name)
// build deleteMap to prune unused layers
deleteMap := make(map[string]struct{})
existingMf, err := manifest.ParseNamedManifest(n)
existingDigests, err := manifest.ReferencedBlobDigestsForName(n)
if errors.Is(err, os.ErrNotExist) {
// noop
} else if err != nil {
slog.Warn("pulling model with bad existing manifest", "name", name, "error", err)
} else {
for _, l := range existingMf.Layers {
deleteMap[l.Digest] = struct{}{}
}
if existingMf.Config.Digest != "" {
deleteMap[existingMf.Config.Digest] = struct{}{}
for _, digest := range existingDigests {
if blob, err := manifest.BlobsPath(digest); err == nil {
if _, err := os.Stat(blob); err == nil {
deleteMap[digest] = struct{}{}
}
}
}
}
@@ -623,6 +661,13 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
return fmt.Errorf("pull model manifest: %s", err)
}
if mf.MediaType == manifest.MediaTypeManifestList {
mf, err = pullSelectedManifest(ctx, n, mf, regOpts, fn)
if err != nil {
return err
}
}
var layers []manifest.Layer
layers = append(layers, mf.Layers...)
if mf.Config.Digest != "" {
@@ -679,25 +724,16 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
fn(api.ProgressResponse{Status: "writing manifest"})
fp, err := manifest.PathForName(n)
if err != nil {
return err
}
if err := os.MkdirAll(filepath.Dir(fp), 0o755); err != nil {
if err := manifest.WriteManifestData(n, manifestData); err != nil {
slog.Info(fmt.Sprintf("couldn't write manifest for %s", n.DisplayShortest()))
return err
}
err = os.WriteFile(fp, manifestData, 0o644)
if err != nil {
slog.Info(fmt.Sprintf("couldn't write to %s", fp))
return err
}
slog.Debug("manifest written", "path", fp, "sha256", fmt.Sprintf("%x", sha256.Sum256(manifestData)), "size", len(manifestData))
slog.Debug("manifest written", "name", n.DisplayShortest(), "sha256", fmt.Sprintf("%x", sha256.Sum256(manifestData)), "size", len(manifestData))
if !envconfig.NoPrune() && len(deleteMap) > 0 {
fn(api.ProgressResponse{Status: "removing unused layers"})
if err := deleteUnusedLayers(deleteMap); err != nil {
if _, err := manifest.RemoveUnreferencedBlobs(candidateBlobDigests(deleteMap)...); err != nil {
fn(api.ProgressResponse{Status: fmt.Sprintf("couldn't remove unused layers: %v", err)})
}
}
@@ -717,8 +753,85 @@ func hasTensorLayers(layers []manifest.Layer) bool {
return false
}
// pullWithTransfer uses the simplified x/transfer package for downloading blobs.
func pullWithTransfer(ctx context.Context, n model.Name, layers []manifest.Layer, manifestData []byte, regOpts *registryOptions, fn func(api.ProgressResponse)) error {
// candidateBlobDigests returns the keys of m as a slice. The order follows Go
// map iteration and is therefore unspecified; the result is never nil.
func candidateBlobDigests(m map[string]struct{}) []string {
	out := make([]string, len(m))
	next := 0
	for key := range m {
		out[next] = key
		next++
	}
	return out
}
// pullSelectedManifest picks one child out of the manifest list parent,
// downloads that child's manifest blob, verifies it, and returns it decoded.
//
// Runner and Format are taken from the list entry when the downloaded
// manifest leaves them empty. A child that is itself a manifest list, or a
// list entry without a digest, is reported as an error.
func pullSelectedManifest(ctx context.Context, n model.Name, parent *manifest.Manifest, regOpts *registryOptions, fn func(api.ProgressResponse)) (*manifest.Manifest, error) {
	ref, err := manifest.SelectManifestReference(parent.Manifests)
	if err != nil {
		return nil, err
	}

	digest := ref.BlobDigest()
	if digest == "" {
		return nil, errors.New("manifest list child is missing digest")
	}

	// The transfer needs a size, so ask the registry for the blob's length first.
	blob, err := remoteBlobLayer(ctx, n, digest, manifest.MediaTypeManifest, regOpts)
	if err != nil {
		return nil, err
	}

	err = downloadWithTransfer(ctx, n, []manifest.Layer{blob}, regOpts, fn)
	if err != nil {
		return nil, err
	}

	err = verifyBlob(digest)
	if err != nil {
		return nil, err
	}

	localPath, err := manifest.BlobsPath(digest)
	if err != nil {
		return nil, err
	}

	raw, err := os.ReadFile(localPath)
	if err != nil {
		return nil, err
	}

	selected := new(manifest.Manifest)
	if err := json.Unmarshal(raw, selected); err != nil {
		return nil, err
	}

	if selected.MediaType == manifest.MediaTypeManifestList {
		return nil, errors.New("nested manifest lists are not supported")
	}

	// Fall back to the metadata recorded on the list entry.
	if selected.Runner == "" {
		selected.Runner = ref.Runner
	}
	if selected.Format == "" {
		selected.Format = ref.Format
	}

	return selected, nil
}
// remoteBlobLayer issues a HEAD request for the blob identified by digest in
// n's repository and returns a layer descriptor carrying the given media type
// and the size reported in the Content-Length response header.
//
// An error is returned when the request fails, or when Content-Length is
// missing, not a base-10 integer, or negative; those errors name the blob and
// the offending header value so a misbehaving registry is identifiable.
func remoteBlobLayer(ctx context.Context, n model.Name, digest, mediaType string, regOpts *registryOptions) (manifest.Layer, error) {
	requestURL := n.BaseURL().JoinPath("v2", n.DisplayNamespaceModel(), "blobs", digest)
	resp, err := makeRequestWithRetry(ctx, http.MethodHead, requestURL, nil, nil, regOpts)
	if err != nil {
		return manifest.Layer{}, err
	}
	defer resp.Body.Close()

	contentLength := resp.Header.Get("Content-Length")
	size, err := strconv.ParseInt(contentLength, 10, 64)
	if err != nil {
		return manifest.Layer{}, fmt.Errorf("blob %s: invalid Content-Length %q: %w", digest, contentLength, err)
	}
	// ParseInt accepts a leading minus sign; a negative size is never valid.
	if size < 0 {
		return manifest.Layer{}, fmt.Errorf("blob %s: invalid Content-Length %q", digest, contentLength)
	}

	return manifest.Layer{
		MediaType: mediaType,
		Digest:    digest,
		Size:      size,
	}, nil
}
func downloadWithTransfer(ctx context.Context, n model.Name, layers []manifest.Layer, regOpts *registryOptions, fn func(api.ProgressResponse)) error {
blobs := make([]transfer.Blob, len(layers))
for i, layer := range layers {
blobs[i] = transfer.Blob{
@@ -773,22 +886,23 @@ func pullWithTransfer(ctx context.Context, n model.Name, layers []manifest.Layer
return err
}
return nil
}
// pullWithTransfer uses the simplified x/transfer package for downloading blobs.
func pullWithTransfer(ctx context.Context, n model.Name, layers []manifest.Layer, manifestData []byte, regOpts *registryOptions, fn func(api.ProgressResponse)) error {
if err := downloadWithTransfer(ctx, n, layers, regOpts, fn); err != nil {
return err
}
// Write manifest
fn(api.ProgressResponse{Status: "writing manifest"})
fp, err := manifest.PathForName(n)
if err != nil {
return err
}
if err := os.MkdirAll(filepath.Dir(fp), 0o755); err != nil {
if err := manifest.WriteManifestData(n, manifestData); err != nil {
return err
}
if err := os.WriteFile(fp, manifestData, 0o644); err != nil {
return err
}
slog.Debug("manifest written", "path", fp, "sha256", fmt.Sprintf("%x", sha256.Sum256(manifestData)), "size", len(manifestData))
slog.Debug("manifest written", "name", n.DisplayShortest(), "sha256", fmt.Sprintf("%x", sha256.Sum256(manifestData)), "size", len(manifestData))
return nil
}
@@ -854,7 +968,7 @@ func pullModelManifest(ctx context.Context, n model.Name, regOpts *registryOptio
requestURL := n.BaseURL().JoinPath("v2", n.DisplayNamespaceModel(), "manifests", n.Tag)
headers := make(http.Header)
headers.Set("Accept", "application/vnd.docker.distribution.manifest.v2+json")
headers.Set("Accept", strings.Join([]string{manifest.MediaTypeManifestList, manifest.MediaTypeManifest}, ", "))
resp, err := makeRequestWithRetry(ctx, http.MethodGet, requestURL, headers, nil, regOpts)
if err != nil {
return nil, nil, err

View File

@@ -1,15 +1,19 @@
package server
import (
"bytes"
"crypto/sha256"
"encoding/json"
"fmt"
"net/http"
"net/http/httptest"
"os"
"strconv"
"strings"
"testing"
"time"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/fs/ggml"
"github.com/ollama/ollama/manifest"
"github.com/ollama/ollama/template"
@@ -57,6 +61,203 @@ func TestPruneLayersSkipsRecentOrphans(t *testing.T) {
}
}
// TestPushLayersForManifestListIncludesChildManifests writes two child
// manifests locally, wraps references to them in a manifest list, and checks
// that pushLayersForManifestList returns exactly the two child manifest blobs
// plus each child's config and layer, with media types preserved and sizes set.
func TestPushLayersForManifestListIncludesChildManifests(t *testing.T) {
	t.Setenv("OLLAMA_MODELS", t.TempDir())

	// variant bundles a written child manifest with the blobs it references.
	type variant struct {
		mf     manifest.Manifest
		config manifest.Layer
		layer  manifest.Layer
	}

	newVariant := func(name, runner, formatName, layerMediaType string) variant {
		t.Helper()

		parsed := model.ParseName(name)

		cfg, err := manifest.NewLayer(strings.NewReader(name+" config"), "application/vnd.docker.container.image.v1+json")
		if err != nil {
			t.Fatal(err)
		}

		blob, err := manifest.NewLayer(strings.NewReader(name+" layer"), layerMediaType)
		if err != nil {
			t.Fatal(err)
		}

		err = manifest.WriteManifestWithMetadata(parsed, cfg, []manifest.Layer{blob}, runner, formatName)
		if err != nil {
			t.Fatal(err)
		}

		written, err := manifest.ParseNamedManifestForRunner(parsed, runner)
		if err != nil {
			t.Fatal(err)
		}

		return variant{mf: *written, config: cfg, layer: blob}
	}

	variants := []variant{
		newVariant("library/push-mlx:latest", manifest.RunnerMLX, manifest.FormatSafetensors, manifest.MediaTypeImageTensor),
		newVariant("library/push-ggml:latest", manifest.RunnerGGML, manifest.FormatGGUF, "application/vnd.ollama.image.model"),
	}

	// Build the list entries and the digest -> media type expectations together.
	refs := make([]manifest.Manifest, 0, len(variants))
	want := make(map[string]string)
	for i := range variants {
		v := &variants[i]

		ref, err := manifest.NewManifestReference(v.mf.BlobDigest(), v.mf.Runner, v.mf.Format)
		if err != nil {
			t.Fatal(err)
		}
		refs = append(refs, ref)

		want[v.mf.BlobDigest()] = manifest.MediaTypeManifest
		want[v.config.Digest] = v.config.MediaType
		want[v.layer.Digest] = v.layer.MediaType
	}

	layers, err := pushLayersForManifestList(manifest.Manifest{
		SchemaVersion: 2,
		MediaType:     manifest.MediaTypeManifestList,
		Manifests:     refs,
	})
	if err != nil {
		t.Fatal(err)
	}

	if len(layers) != len(want) {
		t.Fatalf("layer count = %d, want %d: %#v", len(layers), len(want), layers)
	}

	for _, got := range layers {
		wantMediaType, known := want[got.Digest]
		switch {
		case !known:
			t.Fatalf("unexpected layer digest %q", got.Digest)
		case got.MediaType != wantMediaType:
			t.Fatalf("layer %q media type = %q, want %q", got.Digest, got.MediaType, wantMediaType)
		}
		if got.Size == 0 {
			t.Fatalf("layer %q has zero size", got.Digest)
		}
	}

	if !hasTensorLayers(layers) {
		t.Fatal("manifest list push layers did not preserve tensor media type")
	}
}
// TestPullModelManifestListDownloadsSelectedChildOnly serves a two-entry
// manifest list from an httptest registry and pulls it with PullModel. It
// expects the named manifest to hold the parent list verbatim, the parsed
// manifest to resolve to the GGML child, the child manifest/config/layer blobs
// to exist locally, and the other child's manifest blob to never be requested
// or stored.
func TestPullModelManifestListDownloadsSelectedChildOnly(t *testing.T) {
t.Setenv("OLLAMA_MODELS", t.TempDir())
// Blob payloads; their digests are derived from the bytes so the pulled
// content can be looked up by digest in the handler below.
configData := []byte(`{"architecture":"test"}`)
configDigest := fmt.Sprintf("sha256:%x", sha256.Sum256(configData))
layerData := []byte("selected tensor layer")
layerDigest := fmt.Sprintf("sha256:%x", sha256.Sum256(layerData))
// The child manifest the test expects to be selected.
child := manifest.Manifest{
SchemaVersion: 2,
MediaType: manifest.MediaTypeManifest,
Runner: manifest.RunnerGGML,
Format: manifest.FormatGGUF,
Config: manifest.Layer{
MediaType: "application/vnd.docker.container.image.v1+json",
Digest: configDigest,
Size: int64(len(configData)),
},
Layers: []manifest.Layer{
{
MediaType: manifest.MediaTypeImageTensor,
Digest: layerDigest,
Size: int64(len(layerData)),
},
},
}
childData, err := json.Marshal(child)
if err != nil {
t.Fatal(err)
}
// The child's digest is the hash of its serialized form, which is what the
// registry handler serves under /blobs/.
childDigest := fmt.Sprintf("sha256:%x", sha256.Sum256(childData))
childRef, err := manifest.NewManifestReference(childDigest, manifest.RunnerGGML, manifest.FormatGGUF)
if err != nil {
t.Fatal(err)
}
// A second list entry with a made-up digest; no blob is registered for it.
unselectedDigest := "sha256:" + strings.Repeat("f", 64)
unselectedRef, err := manifest.NewManifestReference(unselectedDigest, manifest.RunnerLlamaCPP, manifest.FormatGGUF)
if err != nil {
t.Fatal(err)
}
parent := manifest.Manifest{
SchemaVersion: 2,
MediaType: manifest.MediaTypeManifestList,
Manifests: []manifest.Manifest{childRef, unselectedRef},
}
parentData, err := json.Marshal(parent)
if err != nil {
t.Fatal(err)
}
// Everything the fake registry is willing to serve as a blob.
blobs := map[string][]byte{
childDigest: childData,
configDigest: configData,
layerDigest: layerData,
}
// Fake registry: the tag resolves to the manifest list; blob HEAD/GET requests
// are answered from the map above. Any request for the unselected child is
// recorded as a test failure.
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
switch {
case r.Method == http.MethodGet && r.URL.Path == "/v2/library/test/manifests/latest":
w.Header().Set("Content-Type", manifest.MediaTypeManifestList)
w.Header().Set("Content-Length", strconv.Itoa(len(parentData)))
_, _ = w.Write(parentData)
case (r.Method == http.MethodHead || r.Method == http.MethodGet) && strings.HasPrefix(r.URL.Path, "/v2/library/test/blobs/"):
digest := strings.TrimPrefix(r.URL.Path, "/v2/library/test/blobs/")
if digest == unselectedDigest {
t.Errorf("requested unselected child manifest %s", digest)
http.Error(w, "unselected child requested", http.StatusNotFound)
return
}
data, ok := blobs[digest]
if !ok {
http.NotFound(w, r)
return
}
w.Header().Set("Content-Length", strconv.Itoa(len(data)))
// HEAD responses carry only the length header; the body is written for GET.
if r.Method == http.MethodGet {
_, _ = w.Write(data)
}
default:
http.NotFound(w, r)
}
}))
defer ts.Close()
// Use the test server's host:port as the registry part of the model name.
name := strings.TrimPrefix(ts.URL, "http://") + "/library/test:latest"
if err := PullModel(t.Context(), name, &registryOptions{Insecure: true}, func(api.ProgressResponse) {}); err != nil {
t.Fatal(err)
}
n := model.ParseName(name)
// The stored named manifest must be the parent list, byte for byte.
gotParentData, err := manifest.ReadManifestData(n)
if err != nil {
t.Fatal(err)
}
if !bytes.Equal(gotParentData, parentData) {
t.Fatal("named manifest does not contain the parent manifest list")
}
// Parsing the name must yield the selected child's contents.
m, err := manifest.ParseNamedManifest(n)
if err != nil {
t.Fatal(err)
}
if m.Runner != manifest.RunnerGGML {
t.Fatalf("runner = %q, want %q", m.Runner, manifest.RunnerGGML)
}
if m.Config.Digest != configDigest {
t.Fatalf("config digest = %q, want %q", m.Config.Digest, configDigest)
}
// Blobs belonging to the selected child must be present on disk.
for _, digest := range []string{childDigest, configDigest, layerDigest} {
path, err := manifest.BlobsPath(digest)
if err != nil {
t.Fatal(err)
}
if _, err := os.Stat(path); err != nil {
t.Fatalf("expected blob %s to exist: %v", digest, err)
}
}
// The unselected child's manifest blob must not have been written.
path, err := manifest.BlobsPath(unselectedDigest)
if err != nil {
t.Fatal(err)
}
if _, err := os.Stat(path); !os.IsNotExist(err) {
t.Fatalf("unselected child manifest blob exists: %v", err)
}
}
func TestModelCapabilities(t *testing.T) {
// Create completion model (llama architecture without vision)
completionModelPath, _ := createBinFile(t, ggml.KV{

View File

@@ -116,6 +116,10 @@ func (s *Local) serveHTTP(rec *statusCodeRecorder, r *http.Request) {
proxied, err := func() (bool, error) {
switch r.URL.Path {
case "/api/delete":
if s.Fallback != nil {
s.Fallback.ServeHTTP(rec, r)
return true, nil
}
return false, s.handleDelete(rec, r)
case "/api/pull":
return false, s.handlePull(rec, r)

View File

@@ -141,14 +141,29 @@ func (s *Server) modelOptions(model *Model, requestOpts map[string]any) (api.Opt
return opts, nil
}
// normalizeRunner maps a user-supplied runner name onto its canonical
// manifest runner constant. Matching ignores case and surrounding whitespace.
// An empty (or all-whitespace) name yields "" with no error; a name that is
// not a known runner or alias yields an error quoting the original input.
func normalizeRunner(runner string) (string, error) {
	key := strings.ToLower(strings.TrimSpace(runner))
	if key == "" {
		return "", nil
	}

	// Checked in order; the first entry listing key wins.
	known := [...]struct {
		canonical string
		accepted  []string
	}{
		{manifest.RunnerMLX, []string{manifest.RunnerMLX, "mlxrunner"}},
		{manifest.RunnerGGML, []string{manifest.RunnerGGML}},
		{manifest.RunnerLlamaCPP, []string{manifest.RunnerLlamaCPP, "llama.cpp", "llama-cpp", "llama_cpp"}},
	}
	for _, entry := range known {
		for _, alias := range entry.accepted {
			if key == alias {
				return entry.canonical, nil
			}
		}
	}

	return "", fmt.Errorf("unknown runner %q", runner)
}
// scheduleRunner schedules a runner after validating inputs such as capabilities and model options.
// It returns the allocated runner, model instance, and consolidated options if successful and error otherwise.
func (s *Server) scheduleRunner(ctx context.Context, name string, caps []model.Capability, requestOpts map[string]any, keepAlive *api.Duration) (llm.LlamaServer, *Model, *api.Options, error) {
func (s *Server) scheduleRunner(ctx context.Context, name, selectedRunner string, caps []model.Capability, requestOpts map[string]any, keepAlive *api.Duration) (llm.LlamaServer, *Model, *api.Options, error) {
if name == "" {
return nil, nil, nil, fmt.Errorf("model %w", errRequired)
}
model, err := GetModel(name)
model, err := GetModelForRunner(name, selectedRunner)
if err != nil {
return nil, nil, nil, err
}
@@ -207,6 +222,13 @@ func (s *Server) GenerateHandler(c *gin.Context) {
return
}
if runner, err := normalizeRunner(req.Runner); err != nil {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
} else {
req.Runner = runner
}
modelRef, err := parseAndValidateModelRef(req.Model)
if err != nil {
writeModelRefParseError(c, err, http.StatusNotFound, fmt.Sprintf("model '%s' not found", req.Model))
@@ -231,11 +253,13 @@ func (s *Server) GenerateHandler(c *gin.Context) {
return
}
m, err := GetModel(name.String())
m, err := GetModelForRunner(name.String(), req.Runner)
if err != nil {
switch {
case errors.Is(err, fs.ErrNotExist):
c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Model)})
case errors.Is(err, manifest.ErrNoCompatibleManifest):
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
case err.Error() == errtypes.InvalidModelNameErrMsg:
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
default:
@@ -405,7 +429,7 @@ func (s *Server) GenerateHandler(c *gin.Context) {
}
}
r, m, opts, err := s.scheduleRunner(c.Request.Context(), name.String(), caps, req.Options, req.KeepAlive)
r, m, opts, err := s.scheduleRunner(c.Request.Context(), name.String(), req.Runner, caps, req.Options, req.KeepAlive)
if errors.Is(err, errCapabilityCompletion) {
c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support generate", req.Model)})
return
@@ -727,7 +751,7 @@ func (s *Server) EmbedHandler(c *gin.Context) {
return
}
r, m, opts, err := s.scheduleRunner(c.Request.Context(), name.String(), []model.Capability{}, req.Options, req.KeepAlive)
r, m, opts, err := s.scheduleRunner(c.Request.Context(), name.String(), "", []model.Capability{}, req.Options, req.KeepAlive)
if err != nil {
handleScheduleError(c, req.Model, err)
return
@@ -882,7 +906,7 @@ func (s *Server) EmbeddingsHandler(c *gin.Context) {
name := modelRef.Name
r, _, _, err := s.scheduleRunner(c.Request.Context(), name.String(), []model.Capability{}, req.Options, req.KeepAlive)
r, _, _, err := s.scheduleRunner(c.Request.Context(), name.String(), "", []model.Capability{}, req.Options, req.KeepAlive)
if err != nil {
handleScheduleError(c, req.Model, err)
return
@@ -1081,8 +1105,7 @@ func (s *Server) DeleteHandler(c *gin.Context) {
return
}
m, err := manifest.ParseNamedManifest(n)
if err != nil {
if err := manifest.RemoveNamed(n); err != nil {
switch {
case os.IsNotExist(err):
c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", cmp.Or(r.Model, r.Name))})
@@ -1091,16 +1114,85 @@ func (s *Server) DeleteHandler(c *gin.Context) {
}
return
}
}
if err := m.Remove(); err != nil {
// writeShowError writes the JSON error response for a failed show request.
//
// Mapping, first match wins:
//   - not-exist errors (including wrapped ones) -> 404 "model '<model>' not found"
//   - manifest.ErrNoCompatibleManifest          -> 400 with the error text
//   - api.StatusError                           -> its own status code and message
//   - the invalid-model-name message            -> 400 with the error text
//   - anything else                             -> 500 with the error text
//
// err must be non-nil.
func writeShowError(c *gin.Context, model string, err error) {
	var statusErr api.StatusError
	switch {
	// errors.Is follows the wrap chain; os.IsNotExist predates it and would
	// miss a not-exist error wrapped with %w, turning a 404 into a 500.
	case errors.Is(err, os.ErrNotExist):
		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", model)})
	case errors.Is(err, manifest.ErrNoCompatibleManifest):
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
	case errors.As(err, &statusErr):
		c.JSON(statusErr.StatusCode, gin.H{"error": statusErr.ErrorMessage})
	case err.Error() == errtypes.InvalidModelNameErrMsg:
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
	default:
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
	}
}
func readBlobData(digest string) ([]byte, error) {
blobPath, err := manifest.BlobsPath(digest)
if err != nil {
return nil, err
}
if err := m.RemoveLayers(); err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
return os.ReadFile(blobPath)
}
// resolveShowManifestChild returns the full manifest for one entry of a
// manifest list.
//
// An entry that already carries a config or layers is returned as a copy.
// An entry that is only a reference (no config, no layers, non-empty digest)
// is loaded from its blob and decoded over a copy of the entry; Runner and
// Format fall back to the entry's values when the blob leaves them empty.
//
// Manifest lists are rejected both as entries and as the decoded blob, so a
// reference whose blob turns out to be a list cannot slip through as an
// apparently empty manifest.
func resolveShowManifestChild(child manifest.Manifest) (*manifest.Manifest, error) {
	if child.MediaType == manifest.MediaTypeManifestList {
		return nil, errors.New("nested manifest lists are not supported")
	}

	resolved := child
	if resolved.Config.Digest == "" && len(resolved.Layers) == 0 && resolved.Digest() != "" {
		data, err := readBlobData(resolved.BlobDigest())
		if err != nil {
			return nil, err
		}

		if err := json.Unmarshal(data, &resolved); err != nil {
			return nil, err
		}

		// The blob's own media type replaces the entry's during decoding, so
		// the nested-list guard has to be repeated on the decoded result.
		if resolved.MediaType == manifest.MediaTypeManifestList {
			return nil, errors.New("nested manifest lists are not supported")
		}

		if resolved.Runner == "" {
			resolved.Runner = child.Runner
		}
		if resolved.Format == "" {
			resolved.Format = child.Format
		}
	}

	return &resolved, nil
}
// collectManifestLicenseText gathers the license blobs referenced by the given
// manifests and joins their contents with newlines. A license digest shared by
// several manifests is read and emitted once, at its first occurrence. The
// first blob read failure aborts the walk and is returned.
func collectManifestLicenseText(children []manifest.Manifest) (string, error) {
	var (
		texts []string
		done  = map[string]struct{}{}
	)

	for i := range children {
		for _, l := range children[i].Layers {
			isLicense := l.MediaType == "application/vnd.ollama.image.license" && l.Digest != ""
			if !isLicense {
				continue
			}
			if _, dup := done[l.Digest]; dup {
				continue
			}

			body, err := readBlobData(l.Digest)
			if err != nil {
				return "", err
			}

			// Marked only after a successful read.
			done[l.Digest] = struct{}{}
			texts = append(texts, string(body))
		}
	}

	return strings.Join(texts, "\n"), nil
}
func (s *Server) ShowHandler(c *gin.Context) {
@@ -1124,6 +1216,15 @@ func (s *Server) ShowHandler(c *gin.Context) {
return
}
if req.Runner, err = normalizeRunner(req.Runner); err != nil {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
}
if req.AllManifests && req.Runner != "" {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "runner cannot be used with all_manifests"})
return
}
modelRef, err := parseAndValidateModelRef(req.Model)
if err != nil {
writeModelRefParseError(c, err, http.StatusBadRequest, err.Error())
@@ -1138,19 +1239,20 @@ func (s *Server) ShowHandler(c *gin.Context) {
req.Model = modelRef.Base
if req.AllManifests {
resp, err := GetAllManifestsInfo(req)
if err != nil {
writeShowError(c, req.Model, err)
return
}
c.JSON(http.StatusOK, resp)
return
}
resp, err := GetModelInfo(req)
if err != nil {
var statusErr api.StatusError
switch {
case os.IsNotExist(err):
c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Model)})
case errors.As(err, &statusErr):
c.JSON(statusErr.StatusCode, gin.H{"error": statusErr.ErrorMessage})
case err.Error() == errtypes.InvalidModelNameErrMsg:
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
default:
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
}
writeShowError(c, req.Model, err)
return
}
@@ -1173,17 +1275,127 @@ func (s *Server) ShowHandler(c *gin.Context) {
c.JSON(http.StatusOK, resp)
}
func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
func GetAllManifestsInfo(req api.ShowRequest) (*api.ShowManifestsResponse, error) {
runner, err := normalizeRunner(req.Runner)
if err != nil {
return nil, api.StatusError{
StatusCode: http.StatusBadRequest,
ErrorMessage: err.Error(),
}
}
req.Runner = runner
if req.Runner != "" {
return nil, api.StatusError{
StatusCode: http.StatusBadRequest,
ErrorMessage: "runner cannot be used with all_manifests",
}
}
name := model.ParseName(req.Model)
if !name.IsValid() {
return nil, model.Unqualified(name)
}
name, err := getExistingName(name)
name, err = getExistingName(name)
if err != nil {
return nil, err
}
req.Model = name.String()
data, err := manifest.ReadManifestData(name)
if err != nil {
return nil, err
}
m, err := GetModel(name.String())
var parent manifest.Manifest
if err := json.Unmarshal(data, &parent); err != nil {
return nil, err
}
if parent.MediaType != manifest.MediaTypeManifestList {
resp, err := GetModelInfo(req)
if err != nil {
return nil, err
}
mf, err := manifest.ParseNamedManifestForRunner(name, "")
if err != nil {
return nil, err
}
return &api.ShowManifestsResponse{
Manifests: []api.ShowManifest{{
Runner: mf.Runner,
ShowResponse: *resp,
}},
License: resp.License,
}, nil
}
resolvedChildren := make([]manifest.Manifest, 0, len(parent.Manifests))
resp := &api.ShowManifestsResponse{
Manifests: make([]api.ShowManifest, 0, len(parent.Manifests)),
}
for _, child := range parent.Manifests {
resolved, err := resolveShowManifestChild(child)
if err != nil {
return nil, err
}
if resolved.Runner == "" {
return nil, fmt.Errorf("manifest list child %q is missing runner metadata", resolved.BlobDigest())
}
runner, err := normalizeRunner(resolved.Runner)
if err != nil {
return nil, err
}
resolved.Runner = runner
resolvedChildren = append(resolvedChildren, *resolved)
childResp, err := GetModelInfo(api.ShowRequest{
Model: req.Model,
Runner: resolved.Runner,
System: req.System,
Verbose: req.Verbose,
Options: req.Options,
})
if err != nil {
return nil, err
}
resp.Manifests = append(resp.Manifests, api.ShowManifest{
Runner: resolved.Runner,
ShowResponse: *childResp,
})
}
resp.License, err = collectManifestLicenseText(resolvedChildren)
if err != nil {
return nil, err
}
return resp, nil
}
func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
runner, err := normalizeRunner(req.Runner)
if err != nil {
return nil, api.StatusError{
StatusCode: http.StatusBadRequest,
ErrorMessage: err.Error(),
}
}
req.Runner = runner
name := model.ParseName(req.Model)
if !name.IsValid() {
return nil, model.Unqualified(name)
}
name, err = getExistingName(name)
if err != nil {
return nil, err
}
m, err := GetModelForRunner(name.String(), req.Runner)
if err != nil {
return nil, err
}
@@ -1217,7 +1429,7 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
// For safetensors LLM models (experimental), populate details from config.json
if m.Config.ModelFormat == "safetensors" && slices.Contains(m.Config.Capabilities, "completion") {
if info, err := xserver.GetSafetensorsLLMInfo(name); err == nil {
if info, err := xserver.GetSafetensorsLLMInfoForRunner(name, req.Runner); err == nil {
if arch, ok := info["general.architecture"].(string); ok && arch != "" {
modelDetails.Family = arch
}
@@ -1227,7 +1439,7 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
}
// Older manifests may not have file_type populated for safetensors models.
if modelDetails.QuantizationLevel == "" {
if dtype, err := xserver.GetSafetensorsDtype(name); err == nil && dtype != "" {
if dtype, err := xserver.GetSafetensorsDtypeForRunner(name, req.Runner); err == nil && dtype != "" {
modelDetails.QuantizationLevel = dtype
}
}
@@ -1242,7 +1454,7 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
msgs[i] = api.Message{Role: msg.Role, Content: msg.Content}
}
mf, err := manifest.ParseNamedManifest(name)
mf, err := manifest.ParseNamedManifestForRunner(name, req.Runner)
if err != nil {
return nil, err
}
@@ -1327,25 +1539,19 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
}
if slices.Contains(m.Capabilities(), model.CapabilityImage) {
// Populate tensor info if verbose
if req.Verbose {
if tensors, err := xserver.GetSafetensorsTensorInfo(name); err == nil {
resp.Tensors = tensors
}
if tensors, err := xserver.GetSafetensorsTensorInfoForRunner(name, req.Runner); err == nil {
resp.Tensors = tensors
}
return resp, nil
}
// For safetensors LLM models (experimental), populate ModelInfo from config.json
if m.Config.ModelFormat == "safetensors" && slices.Contains(m.Config.Capabilities, "completion") {
if info, err := xserver.GetSafetensorsLLMInfo(name); err == nil {
if info, err := xserver.GetSafetensorsLLMInfoForRunner(name, req.Runner); err == nil {
resp.ModelInfo = info
}
// Populate tensor info if verbose
if req.Verbose {
if tensors, err := xserver.GetSafetensorsTensorInfo(name); err == nil {
resp.Tensors = tensors
}
if tensors, err := xserver.GetSafetensorsTensorInfoForRunner(name, req.Runner); err == nil {
resp.Tensors = tensors
}
return resp, nil
}
@@ -1409,6 +1615,11 @@ func (s *Server) ListHandler(c *gin.Context) {
models := []api.ListModelResponse{}
for n, m := range ms {
var cf model.ConfigV2
size, err := manifest.TotalSizeForName(n)
if err != nil {
slog.Warn("bad manifest size", "name", n, "error", err)
size = m.Size()
}
if m.Config.Digest != "" {
f, err := m.Config.Open()
@@ -1430,7 +1641,7 @@ func (s *Server) ListHandler(c *gin.Context) {
Name: n.DisplayShortest(),
RemoteModel: cf.RemoteModel,
RemoteHost: cf.RemoteHost,
Size: m.Size(),
Size: size,
Digest: m.Digest(),
ModifiedAt: m.FileInfo().ModTime(),
Details: api.ModelDetails{
@@ -1770,13 +1981,15 @@ func Serve(ln net.Listener) error {
return err
}
manifestsPath, err := manifest.Path()
if err != nil {
return err
}
for _, rootFn := range []func() (string, error){manifest.Path, manifest.V2Path} {
manifestsPath, err := rootFn()
if err != nil {
return err
}
if err := manifest.PruneDirectory(manifestsPath); err != nil {
return err
if err := manifest.PruneDirectory(manifestsPath); err != nil && !os.IsNotExist(err) {
return err
}
}
}
}
@@ -2047,6 +2260,17 @@ func (s *Server) PsHandler(c *gin.Context) {
for _, v := range s.sched.loaded {
model := v.model
digest := model.ManifestDigest
if digest == "" {
digest = model.Digest
}
runner := v.runner
if runner == "" {
runner = model.Runner
}
if normalized, err := normalizeRunner(runner); err == nil && normalized != "" {
runner = normalized
}
modelDetails := api.ModelDetails{
Format: model.Config.ModelFormat,
Family: model.Config.ModelFamily,
@@ -2060,9 +2284,10 @@ func (s *Server) PsHandler(c *gin.Context) {
Name: model.ShortName,
Size: int64(v.totalSize),
SizeVRAM: int64(v.vramSize),
Digest: model.Digest,
Digest: digest,
Details: modelDetails,
ExpiresAt: v.expiresAt,
Runner: runner,
}
if v.llama != nil {
mr.ContextLength = v.llama.ContextLength()
@@ -2115,6 +2340,13 @@ func (s *Server) ChatHandler(c *gin.Context) {
return
}
if runner, err := normalizeRunner(req.Runner); err != nil {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
} else {
req.Runner = runner
}
modelRef, err := parseAndValidateModelRef(req.Model)
if err != nil {
writeModelRefParseError(c, err, http.StatusBadRequest, "model is required")
@@ -2139,11 +2371,13 @@ func (s *Server) ChatHandler(c *gin.Context) {
return
}
m, err := GetModel(name.String())
m, err := GetModelForRunner(name.String(), req.Runner)
if err != nil {
switch {
case os.IsNotExist(err):
c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Model)})
case errors.Is(err, manifest.ErrNoCompatibleManifest):
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
case err.Error() == errtypes.InvalidModelNameErrMsg:
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
default:
@@ -2292,7 +2526,7 @@ func (s *Server) ChatHandler(c *gin.Context) {
}
}
r, m, opts, err := s.scheduleRunner(c.Request.Context(), name.String(), caps, req.Options, req.KeepAlive)
r, m, opts, err := s.scheduleRunner(c.Request.Context(), name.String(), req.Runner, caps, req.Options, req.KeepAlive)
if errors.Is(err, errCapabilityCompletion) {
c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support chat", req.Model)})
return
@@ -2631,6 +2865,8 @@ func handleScheduleError(c *gin.Context, name string, err error) {
switch {
case errors.Is(err, errCapabilities), errors.Is(err, errRequired):
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
case errors.Is(err, manifest.ErrNoCompatibleManifest):
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
case errors.Is(err, context.Canceled):
c.JSON(499, gin.H{"error": "request canceled"})
case errors.Is(err, ErrMaxQueue):
@@ -2681,7 +2917,7 @@ func (s *Server) handleImageGenerate(c *gin.Context, req api.GenerateRequest, mo
}
// Schedule the runner for image generation
runner, _, _, err := s.scheduleRunner(c.Request.Context(), modelName, []model.Capability{model.CapabilityImage}, nil, req.KeepAlive)
runner, _, _, err := s.scheduleRunner(c.Request.Context(), modelName, req.Runner, []model.Capability{model.CapabilityImage}, nil, req.KeepAlive)
if err != nil {
handleScheduleError(c, req.Model, err)
return

View File

@@ -109,12 +109,44 @@ func checkFileExists(t *testing.T, p string, expect []string) {
if err != nil {
t.Fatal(err)
}
if strings.HasSuffix(filepath.ToSlash(p), "/blobs/*") {
actual = slices.DeleteFunc(actual, isManifestBlobForTest)
}
if diff := gocmp.Diff(expect, actual, gocmpopts.SortSlices(strings.Compare), gocmpopts.EquateEmpty()); diff != "" {
t.Errorf("file exists mismatch (-want +got):\n%s", diff)
}
}
// checkManifestFiles asserts that the files matching
// <models>/manifests-v2/*/*/*/* are exactly the v2 manifest paths of the given
// model names.
func checkManifestFiles(t *testing.T, names ...string) {
	t.Helper()

	want := make([]string, 0, len(names))
	for _, name := range names {
		path, err := manifest.V2PathForName(model.ParseName(name))
		if err != nil {
			t.Fatal(err)
		}
		want = append(want, path)
	}

	pattern := filepath.Join(envconfig.Models(), "manifests-v2", "*", "*", "*", "*")
	checkFileExists(t, pattern, want)
}
// isManifestBlobForTest reports whether the file at path decodes as a manifest:
// valid JSON with a non-zero schema version, a media type, and either a config
// digest or at least one layer. Unreadable or non-JSON files report false.
func isManifestBlobForTest(path string) bool {
	raw, err := os.ReadFile(path)
	if err != nil {
		return false
	}

	var parsed manifest.Manifest
	if json.Unmarshal(raw, &parsed) != nil {
		return false
	}

	if parsed.SchemaVersion == 0 || parsed.MediaType == "" {
		return false
	}
	return parsed.Config.Digest != "" || len(parsed.Layers) > 0
}
func TestCreateFromBin(t *testing.T) {
gin.SetMode(gin.TestMode)
@@ -136,9 +168,7 @@ func TestCreateFromBin(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code)
}
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test", "latest"),
})
checkManifestFiles(t, "test")
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
filepath.Join(p, "blobs", "sha256-6bcdb8859d417753645538d7bbfbd7ca91a3f0c191aef5379c53c05e86b669dd"),
@@ -196,9 +226,7 @@ func TestCreateFromModel(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code)
}
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test", "latest"),
})
checkManifestFiles(t, "test")
w = createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test2",
@@ -210,10 +238,7 @@ func TestCreateFromModel(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code)
}
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test", "latest"),
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test2", "latest"),
})
checkManifestFiles(t, "test", "test2")
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
filepath.Join(p, "blobs", "sha256-6bcdb8859d417753645538d7bbfbd7ca91a3f0c191aef5379c53c05e86b669dd"),
@@ -306,9 +331,7 @@ func TestCreateRemovesLayers(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code)
}
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test", "latest"),
})
checkManifestFiles(t, "test")
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
filepath.Join(p, "blobs", "sha256-89a2116c3a82d6a97f59f748d86ed4417214353fd178ee54df418fde32495fad"),
@@ -327,9 +350,7 @@ func TestCreateRemovesLayers(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code)
}
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test", "latest"),
})
checkManifestFiles(t, "test")
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
filepath.Join(p, "blobs", "sha256-136bf7c76bac2ec09d6617885507d37829e04b41acc47687d45e512b544e893a"),
@@ -338,6 +359,113 @@ func TestCreateRemovesLayers(t *testing.T) {
})
}
// writeManifestListVariant writes a small manifest under name. The manifest
// has a config layer declaring modelFormat and the "completion" capability,
// plus one license-typed layer whose content is derived from name.
func writeManifestListVariant(t *testing.T, name, modelFormat string) {
	t.Helper()

	cfg := model.ConfigV2{
		ModelFormat:  modelFormat,
		Capabilities: []string{"completion"},
	}
	cfgJSON, err := json.Marshal(cfg)
	if err != nil {
		t.Fatal(err)
	}

	configLayer, err := manifest.NewLayer(bytes.NewReader(cfgJSON), "application/vnd.docker.container.image.v1+json")
	if err != nil {
		t.Fatal(err)
	}

	payload := strings.NewReader(name + " layer")
	contentLayer, err := manifest.NewLayer(payload, "application/vnd.ollama.image.license")
	if err != nil {
		t.Fatal(err)
	}

	err = manifest.WriteManifest(model.ParseName(name), configLayer, []manifest.Layer{contentLayer})
	if err != nil {
		t.Fatal(err)
	}
}
// TestCreateManifestList creates a model from a list of two existing models
// (one GGUF, one safetensors) and checks the stored result:
//   - the named manifest is a manifest list with two children,
//   - the default selection resolves to a manifest with a config,
//   - selecting the MLX runner reports the safetensors format,
//   - each child is a bare reference (no embedded config/layers) whose digest
//     points at a manifest blob that does carry config and layers,
//   - each child has a known runner with the matching format, and no runner
//     appears twice.
func TestCreateManifestList(t *testing.T) {
	gin.SetMode(gin.TestMode)
	t.Setenv("OLLAMA_MODELS", t.TempDir())

	var s Server

	writeManifestListVariant(t, "test-gguf", manifest.FormatGGUF)
	writeManifestListVariant(t, "test-safetensors", manifest.FormatSafetensors)

	w := createRequest(t, s.CreateHandler, api.CreateRequest{
		Model:  "test-list",
		List:   []string{"test-gguf", "test-safetensors"},
		Stream: &stream,
	})
	if w.Code != http.StatusOK {
		t.Fatalf("expected status code 200, actual %d: %s", w.Code, w.Body.String())
	}

	data, err := manifest.ReadManifestData(model.ParseName("test-list"))
	if err != nil {
		t.Fatal(err)
	}

	var parent manifest.Manifest
	if err := json.Unmarshal(data, &parent); err != nil {
		t.Fatal(err)
	}
	if parent.MediaType != manifest.MediaTypeManifestList {
		t.Fatalf("mediaType = %q, want %q", parent.MediaType, manifest.MediaTypeManifestList)
	}
	if len(parent.Manifests) != 2 {
		t.Fatalf("manifest count = %d, want 2", len(parent.Manifests))
	}

	selected, err := manifest.ParseNamedManifest(model.ParseName("test-list"))
	if err != nil {
		t.Fatal(err)
	}
	if selected.Config.Digest == "" {
		t.Fatal("selected manifest is missing config")
	}

	mlxInfo, err := GetModelInfo(api.ShowRequest{Model: "test-list", Runner: manifest.RunnerMLX})
	if err != nil {
		t.Fatal(err)
	}
	if mlxInfo.Details.Format != manifest.FormatSafetensors {
		t.Fatalf("mlx show format = %q, want %q", mlxInfo.Details.Format, manifest.FormatSafetensors)
	}

	want := map[string]string{
		manifest.RunnerGGML: manifest.FormatGGUF,
		manifest.RunnerMLX:  manifest.FormatSafetensors,
	}
	seen := make(map[string]bool, len(want))
	for _, child := range parent.Manifests {
		// Use the comma-ok form: a plain lookup yields "" for an unknown
		// runner, which would wrongly match a child with an empty format.
		wantFormat, ok := want[child.Runner]
		if !ok || wantFormat != child.Format {
			t.Fatalf("child runner/format = %q/%q, want one of %v", child.Runner, child.Format, want)
		}
		// Two children are expected for two runners, so a repeat means one
		// variant is missing from the list.
		if seen[child.Runner] {
			t.Fatalf("duplicate child manifest for runner %q", child.Runner)
		}
		seen[child.Runner] = true

		if child.BlobDigest() == "" {
			t.Fatal("child manifest reference is missing digest")
		}
		if child.Config.Digest != "" || len(child.Layers) != 0 {
			t.Fatalf("child manifest reference embedded config/layers: config=%q layers=%d", child.Config.Digest, len(child.Layers))
		}

		childBlob, err := manifest.BlobsPath(child.BlobDigest())
		if err != nil {
			t.Fatal(err)
		}
		childData, err := os.ReadFile(childBlob)
		if err != nil {
			t.Fatalf("child manifest blob missing: %v", err)
		}

		var resolved manifest.Manifest
		if err := json.Unmarshal(childData, &resolved); err != nil {
			t.Fatal(err)
		}
		if resolved.Config.Digest == "" || len(resolved.Layers) == 0 {
			t.Fatalf("resolved child manifest missing config/layers: config=%q layers=%d", resolved.Config.Digest, len(resolved.Layers))
		}
	}
}
func TestCreateUnsetsSystem(t *testing.T) {
gin.SetMode(gin.TestMode)
@@ -357,9 +485,7 @@ func TestCreateUnsetsSystem(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code)
}
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test", "latest"),
})
checkManifestFiles(t, "test")
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
filepath.Join(p, "blobs", "sha256-0a666d113e8e0a3d27e9c7bd136a0bdfb6241037db50729d81568451ebfdbde8"),
@@ -378,9 +504,7 @@ func TestCreateUnsetsSystem(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code)
}
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test", "latest"),
})
checkManifestFiles(t, "test")
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
filepath.Join(p, "blobs", "sha256-6bcdb8859d417753645538d7bbfbd7ca91a3f0c191aef5379c53c05e86b669dd"),
@@ -411,9 +535,7 @@ func TestCreateMergeParameters(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code)
}
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test", "latest"),
})
checkManifestFiles(t, "test")
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
filepath.Join(p, "blobs", "sha256-1d0ad71299d48c2fb7ae2b98e683643e771f8a5b72be34942af90d97a91c1e37"),
@@ -436,10 +558,7 @@ func TestCreateMergeParameters(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code)
}
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test", "latest"),
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test2", "latest"),
})
checkManifestFiles(t, "test", "test2")
// Display contents of each blob in the directory
blobDir := filepath.Join(p, "blobs")
@@ -495,10 +614,7 @@ func TestCreateMergeParameters(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code)
}
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test", "latest"),
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test2", "latest"),
})
checkManifestFiles(t, "test", "test2")
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
filepath.Join(p, "blobs", "sha256-12f58bb75cb3042d69a7e013ab87fb3c3c7088f50ddc62f0c77bd332f0d44d35"),
@@ -555,9 +671,7 @@ func TestCreateReplacesMessages(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code)
}
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test", "latest"),
})
checkManifestFiles(t, "test")
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
filepath.Join(p, "blobs", "sha256-298baeaf6928a60cf666d88d64a1ba606feb43a2865687c39e40652e407bffc4"),
@@ -589,10 +703,7 @@ func TestCreateReplacesMessages(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code)
}
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test", "latest"),
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test2", "latest"),
})
checkManifestFiles(t, "test", "test2")
// Old layers will not have been pruned
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
@@ -650,9 +761,7 @@ func TestCreateTemplateSystem(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code)
}
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test", "latest"),
})
checkManifestFiles(t, "test")
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
filepath.Join(p, "blobs", "sha256-0a04d979734167da3b80811a1874d734697f366a689f3912589b99d2e86e7ad1"),
@@ -850,9 +959,7 @@ func TestCreateLicenses(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code)
}
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test", "latest"),
})
checkManifestFiles(t, "test")
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
filepath.Join(p, "blobs", "sha256-2af71558e438db0b73a20beab92dc278a94e1bbe974c00c1a33e3ab62d53a608"),

View File

@@ -42,10 +42,7 @@ func TestDelete(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code)
}
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test", "latest"),
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test2", "latest"),
})
checkManifestFiles(t, "test", "test2")
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
filepath.Join(p, "blobs", "sha256-136bf7c76bac2ec09d6617885507d37829e04b41acc47687d45e512b544e893a"),
@@ -60,9 +57,7 @@ func TestDelete(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code)
}
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test2", "latest"),
})
checkManifestFiles(t, "test2")
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
filepath.Join(p, "blobs", "sha256-136bf7c76bac2ec09d6617885507d37829e04b41acc47687d45e512b544e893a"),
@@ -76,7 +71,7 @@ func TestDelete(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code)
}
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{})
checkManifestFiles(t)
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{})
}
@@ -109,7 +104,7 @@ func TestDeleteDuplicateLayers(t *testing.T) {
t.Errorf("expected status code 200, actual %d", w.Code)
}
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{})
checkManifestFiles(t)
}
func TestDeleteCloudSourceNormalizesToLegacyName(t *testing.T) {
@@ -129,14 +124,12 @@ func TestDeleteCloudSourceNormalizesToLegacyName(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code)
}
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "gpt-oss", "20b-cloud"),
})
checkManifestFiles(t, "gpt-oss:20b-cloud")
w = createRequest(t, s.DeleteHandler, api.DeleteRequest{Name: "gpt-oss:20b:cloud"})
if w.Code != http.StatusOK {
t.Fatalf("expected status code 200, actual %d (%s)", w.Code, w.Body.String())
}
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{})
checkManifestFiles(t)
}

View File

@@ -1,6 +1,7 @@
package server
import (
"bytes"
"encoding/json"
"net/http"
"slices"
@@ -9,6 +10,8 @@ import (
"github.com/gin-gonic/gin"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/manifest"
"github.com/ollama/ollama/types/model"
)
func TestList(t *testing.T) {
@@ -64,3 +67,118 @@ func TestList(t *testing.T) {
t.Fatalf("expected slices to be equal %v", actualNames)
}
}
// TestListIncludesAllManifestListChildrenInSize builds a manifest list whose
// two children (GGML/GGUF and MLX/safetensors) share one blob digest. It then
// checks that the size reported by the list handler for the parent equals the
// sum of both configs, the shared blob counted once, and each
// runner-specific blob.
func TestListIncludesAllManifestListChildrenInSize(t *testing.T) {
	gin.SetMode(gin.TestMode)
	t.Setenv("OLLAMA_MODELS", t.TempDir())

	// newLayer stores content as a layer of the given media type.
	newLayer := func(t *testing.T, content []byte, mediaType string) manifest.Layer {
		t.Helper()
		layer, err := manifest.NewLayer(bytes.NewReader(content), mediaType)
		if err != nil {
			t.Fatal(err)
		}
		return layer
	}
	// newConfig stores a config layer declaring only the model format.
	newConfig := func(t *testing.T, format string) manifest.Layer {
		t.Helper()
		data, err := json.Marshal(model.ConfigV2{ModelFormat: format})
		if err != nil {
			t.Fatal(err)
		}
		return newLayer(t, data, "application/vnd.docker.container.image.v1+json")
	}

	ggufConfig := newConfig(t, manifest.FormatGGUF)
	mlxConfig := newConfig(t, manifest.FormatSafetensors)
	sharedBlob := newLayer(t, []byte("shared-weights"), "application/vnd.ollama.image.model")
	ggufBlob := newLayer(t, []byte("gguf-weights"), "application/vnd.ollama.image.model")
	mlxBlob := newLayer(t, []byte("mlx-weights"), manifest.MediaTypeImageTensor)

	ggufName := model.ParseName("test-gguf")
	ggufLayers := []manifest.Layer{sharedBlob, ggufBlob}
	if err := manifest.WriteManifestWithMetadata(ggufName, ggufConfig, ggufLayers, manifest.RunnerGGML, manifest.FormatGGUF); err != nil {
		t.Fatal(err)
	}

	// The MLX child references the same digest as sharedBlob under a
	// different media type.
	sharedAsTensor := manifest.Layer{
		MediaType: manifest.MediaTypeImageTensor,
		Digest:    sharedBlob.Digest,
		Size:      sharedBlob.Size,
	}
	mlxName := model.ParseName("test-mlx")
	mlxLayers := []manifest.Layer{sharedAsTensor, mlxBlob}
	if err := manifest.WriteManifestWithMetadata(mlxName, mlxConfig, mlxLayers, manifest.RunnerMLX, manifest.FormatSafetensors); err != nil {
		t.Fatal(err)
	}

	ggufManifest, err := manifest.ParseNamedManifest(model.ParseName("test-gguf"))
	if err != nil {
		t.Fatal(err)
	}
	mlxManifest, err := manifest.ParseNamedManifestForRunner(model.ParseName("test-mlx"), manifest.RunnerMLX)
	if err != nil {
		t.Fatal(err)
	}

	ggufRef, err := manifest.NewManifestReference(ggufManifest.BlobDigest(), manifest.RunnerGGML, manifest.FormatGGUF)
	if err != nil {
		t.Fatal(err)
	}
	mlxRef, err := manifest.NewManifestReference(mlxManifest.BlobDigest(), manifest.RunnerMLX, manifest.FormatSafetensors)
	if err != nil {
		t.Fatal(err)
	}

	parent := manifest.Manifest{
		SchemaVersion: 2,
		MediaType:     manifest.MediaTypeManifestList,
		Manifests:     []manifest.Manifest{ggufRef, mlxRef},
	}
	parentData, err := json.Marshal(parent)
	if err != nil {
		t.Fatal(err)
	}
	if err := manifest.WriteManifestData(model.ParseName("test-list"), parentData); err != nil {
		t.Fatal(err)
	}

	var s Server
	w := createRequest(t, s.ListHandler, nil)
	if w.Code != http.StatusOK {
		t.Fatalf("expected status code 200, actual %d", w.Code)
	}

	var resp api.ListResponse
	if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
		t.Fatal(err)
	}

	idx := slices.IndexFunc(resp.Models, func(m api.ListModelResponse) bool {
		return m.Name == "test-list:latest"
	})
	if idx < 0 {
		t.Fatal("test-list:latest not found in list response")
	}

	want := ggufConfig.Size + sharedBlob.Size + ggufBlob.Size + mlxConfig.Size + mlxBlob.Size
	if got := resp.Models[idx].Size; got != want {
		t.Fatalf("size = %d, want %d", got, want)
	}
}

View File

@@ -20,6 +20,7 @@ import (
"sort"
"strings"
"testing"
"time"
"unicode"
"github.com/gin-gonic/gin"
@@ -33,6 +34,58 @@ import (
"github.com/ollama/ollama/version"
)
// TestPsHandlerUsesRunningManifestAndRunner seeds the scheduler with one
// loaded runner whose model has distinct Digest and ManifestDigest values. It
// checks that the /api/ps response reports the manifest digest and the MLX
// runner for that model.
func TestPsHandlerUsesRunningManifestAndRunner(t *testing.T) {
	gin.SetMode(gin.TestMode)

	childDigest := strings.Repeat("a", 64)
	parentDigest := strings.Repeat("b", 64)

	loadedModel := &Model{
		ShortName:      "test-model:latest",
		Digest:         parentDigest,
		ManifestDigest: childDigest,
		Runner:         manifest.RunnerMLX,
		Config: model.ConfigV2{
			ModelFormat: manifest.FormatSafetensors,
		},
	}
	ref := &runnerRef{
		model:           loadedModel,
		runner:          manifest.RunnerMLX,
		totalSize:       1024,
		vramSize:        1024,
		expiresAt:       time.Now().Add(time.Hour),
		sessionDuration: time.Hour,
	}
	s := Server{
		sched: &Scheduler{
			loaded: map[string]*runnerRef{"test": ref},
		},
	}

	rec := httptest.NewRecorder()
	ginCtx, _ := gin.CreateTestContext(rec)
	ginCtx.Request = httptest.NewRequest(http.MethodGet, "/api/ps", nil)

	s.PsHandler(ginCtx)

	if rec.Code != http.StatusOK {
		t.Fatalf("status = %d, want %d: %s", rec.Code, http.StatusOK, rec.Body.String())
	}

	var resp api.ProcessResponse
	if err := json.Unmarshal(rec.Body.Bytes(), &resp); err != nil {
		t.Fatal(err)
	}
	if n := len(resp.Models); n != 1 {
		t.Fatalf("model count = %d, want 1", n)
	}

	if gotDigest := resp.Models[0].Digest; gotDigest != childDigest {
		t.Fatalf("digest = %q, want child digest %q", gotDigest, childDigest)
	}
	if gotRunner := resp.Models[0].Runner; gotRunner != manifest.RunnerMLX {
		t.Fatalf("runner = %q, want %q", gotRunner, manifest.RunnerMLX)
	}
}
func createTestFile(t *testing.T, name string) (string, string) {
t.Helper()
@@ -658,11 +711,14 @@ func TestManifestCaseSensitivity(t *testing.T) {
checkManifestList := func() {
t.Helper()
mandir := filepath.Join(os.Getenv("OLLAMA_MODELS"), "manifests/")
mandir, err := manifest.V2Path()
if err != nil {
t.Fatalf("failed to resolve v2 manifest path: %v", err)
}
var entries []string
t.Logf("dir entries:")
fsys := os.DirFS(mandir)
err := fs.WalkDir(fsys, ".", func(path string, info fs.DirEntry, err error) error {
err = fs.WalkDir(fsys, ".", func(path string, info fs.DirEntry, err error) error {
if err != nil {
return err
}
@@ -685,7 +741,14 @@ func TestManifestCaseSensitivity(t *testing.T) {
g := entries[0] // raw path
g = filepath.ToSlash(g)
w := model.ParseName(wantStableName).Filepath()
wp, err := manifest.V2PathForName(model.ParseName(wantStableName))
if err != nil {
t.Fatalf("failed to resolve expected manifest path: %v", err)
}
w, err := filepath.Rel(mandir, wp)
if err != nil {
t.Fatalf("failed to make expected manifest path relative: %v", err)
}
w = filepath.ToSlash(w)
if g != w {
t.Errorf("\ngot: %s\nwant: %s", g, w)
@@ -789,6 +852,212 @@ func TestShow(t *testing.T) {
}
}
// createShowSafetensorsLayer stores a tensor-typed layer whose content is an
// 8-byte little-endian header length followed by a JSON header. The header
// describes a single F32 tensor named tensorName with the given shape and
// data offsets [0, 16]; no tensor data follows it. The returned layer's Name
// is set to tensorName.
func createShowSafetensorsLayer(t *testing.T, tensorName string, shape []int64) manifest.Layer {
	t.Helper()

	tensorMeta := map[string]any{
		"dtype":        "F32",
		"shape":        shape,
		"data_offsets": []int64{0, 16},
	}
	headerJSON, err := json.Marshal(map[string]any{tensorName: tensorMeta})
	if err != nil {
		t.Fatal(err)
	}

	// Length prefix first, then the JSON header itself.
	blob := make([]byte, 0, 8+len(headerJSON))
	blob = binary.LittleEndian.AppendUint64(blob, uint64(len(headerJSON)))
	blob = append(blob, headerJSON...)

	layer, err := manifest.NewLayer(bytes.NewReader(blob), manifest.MediaTypeImageTensor)
	if err != nil {
		t.Fatal(err)
	}
	layer.Name = tensorName
	return layer
}
// writeShowManifestVariant writes a manifest for name, tagged with runner and
// format. The config layer is cfg encoded as JSON. For the GGUF format a model
// layer is created from a test binary built with kv; for the safetensors
// format a single "<name>.weight" tensor layer of shape [2, 2] is created.
// Any other format gets no model layer. extraLayers are appended after the
// model layer.
func writeShowManifestVariant(t *testing.T, name, runner, format string, cfg model.ConfigV2, kv map[string]any, extraLayers ...manifest.Layer) {
	t.Helper()

	cfgJSON, err := json.Marshal(cfg)
	if err != nil {
		t.Fatal(err)
	}
	configLayer, err := manifest.NewLayer(bytes.NewReader(cfgJSON), "application/vnd.docker.container.image.v1+json")
	if err != nil {
		t.Fatal(err)
	}

	allLayers := make([]manifest.Layer, 0, len(extraLayers)+1)
	if format == manifest.FormatGGUF {
		_, binDigest := createBinFile(t, kv, nil)
		ggufLayer, err := manifest.NewLayerFromLayer(binDigest, "application/vnd.ollama.image.model", name)
		if err != nil {
			t.Fatal(err)
		}
		allLayers = append(allLayers, ggufLayer)
	} else if format == manifest.FormatSafetensors {
		tensorLayer := createShowSafetensorsLayer(t, name+".weight", []int64{2, 2})
		allLayers = append(allLayers, tensorLayer)
	}
	allLayers = append(allLayers, extraLayers...)

	err = manifest.WriteManifestWithMetadata(model.ParseName(name), configLayer, allLayers, runner, format)
	if err != nil {
		t.Fatal(err)
	}
}
// TestShowAllManifestsNonListReturnsSingleManifest creates a plain GGUF model
// (not a manifest list) and requests it with AllManifests set. The response is
// expected to contain exactly one manifest entry, reporting the GGML runner,
// the GGUF format, and the architecture stored in the model file.
func TestShowAllManifestsNonListReturnsSingleManifest(t *testing.T) {
	t.Setenv("OLLAMA_MODELS", t.TempDir())

	var s Server

	_, digest := createBinFile(t, ggml.KV{"general.architecture": "test"}, nil)

	w := createRequest(t, s.CreateHandler, api.CreateRequest{
		Name:  "show-model",
		Files: map[string]string{"model.gguf": digest},
	})
	// Fail here if setup broke, rather than later with a misleading show error.
	if w.Code != http.StatusOK {
		t.Fatalf("create: expected status code 200, actual %d: %s", w.Code, w.Body.String())
	}

	w = createRequest(t, s.ShowHandler, api.ShowRequest{
		Model:        "show-model",
		AllManifests: true,
	})
	if w.Code != http.StatusOK {
		t.Fatalf("expected status code 200, actual %d: %s", w.Code, w.Body.String())
	}

	var resp api.ShowManifestsResponse
	if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
		t.Fatal(err)
	}

	if len(resp.Manifests) != 1 {
		t.Fatalf("manifest count = %d, want 1", len(resp.Manifests))
	}
	if resp.Manifests[0].Runner != manifest.RunnerGGML {
		t.Fatalf("runner = %q, want %q", resp.Manifests[0].Runner, manifest.RunnerGGML)
	}
	if resp.Manifests[0].Details.Format != manifest.FormatGGUF {
		t.Fatalf("format = %q, want %q", resp.Manifests[0].Details.Format, manifest.FormatGGUF)
	}
	if resp.Manifests[0].ModelInfo["general.architecture"] != "test" {
		t.Fatalf("architecture = %v, want %q", resp.Manifests[0].ModelInfo["general.architecture"], "test")
	}
}
// TestShowAllManifestsManifestListDedupesLicenses writes an MLX and a GGML
// variant that both carry the same license layer, combines them into a
// manifest list, and requests it with AllManifests set. It checks that the
// children come back in list order, that the top-level license is reported
// once while each child still reports it, and that the MLX child exposes its
// Requires value and its single tensor.
func TestShowAllManifestsManifestListDedupesLicenses(t *testing.T) {
	t.Setenv("OLLAMA_MODELS", t.TempDir())

	const licenseText = "Apache-2.0"
	licenseLayer, err := manifest.NewLayer(bytes.NewReader([]byte(licenseText)), "application/vnd.ollama.image.license")
	if err != nil {
		t.Fatal(err)
	}

	mlxConfig := model.ConfigV2{
		ModelFormat:  manifest.FormatSafetensors,
		ModelFamily:  "qwen3_5_moe",
		ModelType:    "35.1B",
		FileType:     "nvfp4",
		Requires:     "0.19.0",
		Capabilities: []string{"completion", "vision", "thinking", "tools"},
	}
	writeShowManifestVariant(t, "show-mlx", manifest.RunnerMLX, manifest.FormatSafetensors, mlxConfig, nil, licenseLayer)

	ggmlConfig := model.ConfigV2{
		ModelFormat:  manifest.FormatGGUF,
		ModelFamily:  "qwen35moe",
		ModelType:    "36.0B",
		FileType:     "Q4_K_M",
		Capabilities: []string{"completion", "vision", "thinking", "tools"},
	}
	writeShowManifestVariant(t, "show-ggml", manifest.RunnerGGML, manifest.FormatGGUF, ggmlConfig, ggml.KV{"general.architecture": "qwen35moe"}, licenseLayer)

	mlxManifest, err := manifest.ParseNamedManifestForRunner(model.ParseName("show-mlx"), manifest.RunnerMLX)
	if err != nil {
		t.Fatal(err)
	}
	ggmlManifest, err := manifest.ParseNamedManifestForRunner(model.ParseName("show-ggml"), manifest.RunnerGGML)
	if err != nil {
		t.Fatal(err)
	}

	mlxRef, err := manifest.NewManifestReference(mlxManifest.BlobDigest(), manifest.RunnerMLX, manifest.FormatSafetensors)
	if err != nil {
		t.Fatal(err)
	}
	ggmlRef, err := manifest.NewManifestReference(ggmlManifest.BlobDigest(), manifest.RunnerGGML, manifest.FormatGGUF)
	if err != nil {
		t.Fatal(err)
	}

	// MLX is listed first; the order assertion below relies on that.
	parent := manifest.Manifest{
		SchemaVersion: 2,
		MediaType:     manifest.MediaTypeManifestList,
		Manifests:     []manifest.Manifest{mlxRef, ggmlRef},
	}
	parentData, err := json.Marshal(parent)
	if err != nil {
		t.Fatal(err)
	}
	if err := manifest.WriteManifestData(model.ParseName("show-list"), parentData); err != nil {
		t.Fatal(err)
	}

	var s Server
	showReq := api.ShowRequest{
		Model:        "show-list",
		AllManifests: true,
	}
	w := createRequest(t, s.ShowHandler, showReq)
	if w.Code != http.StatusOK {
		t.Fatalf("expected status code 200, actual %d: %s", w.Code, w.Body.String())
	}

	var resp api.ShowManifestsResponse
	if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
		t.Fatal(err)
	}
	if len(resp.Manifests) != 2 {
		t.Fatalf("manifest count = %d, want 2", len(resp.Manifests))
	}

	first, second := resp.Manifests[0], resp.Manifests[1]
	if !(first.Runner == manifest.RunnerMLX && second.Runner == manifest.RunnerGGML) {
		t.Fatalf("runner order = [%q %q], want [%q %q]", first.Runner, second.Runner, manifest.RunnerMLX, manifest.RunnerGGML)
	}
	if resp.License != "Apache-2.0" {
		t.Fatalf("license = %q, want %q", resp.License, "Apache-2.0")
	}
	if !(first.License == "Apache-2.0" && second.License == "Apache-2.0") {
		t.Fatalf("child licenses = [%q %q], want both Apache-2.0", first.License, second.License)
	}
	if first.Requires != "0.19.0" {
		t.Fatalf("mlx requires = %q, want %q", first.Requires, "0.19.0")
	}
	if n := len(first.Tensors); n != 1 {
		t.Fatalf("mlx tensor count = %d, want 1", n)
	}
	if tensorName := first.Tensors[0].Name; tensorName != "show-mlx.weight" {
		t.Fatalf("mlx tensor name = %q, want %q", tensorName, "show-mlx.weight")
	}
}
// TestShowAllManifestsRejectsRunnerSelection checks that a show request which
// sets both Runner and AllManifests is answered with 400 and a fixed JSON
// error body.
func TestShowAllManifestsRejectsRunnerSelection(t *testing.T) {
	const wantBody = `{"error":"runner cannot be used with all_manifests"}`

	var s Server
	req := api.ShowRequest{
		Model:        "show-model",
		Runner:       manifest.RunnerMLX,
		AllManifests: true,
	}

	w := createRequest(t, s.ShowHandler, req)
	if w.Code != http.StatusBadRequest {
		t.Fatalf("expected status code 400, actual %d: %s", w.Code, w.Body.String())
	}

	got := strings.TrimSpace(w.Body.String())
	if got != wantBody {
		t.Fatalf("response = %s", got)
	}
}
func TestShowCopilotUserAgentOverwritesExistingBasename(t *testing.T) {
t.Setenv("OLLAMA_MODELS", t.TempDir())

View File

@@ -84,7 +84,8 @@ func InitScheduler(ctx context.Context) *Scheduler {
// schedulerModelKey returns the scheduler map key for a model.
// GGUF-backed models use ModelPath; safetensors/image models without a
// ModelPath use manifest digest so distinct models don't collide.
// ModelPath use the selected manifest digest so distinct child manifests don't
// collide.
func schedulerModelKey(m *Model) string {
if m == nil {
return ""
@@ -92,6 +93,9 @@ func schedulerModelKey(m *Model) string {
if m.ModelPath != "" {
return m.ModelPath
}
if m.ManifestDigest != "" {
return "manifest:" + m.ManifestDigest
}
if m.Digest != "" {
return "digest:" + m.Digest
}
@@ -530,6 +534,12 @@ iGPUScan:
}
totalSize, vramSize := llama.MemorySize()
runnerName := req.model.Runner
if req.model.IsMLX() && runnerName == "" {
runnerName = "mlx"
} else if name := llm.RunnerName(llama); name != "" {
runnerName = name
}
runner := &runnerRef{
model: req.model,
modelPath: req.model.ModelPath,
@@ -540,6 +550,7 @@ iGPUScan:
gpus: gpuIDs,
discreteGPUs: discreteGPUs,
isImagegen: slices.Contains(req.model.Config.Capabilities, "image"),
runner: runnerName,
totalSize: totalSize,
vramSize: vramSize,
loading: true,
@@ -640,6 +651,7 @@ type runnerRef struct {
gpus []ml.DeviceID // Recorded at time of provisioning
discreteGPUs bool // True if all devices are discrete GPUs - used to skip VRAM recovery check for iGPUs
isImagegen bool // True if loaded via imagegen runner (vs mlxrunner)
runner string
vramSize uint64
totalSize uint64

View File

@@ -499,6 +499,35 @@ func TestSchedGetRunnerUsesDigestKeyWhenModelPathEmpty(t *testing.T) {
require.Len(t, s.pendingReqCh, 1)
}
// TestSchedGetRunnerUsesManifestDigestKeyWhenModelPathEmpty registers a loaded
// runner for a model with ManifestDigest "child-a", then requests a model with
// the same name and Digest but ManifestDigest "child-b". Neither result
// channel should have a value, and exactly one request should be pending.
func TestSchedGetRunnerUsesManifestDigestKeyWhenModelPathEmpty(t *testing.T) {
	ctx, cancel := context.WithTimeout(t.Context(), 100*time.Millisecond)
	defer cancel()

	sched := InitScheduler(ctx)
	opts := api.DefaultOptions()
	opts.NumCtx = 4

	childA := &Model{Name: "list", Digest: "parent", ManifestDigest: "child-a"}
	keyA := schedulerModelKey(childA)
	refA := &runnerRef{
		model:       childA,
		modelKey:    keyA,
		llama:       &mockLlm{vramByGPU: map[ml.DeviceID]uint64{}},
		Options:     &opts,
		numParallel: 1,
	}

	sched.loadedMu.Lock()
	sched.loaded[keyA] = refA
	sched.loadedMu.Unlock()

	childB := &Model{Name: "list", Digest: "parent", ManifestDigest: "child-b"}
	successCh, errCh := sched.GetRunner(ctx, childB, opts, nil)

	require.Empty(t, successCh)
	require.Empty(t, errCh)
	require.Len(t, sched.pendingReqCh, 1)
}
func TestSchedGetRunnerReusesSameDigestWhenModelPathEmpty(t *testing.T) {
ctx, done := context.WithTimeout(t.Context(), 100*time.Millisecond)
defer done()

View File

@@ -142,6 +142,7 @@ func waitForOllamaSignin(ctx context.Context) error {
// RunOptions contains options for running an interactive agent session.
type RunOptions struct {
Model string
Runner string
Messages []api.Message
WordWrap bool
Format string
@@ -260,6 +261,7 @@ func Chat(ctx context.Context, opts RunOptions) (*api.Message, error) {
for {
req := &api.ChatRequest{
Model: opts.Model,
Runner: opts.Runner,
Messages: messages,
Format: json.RawMessage(opts.Format),
Options: opts.Options,
@@ -638,13 +640,13 @@ func renderToolCalls(toolCalls []api.ToolCall, plainText bool) string {
}
// checkModelCapabilities checks if the model supports tools.
func checkModelCapabilities(ctx context.Context, modelName string) (supportsTools bool, err error) {
func checkModelCapabilities(ctx context.Context, modelName, runner string) (supportsTools bool, err error) {
client, err := api.ClientFromEnvironment()
if err != nil {
return false, err
}
resp, err := client.Show(ctx, &api.ShowRequest{Model: modelName})
resp, err := client.Show(ctx, &api.ShowRequest{Model: modelName, Runner: runner})
if err != nil {
return false, err
}
@@ -662,7 +664,7 @@ func checkModelCapabilities(ctx context.Context, modelName string) (supportsTool
// This is called from cmd.go when --experimental flag is set.
// If yoloMode is true, all tool approvals are skipped.
// If enableWebsearch is true, the web search tool is registered.
func GenerateInteractive(cmd *cobra.Command, modelName string, wordWrap bool, options map[string]any, think *api.ThinkValue, hideThinking bool, keepAlive *api.Duration, yoloMode bool, enableWebsearch bool) error {
func GenerateInteractive(cmd *cobra.Command, modelName, runner string, wordWrap bool, options map[string]any, think *api.ThinkValue, hideThinking bool, keepAlive *api.Duration, yoloMode bool, enableWebsearch bool) error {
scanner, err := readline.New(readline.Prompt{
Prompt: ">>> ",
AltPrompt: "... ",
@@ -677,7 +679,7 @@ func GenerateInteractive(cmd *cobra.Command, modelName string, wordWrap bool, op
defer fmt.Printf(readline.EndBracketedPaste)
// Check if model supports tools
supportsTools, err := checkModelCapabilities(cmd.Context(), modelName)
supportsTools, err := checkModelCapabilities(cmd.Context(), modelName, runner)
if err != nil {
fmt.Fprintf(os.Stderr, "\033[1mwarning:\033[0m could not check model capabilities: %v\n", err)
supportsTools = false
@@ -807,7 +809,7 @@ func GenerateInteractive(cmd *cobra.Command, modelName string, wordWrap bool, op
think = &thinkValue
// Check if model supports thinking
if client, err := api.ClientFromEnvironment(); err == nil {
if resp, err := client.Show(cmd.Context(), &api.ShowRequest{Model: modelName}); err == nil {
if resp, err := client.Show(cmd.Context(), &api.ShowRequest{Model: modelName, Runner: runner}); err == nil {
if !slices.Contains(resp.Capabilities, model.CapabilityThinking) {
fmt.Fprintf(os.Stderr, "warning: model %q does not support thinking output\n", modelName)
}
@@ -822,7 +824,7 @@ func GenerateInteractive(cmd *cobra.Command, modelName string, wordWrap bool, op
think = &api.ThinkValue{Value: false}
// Check if model supports thinking
if client, err := api.ClientFromEnvironment(); err == nil {
if resp, err := client.Show(cmd.Context(), &api.ShowRequest{Model: modelName}); err == nil {
if resp, err := client.Show(cmd.Context(), &api.ShowRequest{Model: modelName, Runner: runner}); err == nil {
if !slices.Contains(resp.Capabilities, model.CapabilityThinking) {
fmt.Fprintf(os.Stderr, "warning: model %q does not support thinking output\n", modelName)
}
@@ -884,6 +886,7 @@ func GenerateInteractive(cmd *cobra.Command, modelName string, wordWrap bool, op
}
req := &api.ShowRequest{
Name: modelName,
Runner: runner,
Options: options,
}
resp, err := client.Show(cmd.Context(), req)
@@ -981,7 +984,7 @@ func GenerateInteractive(cmd *cobra.Command, modelName string, wordWrap bool, op
}
// Check if model exists and get its info
info, err := client.Show(cmd.Context(), &api.ShowRequest{Model: newModelName})
info, err := client.Show(cmd.Context(), &api.ShowRequest{Model: newModelName, Runner: runner})
if err != nil {
p.StopAndClear()
if strings.Contains(err.Error(), "not found") {
@@ -996,8 +999,9 @@ func GenerateInteractive(cmd *cobra.Command, modelName string, wordWrap bool, op
if info.RemoteHost == "" {
// Preload the model by sending an empty generate request
req := &api.GenerateRequest{
Model: newModelName,
Think: think,
Model: newModelName,
Runner: runner,
Think: think,
}
err = client.Generate(cmd.Context(), req, func(r api.GenerateResponse) error {
return nil
@@ -1059,6 +1063,7 @@ func GenerateInteractive(cmd *cobra.Command, modelName string, wordWrap bool, op
verbose, _ := cmd.Flags().GetBool("verbose")
opts := RunOptions{
Model: modelName,
Runner: runner,
Messages: messages,
WordWrap: wordWrap,
Format: format,

View File

@@ -389,7 +389,7 @@ func newManifestWriter(opts CreateOptions, capabilities []string, parserName, re
manifestLayers = append(manifestLayers, modelfileLayers...)
}
return manifest.WriteManifest(name, configLayer, manifestLayers)
return manifest.WriteManifestWithMetadata(name, configLayer, manifestLayers, manifest.RunnerMLX, manifest.FormatSafetensors)
}
}

View File

@@ -11,6 +11,8 @@ import (
"strings"
"github.com/ollama/ollama/envconfig"
rootmanifest "github.com/ollama/ollama/manifest"
"github.com/ollama/ollama/types/model"
)
// ManifestLayer represents a layer in the manifest.
@@ -49,9 +51,7 @@ func DefaultManifestDir() string {
// LoadManifest loads a manifest for the given model name.
// Model name format: "modelname" or "modelname:tag" or "host/namespace/name:tag"
func LoadManifest(modelName string) (*ModelManifest, error) {
manifestPath := resolveManifestPath(modelName)
data, err := os.ReadFile(manifestPath)
data, err := rootmanifest.ReadSelectedManifestData(model.ParseName(modelName))
if err != nil {
return nil, fmt.Errorf("read manifest: %w", err)
}
@@ -67,36 +67,6 @@ func LoadManifest(modelName string) (*ModelManifest, error) {
}, nil
}
// resolveManifestPath converts a model name to the path of its manifest file
// under DefaultManifestDir().
//
// Accepted forms are "name", "namespace/name" and "host/namespace/name", each
// optionally followed by ":tag". Components that are not given fall back to
// registry.ollama.ai/library/<name>/latest.
//
// A ":" only introduces a tag when it appears after the last "/". A colon
// that precedes the final slash belongs to a host port (for example
// "localhost:5000/library/llama") and is kept as part of the host instead of
// being mistaken for the tag separator.
func resolveManifestPath(modelName string) string {
	// Defaults: registry.ollama.ai/library/<name>/latest
	host := "registry.ollama.ai"
	namespace := "library"
	name := modelName
	tag := "latest"

	// Split off an explicit tag. Comparing against the last "/" skips colons
	// that are part of "host:port"; when there is no colon at all the index
	// is -1, which is never greater than the slash index.
	if idx := strings.LastIndex(name, ":"); idx > strings.LastIndex(name, "/") {
		tag = name[idx+1:]
		name = name[:idx]
	}

	// Handle "host/namespace/name" and "namespace/name". Any other number of
	// segments leaves name untouched and keeps the default host/namespace.
	parts := strings.Split(name, "/")
	switch len(parts) {
	case 3:
		host = parts[0]
		namespace = parts[1]
		name = parts[2]
	case 2:
		namespace = parts[0]
		name = parts[1]
	}

	return filepath.Join(DefaultManifestDir(), host, namespace, name, tag)
}
// BlobPath returns the full path to a blob given its digest.
func (m *ModelManifest) BlobPath(digest string) string {
// Convert "sha256:abc123" to "sha256-abc123"

View File

@@ -1,8 +1,12 @@
package manifest
import (
"os"
"path/filepath"
"testing"
rootmanifest "github.com/ollama/ollama/manifest"
"github.com/ollama/ollama/types/model"
)
func TestTotalTensorSize(t *testing.T) {
@@ -55,3 +59,39 @@ func TestManifestAndBlobDirsRespectOLLAMAModels(t *testing.T) {
t.Fatalf("DefaultBlobDir() = %q, want %q", got, wantBlobs)
}
}
// TestLoadManifestPrefersV2 writes a manifest at both the legacy path and the
// v2 path for the same model name, each with a distinct mediaType, and checks
// that LoadManifest returns the one stored at the v2 path.
func TestLoadManifestPrefersV2(t *testing.T) {
	t.Setenv("OLLAMA_MODELS", t.TempDir())

	name := model.ParseName("example")

	// seed creates the parent directory of path and writes body to path.
	seed := func(path, body string) {
		t.Helper()
		if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
			t.Fatal(err)
		}
		if err := os.WriteFile(path, []byte(body), 0o644); err != nil {
			t.Fatal(err)
		}
	}

	legacyPath, err := rootmanifest.PathForName(name)
	if err != nil {
		t.Fatal(err)
	}
	seed(legacyPath, `{"schemaVersion":2,"mediaType":"legacy"}`)

	v2Path, err := rootmanifest.V2PathForName(name)
	if err != nil {
		t.Fatal(err)
	}
	seed(v2Path, `{"schemaVersion":2,"mediaType":"v2"}`)

	loaded, err := LoadManifest(name.String())
	if err != nil {
		t.Fatal(err)
	}

	const want = "v2"
	if got := loaded.Manifest.MediaType; got != want {
		t.Fatalf("media type = %q, want %q", got, want)
	}
}

View File

@@ -43,7 +43,13 @@ type modelConfig struct {
// GetSafetensorsLLMInfo extracts model information from safetensors LLM models.
// It reads the config.json layer and returns a map compatible with GGML's KV format.
func GetSafetensorsLLMInfo(name model.Name) (map[string]any, error) {
mf, err := manifest.ParseNamedManifest(name)
return GetSafetensorsLLMInfoForRunner(name, "")
}
// GetSafetensorsLLMInfoForRunner extracts model information from the
// safetensors manifest selected for runner.
func GetSafetensorsLLMInfoForRunner(name model.Name, runner string) (map[string]any, error) {
mf, err := manifest.ParseNamedManifestForRunner(name, runner)
if err != nil {
return nil, fmt.Errorf("failed to load manifest: %w", err)
}
@@ -212,7 +218,13 @@ func getParameterCountFromManifest(mf *manifest.Manifest) (int64, error) {
// GetSafetensorsTensorInfo extracts tensor information from safetensors model layers.
// Each tensor is stored as a minimal safetensors file with an 88-byte header containing metadata.
func GetSafetensorsTensorInfo(name model.Name) ([]api.Tensor, error) {
mf, err := manifest.ParseNamedManifest(name)
return GetSafetensorsTensorInfoForRunner(name, "")
}
// GetSafetensorsTensorInfoForRunner extracts tensor information from the
// safetensors manifest selected for runner.
func GetSafetensorsTensorInfoForRunner(name model.Name, runner string) ([]api.Tensor, error) {
mf, err := manifest.ParseNamedManifestForRunner(name, runner)
if err != nil {
return nil, fmt.Errorf("failed to load manifest: %w", err)
}
@@ -309,7 +321,13 @@ func getTensorInfoFromManifest(mf *manifest.Manifest) ([]api.Tensor, error) {
// Reads quant_type from the first tensor blob's __metadata__.
// Falls back to torch_dtype from config.json if no quant metadata.
func GetSafetensorsDtype(name model.Name) (string, error) {
mf, err := manifest.ParseNamedManifest(name)
return GetSafetensorsDtypeForRunner(name, "")
}
// GetSafetensorsDtypeForRunner returns the quantization type from the
// safetensors manifest selected for runner.
func GetSafetensorsDtypeForRunner(name model.Name, runner string) (string, error) {
mf, err := manifest.ParseNamedManifestForRunner(name, runner)
if err != nil {
return "", fmt.Errorf("failed to load manifest: %w", err)
}

View File

@@ -9,6 +9,7 @@ import (
"testing"
"github.com/ollama/ollama/manifest"
"github.com/ollama/ollama/types/model"
)
func TestBuildModelInfo(t *testing.T) {
@@ -714,6 +715,99 @@ func TestGetTensorInfoFromManifest_Quantized(t *testing.T) {
}
}
// createSafetensorsManifestForRunner writes a manifest for name made of a "{}"
// config layer and a single tensor layer, passing runner and
// manifest.FormatSafetensors to WriteManifestWithMetadata. It then reads the
// manifest back with ParseNamedManifestForRunner and returns it by value.
//
// The tensor blob holds only a header: an 8-byte little-endian length followed
// by JSON describing tensorName as an F32 entry with shape [2, 3] and data
// offsets [0, 24]. No tensor data follows it. Any failure aborts the test.
func createSafetensorsManifestForRunner(t *testing.T, name, runner, tensorName string) manifest.Manifest {
	t.Helper()

	config, err := manifest.NewLayer(bytes.NewReader([]byte("{}")), "application/vnd.docker.container.image.v1+json")
	if err != nil {
		t.Fatal(err)
	}

	headerJSON, err := json.Marshal(map[string]any{
		tensorName: map[string]any{
			"dtype":        "F32",
			"shape":        []int64{2, 3},
			"data_offsets": []int64{0, 24},
		},
	})
	if err != nil {
		t.Fatal(err)
	}

	// Length prefix first, then the JSON header itself.
	blob := make([]byte, 8, 8+len(headerJSON))
	binary.LittleEndian.PutUint64(blob, uint64(len(headerJSON)))
	blob = append(blob, headerJSON...)

	tensor, err := manifest.NewLayer(bytes.NewReader(blob), manifest.MediaTypeImageTensor)
	if err != nil {
		t.Fatal(err)
	}
	tensor.Name = tensorName

	parsed := model.ParseName(name)
	if err := manifest.WriteManifestWithMetadata(parsed, config, []manifest.Layer{tensor}, runner, manifest.FormatSafetensors); err != nil {
		t.Fatal(err)
	}

	written, err := manifest.ParseNamedManifestForRunner(parsed, runner)
	if err != nil {
		t.Fatal(err)
	}
	return *written
}
// TestGetSafetensorsTensorInfoForRunnerSelectsChildManifest stores a manifest
// list under "runner-list" that references one manifest per runner (GGML
// first, then MLX). It checks that GetSafetensorsTensorInfoForRunner reports
// exactly the tensor written for the requested runner.
func TestGetSafetensorsTensorInfoForRunnerSelectsChildManifest(t *testing.T) {
	t.Setenv("OLLAMA_MODELS", t.TempDir())

	mlx := createSafetensorsManifestForRunner(t, "runner-mlx", manifest.RunnerMLX, "mlx.weight")
	ggml := createSafetensorsManifestForRunner(t, "runner-ggml", manifest.RunnerGGML, "ggml.weight")

	mlxRef, err := manifest.NewManifestReference(mlx.BlobDigest(), manifest.RunnerMLX, manifest.FormatSafetensors)
	if err != nil {
		t.Fatal(err)
	}
	ggmlRef, err := manifest.NewManifestReference(ggml.BlobDigest(), manifest.RunnerGGML, manifest.FormatSafetensors)
	if err != nil {
		t.Fatal(err)
	}

	list := manifest.Manifest{
		SchemaVersion: 2,
		MediaType:     manifest.MediaTypeManifestList,
		Manifests:     []manifest.Manifest{ggmlRef, mlxRef},
	}
	payload, err := json.Marshal(list)
	if err != nil {
		t.Fatal(err)
	}

	listName := model.ParseName("runner-list")
	if err := manifest.WriteManifestData(listName, payload); err != nil {
		t.Fatal(err)
	}

	cases := []struct {
		runner string
		want   string
	}{
		{runner: manifest.RunnerMLX, want: "mlx.weight"},
		{runner: manifest.RunnerGGML, want: "ggml.weight"},
	}
	for _, tc := range cases {
		t.Run(tc.runner, func(t *testing.T) {
			tensors, err := GetSafetensorsTensorInfoForRunner(listName, tc.runner)
			if err != nil {
				t.Fatal(err)
			}
			if n := len(tensors); n != 1 {
				t.Fatalf("tensor count = %d, want 1", n)
			}
			if got := tensors[0].Name; got != tc.want {
				t.Fatalf("tensor name = %q, want %q", got, tc.want)
			}
		})
	}
}
func TestGetParameterCountFromManifest(t *testing.T) {
// Create a temp directory for blobs and set OLLAMA_MODELS
tempDir := t.TempDir()