...

Revert "types/model: remove (*Digest).Scan and Digest.Value (#3589 )"
This reverts commit 42f2cc408e.
2026-04-21 00:05:40 +02:00 · 2024-04-11 00:57:08 -07:00 · 2024-04-11 00:45:07 -07:00 · 2024-04-11 00:37:26 -07:00 · 2024-04-10 16:55:12 -07:00 · 2024-04-10 16:52:49 -07:00
28 changed files with 1527 additions and 129 deletions
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -5,7 +5,6 @@ on:
    paths:
      - '**/*'
      - '!docs/**'
-      - '!examples/**'
      - '!README.md'

 jobs:
--- a/api/client.go
+++ b/api/client.go
@@ -1,3 +1,9 @@
+// Package api implements the client-side API for code wishing to interact
+// with the ollama service. The methods of the [Client] type correspond to
+// the ollama REST API as described in https://github.com/ollama/ollama/blob/main/docs/api.md
+//
+// The ollama command-line client itself uses this package to interact with
+// the backend service.
 package api

 import (
@@ -18,6 +24,8 @@ import (
 	"github.com/ollama/ollama/version"
 )

+// Client encapsulates client state for interacting with the ollama
+// service. Use [ClientFromEnvironment] to create new Clients.
 type Client struct {
 	base *url.URL
 	http *http.Client
@@ -39,6 +47,15 @@ func checkError(resp *http.Response, body []byte) error {
 	return apiError
 }

+// ClientFromEnvironment creates a new [Client] using configuration from the
+// environment variable OLLAMA_HOST, which points to the network host and
+// port on which the ollama service is listening. The format of this variable
+// is:
+//
+//	<scheme>://<host>:<port>
+//
+// If the variable is not specified, a default ollama host and port will be
+// used.
 func ClientFromEnvironment() (*Client, error) {
 	defaultPort := "11434"

@@ -190,8 +207,14 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
 	return nil
 }

+// GenerateResponseFunc is a function that [Client.Generate] invokes every time
+// a response is received from the service. If this function returns an error,
+// [Client.Generate] will stop generating and return this error.
 type GenerateResponseFunc func(GenerateResponse) error

+// Generate generates a response for a given prompt. The req parameter should
+// be populated with prompt details. fn is called for each response (there may
+// be multiple responses, e.g. in case streaming is enabled).
 func (c *Client) Generate(ctx context.Context, req *GenerateRequest, fn GenerateResponseFunc) error {
 	return c.stream(ctx, http.MethodPost, "/api/generate", req, func(bts []byte) error {
 		var resp GenerateResponse
@@ -203,8 +226,15 @@ func (c *Client) Generate(ctx context.Context, req *GenerateRequest, fn Generate
 	})
 }

+// ChatResponseFunc is a function that [Client.Chat] invokes every time
+// a response is received from the service. If this function returns an error,
+// [Client.Chat] will stop generating and return this error.
 type ChatResponseFunc func(ChatResponse) error

+// Chat generates the next message in a chat. [ChatRequest] may contain a
+// sequence of messages which can be used to maintain chat history with a model.
+// fn is called for each response (there may be multiple responses, e.g. in case
+// streaming is enabled).
 func (c *Client) Chat(ctx context.Context, req *ChatRequest, fn ChatResponseFunc) error {
 	return c.stream(ctx, http.MethodPost, "/api/chat", req, func(bts []byte) error {
 		var resp ChatResponse
@@ -216,8 +246,14 @@ func (c *Client) Chat(ctx context.Context, req *ChatRequest, fn ChatResponseFunc
 	})
 }

+// PullProgressFunc is a function that [Client.Pull] invokes every time there
+// is progress with a "pull" request sent to the service. If this function
+// returns an error, [Client.Pull] will stop the process and return this error.
 type PullProgressFunc func(ProgressResponse) error

+// Pull downloads a model from the ollama library. fn is called each time
+// progress is made on the request and can be used to display a progress bar,
+// etc.
 func (c *Client) Pull(ctx context.Context, req *PullRequest, fn PullProgressFunc) error {
 	return c.stream(ctx, http.MethodPost, "/api/pull", req, func(bts []byte) error {
 		var resp ProgressResponse
--- a/api/types.go
+++ b/api/types.go
@@ -33,18 +33,46 @@ func (e StatusError) Error() string {

 type ImageData []byte

+// GenerateRequest describes a request sent by [Client.Generate]. While you
+// have to specify the Model and Prompt fields, all the other fields have
+// reasonable defaults for basic uses.
 type GenerateRequest struct {
-	Model     string      `json:"model"`
-	Prompt    string      `json:"prompt"`
-	System    string      `json:"system"`
-	Template  string      `json:"template"`
-	Context   []int       `json:"context,omitempty"`
-	Stream    *bool       `json:"stream,omitempty"`
-	Raw       bool        `json:"raw,omitempty"`
-	Format    string      `json:"format"`
-	KeepAlive *Duration   `json:"keep_alive,omitempty"`
-	Images    []ImageData `json:"images,omitempty"`
+	// Model is the model name; it should be a name familiar to Ollama from
+	// the library at https://ollama.com/library
+	Model string `json:"model"`

+	// Prompt is the textual prompt to send to the model.
+	Prompt string `json:"prompt"`
+
+	// System overrides the model's default system message/prompt.
+	System string `json:"system"`
+
+	// Template overrides the model's default prompt template.
+	Template string `json:"template"`
+
+	// Context is the context parameter returned from a previous call to
+	// Generate. It can be used to keep a short conversational memory.
+	Context []int `json:"context,omitempty"`
+
+	// Stream specifies whether the response is streaming; it is true by default.
+	Stream *bool `json:"stream,omitempty"`
+
+	// Raw set to true means that no formatting will be applied to the prompt.
+	Raw bool `json:"raw,omitempty"`
+
+	// Format specifies the format to return a response in.
+	Format string `json:"format"`
+
+	// KeepAlive controls how long the model will stay loaded in memory following
+	// this request.
+	KeepAlive *Duration `json:"keep_alive,omitempty"`
+
+	// Images is an optional list of base64-encoded images accompanying this
+	// request, for multimodal models.
+	Images []ImageData `json:"images,omitempty"`
+
+	// Options lists model-specific options. For example, temperature can be
+	// set through this field, if the model supports it.
 	Options map[string]interface{} `json:"options"`
 }

--- a/app/tray/tray.go
+++ b/app/tray/tray.go
@@ -24,10 +24,5 @@ func NewTray() (commontray.OllamaTray, error) {
 		return nil, fmt.Errorf("failed to load icon %s: %w", iconName, err)
 	}

-	tray, err := InitPlatformTray(icon, updateIcon)
-	if err != nil {
-		return nil, err
-	}
-
-	return tray, nil
+	return InitPlatformTray(icon, updateIcon)
 }
--- a/examples/go-generate-streaming/main.go
+++ b/examples/go-generate-streaming/main.go
@@ -0,0 +1,40 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"log"
+
+	"github.com/ollama/ollama/api"
+)
+
+func main() {
+	client, err := api.ClientFromEnvironment()
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	// By default, GenerateRequest is streaming.
+	req := &api.GenerateRequest{
+		Model:  "gemma",
+		Prompt: "how many planets are there?",
+	}
+
+	ctx := context.Background()
+	respFunc := func(resp api.GenerateResponse) error {
+		// Only print the response here; GenerateResponse has a number of other
+		// interesting fields you may want to examine.
+
+		// In streaming mode, responses are partial so we call fmt.Print (and not
+		// Println) in order to avoid spurious newlines being introduced. The
+		// model will insert its own newlines if it wants.
+		fmt.Print(resp.Response)
+		return nil
+	}
+
+	err = client.Generate(ctx, req, respFunc)
+	if err != nil {
+		log.Fatal(err)
+	}
+	fmt.Println()
+}
--- a/examples/go-generate/main.go
+++ b/examples/go-generate/main.go
@@ -0,0 +1,37 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"log"
+
+	"github.com/ollama/ollama/api"
+)
+
+func main() {
+	client, err := api.ClientFromEnvironment()
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	req := &api.GenerateRequest{
+		Model:  "gemma",
+		Prompt: "how many planets are there?",
+
+		// set streaming to false
+		Stream: new(bool),
+	}
+
+	ctx := context.Background()
+	respFunc := func(resp api.GenerateResponse) error {
+		// Only print the response here; GenerateResponse has a number of other
+		// interesting fields you may want to examine.
+		fmt.Println(resp.Response)
+		return nil
+	}
+
+	err = client.Generate(ctx, req, respFunc)
+	if err != nil {
+		log.Fatal(err)
+	}
+}
--- a/examples/golang-simplegenerate/README.md
+++ b/examples/golang-simplegenerate/README.md
--- a/examples/golang-simplegenerate/main.go
+++ b/examples/golang-simplegenerate/main.go
--- a/format/bytes.go
+++ b/format/bytes.go
@@ -50,7 +50,7 @@ func HumanBytes(b int64) string {
 	}
 }

-func HumanBytes2(b int64) string {
+func HumanBytes2(b uint64) string {
 	switch {
 	case b >= MebiByte:
 		return fmt.Sprintf("%.1f MiB", float64(b)/MebiByte)
--- a/gpu/gpu.go
+++ b/gpu/gpu.go
@@ -243,7 +243,7 @@ func getCPUMem() (memInfo, error) {
 	return ret, nil
 }

-func CheckVRAM() (int64, error) {
+func CheckVRAM() (uint64, error) {
 	userLimit := os.Getenv("OLLAMA_MAX_VRAM")
 	if userLimit != "" {
 		avail, err := strconv.ParseInt(userLimit, 10, 64)
@@ -251,11 +251,11 @@ func CheckVRAM() (int64, error) {
 			return 0, fmt.Errorf("Invalid OLLAMA_MAX_VRAM setting %s: %s", userLimit, err)
 		}
 		slog.Info(fmt.Sprintf("user override OLLAMA_MAX_VRAM=%d", avail))
-		return avail, nil
+		return uint64(avail), nil
 	}
 	gpuInfo := GetGPUInfo()
 	if gpuInfo.FreeMemory > 0 && (gpuInfo.Library == "cuda" || gpuInfo.Library == "rocm") {
-		return int64(gpuInfo.FreeMemory), nil
+		return gpuInfo.FreeMemory, nil
 	}

 	return 0, fmt.Errorf("no GPU detected") // TODO - better handling of CPU based memory determiniation
--- a/gpu/gpu_darwin.go
+++ b/gpu/gpu_darwin.go
@@ -17,7 +17,7 @@ import (
 )

 // CheckVRAM returns the free VRAM in bytes on Linux machines with NVIDIA GPUs
-func CheckVRAM() (int64, error) {
+func CheckVRAM() (uint64, error) {
 	userLimit := os.Getenv("OLLAMA_MAX_VRAM")
 	if userLimit != "" {
 		avail, err := strconv.ParseInt(userLimit, 10, 64)
@@ -25,15 +25,14 @@ func CheckVRAM() (int64, error) {
 			return 0, fmt.Errorf("Invalid OLLAMA_MAX_VRAM setting %s: %s", userLimit, err)
 		}
 		slog.Info(fmt.Sprintf("user override OLLAMA_MAX_VRAM=%d", avail))
-		return avail, nil
+		return uint64(avail), nil
 	}

 	if runtime.GOARCH == "amd64" {
 		// gpu not supported, this may not be metal
 		return 0, nil
 	}
-	recommendedMaxVRAM := int64(C.getRecommendedMaxVRAM())
-	return recommendedMaxVRAM, nil
+	return uint64(C.getRecommendedMaxVRAM()), nil
 }

 func GetGPUInfo() GpuInfo {
--- a/gpu/types.go
+++ b/gpu/types.go
@@ -15,7 +15,7 @@ type GpuInfo struct {
 	Variant string `json:"variant,omitempty"`

 	// MinimumMemory represents the minimum memory required to use the GPU
-	MinimumMemory int64 `json:"-"`
+	MinimumMemory uint64 `json:"-"`

 	// TODO add other useful attributes about the card here for discovery information
 }
--- a/llm/ggla.go
+++ b/llm/ggla.go
@@ -49,7 +49,7 @@ func (llm *ggla) KV() KV {
 	return llm.kv
 }

-func (llm *ggla) Tensors() []*Tensor {
+func (llm *ggla) Tensors() Tensors {
 	return llm.tensors
 }

--- a/llm/ggml.go
+++ b/llm/ggml.go
@@ -13,16 +13,6 @@ type GGML struct {
 	model
 }

-func (ggml *GGML) LayerSize(prefix string) (n int64) {
-	for _, t := range ggml.Tensors() {
-		if strings.HasPrefix(t.Name, prefix) {
-			n += int64(t.size())
-		}
-	}
-
-	return
-}
-
 const (
 	fileTypeF32 uint32 = iota
 	fileTypeF16
@@ -101,7 +91,7 @@ func fileType(fileType uint32) string {

 type model interface {
 	KV() KV
-	Tensors() []*Tensor
+	Tensors() Tensors
 }

 type KV map[string]any
@@ -167,6 +157,36 @@ func (kv KV) ContextLength() uint64 {
 	return kv.u64(fmt.Sprintf("%s.context_length", kv.Architecture()))
 }

+type Tensors []*Tensor
+
+// Layers groups the tensors by layer. The layer key is the first
+// dot-separated component of the tensor name, after dropping a leading "blk"
+// component (so "blk.0.attn_q.weight" belongs to layer "0"). Within a layer,
+// each tensor is keyed by the remainder of its name ("attn_q.weight").
+func (ts Tensors) Layers() map[string]Layer {
+	layers := make(map[string]Layer)
+	for _, t := range ts {
+		parts := strings.Split(t.Name, ".")
+		// Drop the "blk" prefix only when something follows it; a tensor named
+		// exactly "blk" would otherwise leave parts empty and make parts[0]
+		// below panic with an index out of range.
+		if len(parts) > 1 && parts[0] == "blk" {
+			parts = parts[1:]
+		}
+
+		layer, ok := layers[parts[0]]
+		if !ok {
+			layer = make(Layer)
+			layers[parts[0]] = layer
+		}
+
+		layer[strings.Join(parts[1:], ".")] = t
+	}
+
+	return layers
+}
+
+type Layer map[string]*Tensor
+
+func (l Layer) size() (size uint64) {
+	for _, t := range l {
+		size += t.size()
+	}
+
+	return size
+}
+
 type Tensor struct {
 	Name   string `json:"name"`
 	Kind   uint32 `json:"kind"`
@@ -304,49 +324,52 @@ func DecodeGGML(rs io.ReadSeeker) (*GGML, int64, error) {
 	}, offset, nil
 }

-func (llm GGML) GraphSize(context, batch int) (int64, bool) {
-	embeddingLength := llm.KV().EmbeddingLength()
-	headCount := llm.KV().HeadCount()
-	headCountKV := llm.KV().HeadCountKV()
-	vocabLength := len(llm.KV()["tokenizer.ggml.tokens"].([]any))
-
-	var attnQKVWeight1 uint64 = 0
-	for _, t := range llm.Tensors() {
-		if strings.HasSuffix(t.Name, ".attn_qkv.weight") && len(t.Shape) >= 2 {
-			attnQKVWeight1 = t.Shape[1]
-			break
-		}
-	}
-
-	var ffnGate1 uint64 = 0
-	for _, t := range llm.Tensors() {
-		if strings.Index(t.Name, ".ffn_gate") > 0 && len(t.Shape) >= 2 {
-			ffnGate1 = t.Shape[1]
-			break
-		}
-	}
+func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload uint64) {
+	embedding := llm.KV().EmbeddingLength()
+	heads := llm.KV().HeadCount()
+	headsKV := llm.KV().HeadCountKV()
+	vocab := uint64(len(llm.KV()["tokenizer.ggml.tokens"].([]any)))

 	switch llm.KV().Architecture() {
-	case "gemma", "command-r":
-		return 4 * int64(batch) * int64(embeddingLength+uint64(vocabLength)), true
-	case "phi2":
-		return max(
-			4*int64(batch)*int64(embeddingLength+uint64(vocabLength)),
-			4*int64(batch)*int64(1+4*embeddingLength+uint64(context)+attnQKVWeight1+uint64(context)*headCount),
-		), true
-	case "qwen2":
-		return max(
-			4*int64(batch)*int64(embeddingLength+uint64(vocabLength)),
-			4*int64(batch)*int64(1+2*embeddingLength+uint64(context)+uint64(context)*headCount),
-		), true
 	case "llama":
-		if ffnGate1 > 0 {
-			// moe
-			return 4 * int64(batch) * int64(2+3*embeddingLength+uint64(context)+uint64(context)*headCount+2*headCountKV+ffnGate1), true
-		}
-	
-		return 4 * int64(batch) * int64(1+4*embeddingLength+uint64(context)+uint64(context)*headCount), true
+		fullOffload = 4 * batch * (1 + 4*embedding + context*(1+heads))
+
+		partialOffload = 4 * batch * embedding
+		partialOffload += max(
+			4*batch*(1+embedding+max(context, embedding))+embedding*embedding*9/16+4*context*(batch*heads+embedding/heads*headsKV),
+			4*batch*(embedding+vocab)+embedding*vocab*105/128,
+		)
+	case "gemma":
+		fullOffload = 4 * batch * (embedding + vocab)
+		partialOffload = 4*batch*(2*embedding+vocab+1) + embedding*vocab*105/128
+	case "command-r":
+		fullOffload = max(
+			4*batch*(embedding+vocab),
+			4*batch*(2+4*embedding+context*(1+heads)),
+		)
+
+		partialOffload = max(
+			4*batch*(embedding+vocab)+embedding*vocab*105/128,
+			4*batch*(1+2*embedding+context*(1+heads))+ 4*embedding*context+embedding*embedding*9/16,
+		)
+	case "qwen2":
+		fullOffload = max(
+			4*batch*(embedding+vocab),
+			4*batch*(1+2*embedding+context+context*heads),
+		)
+
+		partialOffload = max(
+			4*batch*(embedding+vocab)+embedding*vocab*105/128,
+			4*(batch*(1+2*embedding+context*(1+heads))+embedding*(1+context)),
+		)
+	case "phi2":
+		fullOffload = max(
+			4*batch*(embedding+vocab),
+			4*batch*(1+4*embedding+context+context*heads),
+		)
+
+		partialOffload = 4*batch*(2*embedding+vocab) + embedding*vocab*105/128
 	}

-	return 0, false
+	return
 }
--- a/llm/gguf.go
+++ b/llm/gguf.go
@@ -109,7 +109,7 @@ func (llm *gguf) KV() KV {
 	return llm.kv
 }

-func (llm *gguf) Tensors() []*Tensor {
+func (llm *gguf) Tensors() Tensors {
 	return llm.tensors
 }

--- a/llm/server.go
+++ b/llm/server.go
@@ -41,10 +41,6 @@ var cpuOnlyFamilies = []string{
 }

 func NewLlamaServer(model string, adapters, projectors []string, opts api.Options) (*LlamaServer, error) {
-	if _, err := os.Stat(model); err != nil {
-		return nil, err
-	}
-
 	f, err := os.Open(model)
 	if err != nil {
 		return nil, err
@@ -65,67 +61,79 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
 		opts.NumCtx = 4
 	}

-	availableMemory, _ := gpu.CheckVRAM()
+	memoryAvailable, _ := gpu.CheckVRAM()
 	info := gpu.GetGPUInfo()

-	usedMemory := info.MinimumMemory
+	memoryMinimum := info.MinimumMemory
 	for _, projector := range projectors {
-		usedMemory += projectorMemoryRequirements(projector)
+		memoryMinimum += projectorMemoryRequirements(projector)

 		// multimodal models require at least 2048 context
 		opts.NumCtx = max(opts.NumCtx, 2048)
 	}

 	// fp16 k,v = (1 (k) + 1 (v)) * sizeof(float16) * n_ctx * n_layer * n_embd / n_head * n_head_kv
-	kv := 2 * 2 * int64(opts.NumCtx) * int64(ggml.KV().BlockCount()) * int64(ggml.KV().EmbeddingLength()) / int64(ggml.KV().HeadCount()) * int64(ggml.KV().HeadCountKV())
+	var kv uint64 = 2 * 2 * uint64(opts.NumCtx) * ggml.KV().BlockCount() * ggml.KV().EmbeddingLength() / ggml.KV().HeadCount() * ggml.KV().HeadCountKV()

-	graph, ok := ggml.GraphSize(opts.NumCtx, min(opts.NumCtx, opts.NumBatch))
-	if !ok {
-		graph = int64(ggml.KV().GQA()) * kv / 6
+	graphPartialOffload, graphFullOffload := ggml.GraphSize(uint64(opts.NumCtx), uint64(min(opts.NumCtx, opts.NumBatch)))
+	if graphPartialOffload == 0 {
+		graphPartialOffload = ggml.KV().GQA() * kv / 6
 	}

-	usedMemory += graph
-
-	if (usedMemory > availableMemory || slices.Contains(cpuOnlyFamilies, ggml.KV().Architecture())) && info.Library != "metal" {
-		info.Library = "cpu"
+	if graphFullOffload == 0 {
+		graphFullOffload = graphPartialOffload
 	}

-	requiredMemory := usedMemory
+	// memoryRequiredTotal represents the memory required for full GPU offloading (all layers)
+	memoryRequiredTotal := memoryMinimum + graphFullOffload

-	var layers int
-	for i := 0; i < int(ggml.KV().BlockCount()); i++ {
-		layerMemory := ggml.LayerSize(fmt.Sprintf("blk.%d.", i)) + kv/int64(ggml.KV().BlockCount())
-		requiredMemory += layerMemory
+	// memoryRequiredPartial represents the memory required for partial GPU offloading (n > 0, n < layers)
+	memoryRequiredPartial := memoryMinimum + graphPartialOffload

-		if availableMemory > usedMemory+layerMemory && (opts.NumGPU < 0 || layers < opts.NumGPU) {
-			usedMemory += layerMemory
-			layers++
+	if info.Library != "metal" {
+		if memoryRequiredPartial > memoryAvailable || slices.Contains(cpuOnlyFamilies, ggml.KV().Architecture()) {
+			info.Library = "cpu"
 		}
 	}

-	memOutputLayer := ggml.LayerSize("output.")
-	requiredMemory += memOutputLayer
+	var layerCount int
+	layers := ggml.Tensors().Layers()
+	for i := 0; i < int(ggml.KV().BlockCount()); i++ {
+		memoryLayer := layers[fmt.Sprintf("%d", i)].size()

-	// only offload output layer if all repeating layers are offloaded
-	if layers >= int(ggml.KV().BlockCount()) && availableMemory > usedMemory+memOutputLayer {
-		usedMemory += memOutputLayer
-		layers++
+		// KV is proportional to the number of layers
+		memoryLayer += kv / ggml.KV().BlockCount()
+
+		memoryRequiredTotal += memoryLayer
+		if memoryAvailable > memoryRequiredPartial+memoryLayer {
+			memoryRequiredPartial += memoryLayer
+			layerCount++
+		}
+	}
+
+	memoryLayerOutput := layers["output"].size()
+	memoryRequiredTotal += memoryLayerOutput
+	if memoryAvailable > memoryRequiredTotal {
+		layerCount = int(ggml.KV().BlockCount()) + 1
+		memoryRequiredPartial = memoryRequiredTotal
+	}
+
+	if opts.NumGPU < 0 {
+		opts.NumGPU = layerCount
 	}

 	slog.Info(
 		"offload to gpu",
-		"layers", layers,
-		"required", format.HumanBytes2(requiredMemory),
-		"used", format.HumanBytes2(usedMemory),
-		"available", format.HumanBytes2(availableMemory),
+		"reallayers", opts.NumGPU,
+		"layers", layerCount,
+		"required", format.HumanBytes2(memoryRequiredTotal),
+		"used", format.HumanBytes2(memoryRequiredPartial),
+		"available", format.HumanBytes2(memoryAvailable),
 		"kv", format.HumanBytes2(kv),
-		"graph", format.HumanBytes2(graph),
+		"fulloffload", format.HumanBytes2(graphFullOffload),
+		"partialoffload", format.HumanBytes2(graphPartialOffload),
 	)

-	if opts.NumGPU < 0 && info.Library != "cpu" {
-		opts.NumGPU = layers
-	}
-
 	if len(adapters) > 1 {
 		return nil, errors.New("ollama supports only one lora adapter, but multiple were provided")
 	}
@@ -282,7 +290,7 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
 	return nil, finalErr
 }

-func projectorMemoryRequirements(filename string) int64 {
+func projectorMemoryRequirements(filename string) uint64 {
 	file, err := os.Open(filename)
 	if err != nil {
 		return 0
@@ -294,18 +302,12 @@ func projectorMemoryRequirements(filename string) int64 {
 		return 0
 	}

-	prefixes := make(map[string]struct{})
-	for _, layer := range ggml.Tensors() {
-		parts := strings.Split(layer.Name, ".")
-		prefixes[strings.Join(parts[:2], ".")] = struct{}{}
+	var mem uint64
+	for _, layer := range ggml.Tensors().Layers() {
+		mem += layer.size()
 	}

-	var ask int64
-	for prefix := range prefixes {
-		ask += ggml.LayerSize(prefix)
-	}
-
-	return ask
+	return mem
 }

 type ServerStatus int
--- a/server/download.go
+++ b/server/download.go
@@ -247,7 +247,8 @@ func (b *blobDownload) downloadChunk(ctx context.Context, requestURL *url.URL, w
 				}

 				if !part.lastUpdated.IsZero() && time.Since(part.lastUpdated) > 5*time.Second {
-					slog.Info(fmt.Sprintf("%s part %d stalled; retrying", b.Digest[7:19], part.N))
+					const msg = "%s part %d stalled; retrying. If this persists, press ctrl-c to exit, then 'ollama pull' to find a faster connection."
+					slog.Info(fmt.Sprintf(msg, b.Digest[7:19], part.N))
 					// reset last updated
 					part.lastUpdated = time.Time{}
 					return errPartStalled
--- a/types/model/digest.go
+++ b/types/model/digest.go
@@ -0,0 +1,106 @@
+package model
+
+import (
+	"database/sql"
+	"database/sql/driver"
+	"errors"
+	"fmt"
+	"log/slog"
+	"strings"
+	"unicode"
+)
+
+// Digest represents a digest of a model Manifest. It is a comparable value
+// type and is immutable.
+//
+// The zero Digest is not a valid digest.
+type Digest struct {
+	s string
+}
+
+// Type returns the digest type of the digest.
+//
+// Example:
+//
+//	ParseDigest("sha256-1234").Type() // returns "sha256"
+func (d Digest) Type() string {
+	typ, _, _ := strings.Cut(d.s, "-")
+	return typ
+}
+
+// String returns the digest in the form of "<digest-type>-<digest>", or the
+// empty string if the digest is invalid.
+func (d Digest) String() string { return d.s }
+
+// IsValid returns true if the digest is valid (not zero).
+//
+// A valid digest may be created only by ParseDigest, or
+// ParseName(name).Digest().
+func (d Digest) IsValid() bool { return d.s != "" }
+
+// LogValue implements slog.LogValuer.
+func (d Digest) LogValue() slog.Value {
+	return slog.StringValue(d.String())
+}
+
+var (
+	_ driver.Valuer  = Digest{}
+	_ sql.Scanner    = (*Digest)(nil)
+	_ slog.LogValuer = Digest{}
+)
+
+// Scan implements the sql.Scanner interface.
+func (d *Digest) Scan(src any) error {
+	if d.IsValid() {
+		return errors.New("model.Digest: illegal Scan on valid Digest")
+	}
+	switch v := src.(type) {
+	case string:
+		*d = ParseDigest(v)
+		return nil
+	case []byte:
+		*d = ParseDigest(string(v))
+		return nil
+	}
+	return fmt.Errorf("model.Digest: invalid Scan source %T", src)
+}
+
+// Value implements the driver.Valuer interface.
+func (d Digest) Value() (driver.Value, error) {
+	return d.String(), nil
+}
+
+// ParseDigest parses a string in the form of "<digest-type>-<digest>" into a
+// Digest.
+func ParseDigest(s string) Digest {
+	typ, digest, ok := strings.Cut(s, "-")
+	if ok && isValidDigestType(typ) && isValidHex(digest) {
+		return Digest{s: s}
+	}
+	return Digest{}
+}
+
+func isValidDigestType(s string) bool {
+	if len(s) == 0 {
+		return false
+	}
+	for _, r := range s {
+		if !unicode.IsLower(r) && !unicode.IsDigit(r) {
+			return false
+		}
+	}
+	return true
+}
+
+func isValidHex(s string) bool {
+	if len(s) == 0 {
+		return false
+	}
+	for i := range s {
+		c := s[i]
+		if c < '0' || c > '9' && c < 'a' || c > 'f' {
+			return false
+		}
+	}
+	return true
+}
--- a/types/model/digest_test.go
+++ b/types/model/digest_test.go
@@ -0,0 +1,46 @@
+package model
+
+import "testing"
+
+var testDigests = map[string]Digest{
+	"":                 {},
+	"sha256-1234":      {s: "sha256-1234"},
+	"sha256-5678":      {s: "sha256-5678"},
+	"blake2-9abc":      {s: "blake2-9abc"},
+	"-1234":            {},
+	"sha256-":          {},
+	"sha256-1234-5678": {},
+	"sha256-P":         {}, //         invalid  hex
+	"sha256-1234P":     {},
+	"---":              {},
+}
+
+func TestDigestParse(t *testing.T) {
+	// Test cases.
+	for s, want := range testDigests {
+		got := ParseDigest(s)
+		t.Logf("ParseDigest(%q) = %#v", s, got)
+		if got != want {
+			t.Errorf("ParseDigest(%q) = %q; want %q", s, got, want)
+		}
+	}
+}
+
+func TestDigestString(t *testing.T) {
+	// Test cases.
+	for s, d := range testDigests {
+		want := s
+		if !d.IsValid() {
+			want = ""
+		}
+		got := d.String()
+		if got != want {
+			t.Errorf("ParseDigest(%q).String() = %q; want %q", s, got, want)
+		}
+
+		got = ParseDigest(s).String()
+		if got != want {
+			t.Errorf("roundtrip ParseDigest(%q).String() = %q; want %q", s, got, want)
+		}
+	}
+}
--- a/types/model/name.go
+++ b/types/model/name.go
@@ -0,0 +1,569 @@
+package model
+
+import (
+	"cmp"
+	"errors"
+	"hash/maphash"
+	"io"
+	"log/slog"
+	"slices"
+	"strings"
+	"sync"
+
+	"github.com/ollama/ollama/types/structs"
+)
+
+// Errors
+var (
+	// ErrInvalidName, ErrIncompleteName, and ErrInvalidDigest are not
+	// used by this package, but are exported so that other packages can
+	// use them, instead of defining their own errors for them.
+	ErrInvalidName    = errors.New("invalid model name")
+	ErrIncompleteName = errors.New("incomplete model name")
+	ErrInvalidDigest  = errors.New("invalid digest")
+)
+
+// Defaults
+const (
+	// DefaultMask is the default mask used by [Name.DisplayShortest].
+	DefaultMask = "registry.ollama.ai/library/_:latest"
+
+	// DefaultFill is the default fill used by [ParseName].
+	DefaultFill = "registry.ollama.ai/library/_:latest"
+)
+
+const MaxNamePartLen = 128
+
+type PartKind int
+
+// Levels of concreteness
+const (
+	// Each value aligns with its index in the Name.parts array.
+
+	PartHost PartKind = iota
+	PartNamespace
+	PartModel
+	PartTag
+	PartBuild
+	PartDigest
+
+	// Invalid is a special part that is used to indicate that a part is
+	// invalid. It is not a valid part of a Name.
+	//
+	// It should be kept as the last part in the list.
+	PartInvalid
+)
+
+var kindNames = map[PartKind]string{
+	PartHost:      "Host",
+	PartNamespace: "Namespace",
+	PartModel:     "Name",
+	PartTag:       "Tag",
+	PartBuild:     "Build",
+	PartDigest:    "Digest",
+	PartInvalid:   "Invalid",
+}
+
+func (k PartKind) String() string {
+	return cmp.Or(kindNames[k], "Unknown")
+}
+
+// Name is an opaque reference to a model. It holds the parts of a model
+// with the case preserved, but is not directly comparable with other Names
+// since model names can be represented with different casing depending on
+// the use case. For instance, "Mistral" and "mistral" are the same model
+// but each version may have come from different sources (e.g. copied from a
+// Web page, or from a file path).
+//
+// Valid Names can ONLY be constructed by calling [ParseName].
+//
+// A Name is valid if and only if it has a valid Model part. The other parts
+// are optional.
+//
+// A Name is considered "complete" if it has all parts present. To check if a
+// Name is complete, use [Name.IsComplete].
+//
+// To compare two names in a case-insensitive manner, use [Name.EqualFold].
+//
+// The parts of a Name are:
+//
+//   - Host: the domain of the model (optional)
+//   - Namespace: the namespace of the model (optional)
+//   - Model: the name of the model (required)
+//   - Tag: the tag of the model (optional)
+//   - Build: the build of the model; usually the quantization or "file type" (optional)
+//
+// The parts can be obtained in their original form by calling [Name.Parts].
+//
+// To check if a Name has at minimum a valid model part, use [Name.IsValid].
+//
+// To make a Name by filling in missing parts from another Name, use [Fill].
+type Name struct {
+	_     structs.Incomparable
+	parts [6]string // host, namespace, model, tag, build, digest
+
+	// TODO(bmizerany): track offsets and hold s (raw string) here? We
+	// could pack the offsets all into a single uint64 since the first
+	// parts take less bits since their max offset is less than the max
+	// offset of the next part. This would save a ton of bytes per Name
+	// and mean zero allocations for String.
+}
+
+// ParseName parses s into a Name, and returns the result of filling it with
+// defaults. The input string must be a valid string
+// representation of a model name in the form:
+//
+//	[host/][namespace/]<model>[:tag][+build][@<digest-type>-<digest>]
+//
+// The model part is required, all others are optional. If a part is missing,
+// it is taken from defaults, or left empty in the returned Name if defaults
+// does not provide it. If a part is invalid, the zero Name value is returned.
+//
+// The build part is normalized to uppercase.
+//
+// Examples of valid paths:
+//
+//	"example.com/library/mistral:7b+x"
+//	"example.com/eva/mistral:7b+Q4_0"
+//	"mistral:7b+x"
+//	"example.com/mike/mistral:latest+Q4_0"
+//	"example.com/bruce/mistral:latest"
+//	"example.com/pdevine/thisisfine:7b+Q4_0@sha256-1234567890abcdef"
+//
+// Examples of invalid paths:
+//
+//	"example.com/mistral:7b+"
+//	"example.com/mistral:7b+Q4_0+"
+//	"x/y/z/z:8n+I"
+//	""
+//
+// It returns the zero value if any part is invalid.
+//
+// As a rule of thumb, a valid name is one that can be round-tripped with
+// the [Name.String] method. That means ("x+") is invalid because
+// [Name.String] will not print a "+" if the build is empty.
+//
+// For more about filling in missing parts, see [Fill].
+func ParseName(s, defaults string) Name {
+	var r Name
+	parts(s)(func(kind PartKind, part string) bool {
+		if kind == PartInvalid {
+			r = Name{}
+			return false
+		}
+		if kind == PartDigest && !ParseDigest(part).IsValid() {
+			r = Name{}
+			return false
+		}
+		r.parts[kind] = part
+		return true
+	})
+	if r.IsValid() || r.IsResolved() {
+		if defaults == "" {
+			return r
+		}
+		return Fill(r, ParseName(defaults, ""))
+	}
+	return Name{}
+}
+
+// MustParseNameFill parses s like [ParseName] and panics if s is not a valid
+// Name. On success, parts missing from s are filled in from defaults; an
+// empty defaults leaves the parsed Name unchanged.
+func MustParseNameFill(s, defaults string) Name {
+	// Validate s on its own first so that defaults can never turn an invalid
+	// input into a valid Name.
+	r := ParseName(s, "")
+	if !r.IsValid() {
+		panic("model.MustParseNameFill: invalid name: " + s)
+	}
+	if defaults == "" {
+		return r
+	}
+	return Fill(r, ParseName(defaults, ""))
+}
+
+// Fill fills in the missing parts of dst with the parts of src.
+//
+// The returned Name will only be valid if dst is valid.
+func Fill(dst, src Name) Name {
+	var r Name
+	for i := range r.parts {
+		r.parts[i] = cmp.Or(dst.parts[i], src.parts[i])
+	}
+	return r
+}
+
+// WithBuild returns a copy of r with the build set to the given string.
+func (r Name) WithBuild(build string) Name {
+	r.parts[PartBuild] = build
+	return r
+}
+
+func (r Name) WithDigest(digest Digest) Name {
+	r.parts[PartDigest] = digest.String()
+	return r
+}
+
+var mapHashSeed = maphash.MakeSeed()
+
+// MapHash returns a case insensitive hash for use in maps and equality
+// checks. For a convenient way to compare names, use [Name.EqualFold].
+//
+//nolint:errcheck
+func (r Name) MapHash() uint64 {
+	// correctly hash the parts with case insensitive comparison
+	var h maphash.Hash
+	h.SetSeed(mapHashSeed)
+	for _, part := range r.Parts() {
+		// downcase the part for hashing
+		for i := range part {
+			c := part[i]
+			if c >= 'A' && c <= 'Z' {
+				c = c - 'A' + 'a'
+			}
+			h.WriteByte(c)
+		}
+	}
+	return h.Sum64()
+}
+
+// slice returns a Name holding only the parts of r from kind from through
+// kind to, inclusive. Every other part is left empty.
+func (r Name) slice(from, to PartKind) Name {
+	lo, hi := from, to+1
+	var out Name
+	copy(out.parts[lo:hi], r.parts[lo:hi])
+	return out
+}
+
+// empty is passed as the defaults argument when DisplayShortest parses its
+// mask; every part of it is the placeholder "?".
+//
+// NOTE(review): "?" is not accepted by isValidByteFor, so this presumably
+// parses to the zero Name and fills nothing in. Confirm the intent.
+const empty = "?/?/?:?+?@?"
+
+// DisplayShortest returns the shortest display string for r relative to
+// mask, in the form:
+//
+//	[host/][<namespace>/]<model>[:<tag>]
+//
+// Parts of r that match the corresponding parts of mask, ignoring case, are
+// dropped:
+//
+//   - the host is omitted if it equals the mask's host;
+//   - the namespace is omitted if both the host and the namespace equal
+//     the mask's;
+//   - the tag is omitted if it equals the mask's tag.
+//
+// The build and digest are never part of the result. An empty mask means
+// [DefaultMask]. DisplayShortest panics if mask does not parse to a valid
+// Name.
+func (r Name) DisplayShortest(mask string) string {
+	m := ParseName(cmp.Or(mask, DefaultMask), empty)
+	if !m.IsValid() {
+		panic("mask is an invalid Name")
+	}
+
+	// All three comparisons are made before any part of r is cleared.
+	same := func(from, to PartKind) bool {
+		return r.slice(from, to).EqualFold(m.slice(from, to))
+	}
+	dropNamespace := same(PartHost, PartNamespace)
+	dropHost := same(PartHost, PartHost)
+	dropTag := same(PartTag, PartTag)
+
+	if dropNamespace {
+		r.parts[PartNamespace] = ""
+	}
+	if dropHost {
+		r.parts[PartHost] = ""
+	}
+	if dropTag {
+		r.parts[PartTag] = ""
+	}
+	return r.slice(PartHost, PartTag).String()
+}
+
+// seps maps each part kind to the separator that follows that part in a
+// Name's string form; writeTo emits seps[k-1] immediately before part k.
+// The digest is the last part, so its entry is empty.
+var seps = [...]string{
+	PartHost:      "/",
+	PartNamespace: "/",
+	PartModel:     ":",
+	PartTag:       "+",
+	PartBuild:     "@",
+	PartDigest:    "",
+}
+
+// WriteTo implements io.WriterTo. It writes the fullest possible display
+// string in form:
+//
+//	<host>/<namespace>/<model>:<tag>+<build>@<digest-type>-<digest>
+//
+// Missing parts and their separators are not written.
+//
+// The full digest is always prefixed with "@". That is if [Name.IsValid]
+// reports false and [Name.IsResolved] reports true, then the string is
+// returned as "@<digest-type>-<digest>".
+func (r Name) writeTo(w io.StringWriter) error {
+	var partsWritten int
+	for i := range r.parts {
+		if r.parts[i] == "" {
+			continue
+		}
+		if partsWritten > 0 || i == int(PartDigest) {
+			if _, err := w.WriteString(seps[i-1]); err != nil {
+				return err
+			}
+		}
+		if _, err := w.WriteString(r.parts[i]); err != nil {
+			return err
+		}
+		partsWritten++
+	}
+	return nil
+}
+
+// builderPool holds the *strings.Builder values used by [Name.String].
+//
+// NOTE(review): String calls Reset on every builder it takes from the pool,
+// and strings.Builder.Reset discards the underlying buffer, so pooling
+// likely saves only the Builder value itself. Confirm it is worth keeping.
+var builderPool = sync.Pool{
+	New: func() any { return new(strings.Builder) },
+}
+
+// String returns the fullest possible display string of r, as produced by
+// writeTo:
+//
+//	<host>/<namespace>/<model>:<tag>+<build>@<digest>
+//
+// Missing parts, and their separators, are omitted.
+func (r Name) String() string {
+	sb := builderPool.Get().(*strings.Builder)
+	defer builderPool.Put(sb)
+
+	sb.Reset()
+	sb.Grow(50) // arbitrarily long enough for most names
+	// Writes to a strings.Builder report no error, so the result of writeTo
+	// is deliberately ignored.
+	_ = r.writeTo(sb)
+	return sb.String()
+}
+
+// GoString implements fmt.GoStringer. It formats r like [Name.String], but
+// every missing part is shown as "?" so that all parts and separators are
+// always present. It is meant for debugging and logging.
+func (r Name) GoString() string {
+	// r is a copy (value receiver), so the caller's Name is not modified.
+	for i, part := range r.parts {
+		if part == "" {
+			r.parts[i] = "?"
+		}
+	}
+	return r.String()
+}
+
+// LogValue implements slog.Valuer. The logged value is the string returned
+// by [Name.GoString].
+func (r Name) LogValue() slog.Value {
+	s := r.GoString()
+	return slog.StringValue(s)
+}
+
+// IsComplete reports whether the Name is fully qualified, that is, whether
+// it has a host, namespace, model, tag, and build. The digest is not
+// required.
+func (r Name) IsComplete() bool {
+	for _, part := range r.parts[:PartDigest] {
+		if part == "" {
+			return false
+		}
+	}
+	return true
+}
+
+// IsCompleteNoBuild is like [Name.IsComplete] except that neither the build
+// nor the digest has to be present.
+func (r Name) IsCompleteNoBuild() bool {
+	for _, part := range r.parts[:PartBuild] {
+		if part == "" {
+			return false
+		}
+	}
+	return true
+}
+
+// IsResolved reports whether the Name carries a valid digest.
+//
+// Being resolved is independent of being valid or complete: a Name can be
+// valid, or even complete, without being resolved.
+func (r Name) IsResolved() bool {
+	d := r.Digest()
+	return d.IsValid()
+}
+
+// Digest returns the digest part of the Name wrapped in a Digest. When the
+// Name has no digest part, the returned Digest wraps the empty string.
+func (r Name) Digest() Digest {
+	// ParseName only stores a digest part that passed ParseDigest, so no
+	// further validation is done here.
+	raw := r.parts[PartDigest]
+	return Digest{raw}
+}
+
+// EqualFold reports whether r and o are the same model name when ASCII case
+// is ignored, that is, whether [Name.CompareFold] returns 0 for them.
+func (r Name) EqualFold(o Name) bool {
+	order := r.CompareFold(o)
+	return order == 0
+}
+
+// CompareFold compares r and o part by part, ignoring ASCII case, and
+// returns the result of the first part that differs: negative if r sorts
+// before o, positive if after, and 0 if every part is equal.
+//
+// It can be passed to [slices.SortFunc]. For simple equality checks, use
+// [Name.EqualFold].
+func (r Name) CompareFold(o Name) int {
+	for i := range r.parts {
+		if c := compareFold(r.parts[i], o.parts[i]); c != 0 {
+			return c
+		}
+	}
+	return 0
+}
+
+// compareFold compares a and b rune by rune after mapping ASCII upper-case
+// letters to lower case. It returns -1, 0, or +1; when one string is a
+// strict prefix of the other, the shorter one sorts first.
+func compareFold(a, b string) int {
+	ra, rb := []rune(a), []rune(b)
+	for i := 0; i < len(ra) && i < len(rb); i++ {
+		if c := cmp.Compare(downcase(ra[i]), downcase(rb[i])); c != 0 {
+			return c
+		}
+	}
+	return cmp.Compare(len(ra), len(rb))
+}
+
+// downcase maps the ASCII letters 'A'..'Z' to 'a'..'z' and returns every
+// other rune unchanged.
+func downcase(r rune) rune {
+	if 'A' <= r && r <= 'Z' {
+		return r + ('a' - 'A')
+	}
+	return r
+}
+
+// TODO(bmizerany): driver.Value? (MarshalText etc should be enough)
+
+// Parts returns a copy of the parts of the Name, indexed by part kind:
+// host, namespace, model, tag, build, digest. Missing parts are empty
+// strings.
+//
+// The length of the returned slice is always 6.
+func (r Name) Parts() []string {
+	out := make([]string, len(r.parts))
+	copy(out, r.parts[:])
+	return out
+}
+
+// iter_Seq2 is an iter.Seq2 defined here to avoid the current build
+// restrictions in the go1.22 iter package, which requires the
+// goexperiment.rangefunc tag to be set via the GOEXPERIMENT=rangefunc flag,
+// something we are not yet ready to support.
+//
+// A value of this type is called with a yield function; the sequence calls
+// yield once per (A, B) pair and stops early if yield returns false.
+//
+// Once we are ready to support rangefunc, this can be removed and replaced
+// with the iter.Seq2 type.
+type iter_Seq2[A, B any] func(func(A, B) bool)
+
+// parts returns a sequence of the parts of a Name string. The string is
+// scanned from its last byte to its first, so parts are yielded from the
+// end of the string backwards: digest, build, tag, model, namespace, host
+// (only those present).
+//
+// As soon as anything invalid is found, a single (PartInvalid, "") pair is
+// yielded and the sequence ends; parts yielded before that point must be
+// discarded by the caller. An empty string, or one longer than
+// MaxNamePartLen after prefix removal, yields nothing at all.
+//
+// It normalizes the input string by removing "http://" and "https://" only.
+// No other normalizations are performed.
+func parts(s string) iter_Seq2[PartKind, string] {
+	return func(yield func(PartKind, string) bool) {
+		//nolint:gosimple
+		if strings.HasPrefix(s, "http://") {
+			s = s[len("http://"):]
+		}
+		//nolint:gosimple
+		if strings.HasPrefix(s, "https://") {
+			s = s[len("https://"):]
+		}
+
+		// The limit is applied to the whole string, not just to each part.
+		if len(s) > MaxNamePartLen || len(s) == 0 {
+			return
+		}
+
+		// yieldValid yields (kind, part) if part is non-empty and made only
+		// of bytes valid for kind; otherwise it yields PartInvalid and
+		// reports false so the caller stops.
+		yieldValid := func(kind PartKind, part string) bool {
+			if !isValidPart(kind, part) {
+				yield(PartInvalid, "")
+				return false
+			}
+			return yield(kind, part)
+		}
+
+		// state is the kind of part the scan is currently inside (the
+		// right-most part not yet yielded); j is the exclusive end index of
+		// that part in s.
+		//
+		// NOTE(review): numConsecutiveDots is not reset when a separator is
+		// crossed, so dots on both sides of a separator (for example
+		// "x.:.y") count as consecutive. Confirm this is intended.
+		numConsecutiveDots := 0
+		partLen := 0
+		state, j := PartDigest, len(s)
+		for i := len(s) - 1; i >= 0; i-- {
+			if partLen++; partLen > MaxNamePartLen {
+				// catch a part that is too long early, so
+				// we don't keep spinning on it, waiting for
+				// an isValidPart check which would scan
+				// over it again.
+				yield(PartInvalid, "")
+				return
+			}
+
+			switch s[i] {
+			case '@':
+				// "@" can only introduce the digest, so it must be the
+				// first separator met when scanning from the end.
+				switch state {
+				case PartDigest:
+					if !yieldValid(PartDigest, s[i+1:j]) {
+						return
+					}
+					if i == 0 {
+						// This is the form
+						// "@<digest>" which is valid.
+						//
+						// We're done.
+						return
+					}
+					state, j, partLen = PartBuild, i, 0
+				default:
+					yield(PartInvalid, "")
+					return
+				}
+			case '+':
+				// "+" introduces the build; the text to its right is the
+				// build whether or not a digest was seen.
+				switch state {
+				case PartBuild, PartDigest:
+					if !yieldValid(PartBuild, s[i+1:j]) {
+						return
+					}
+					state, j, partLen = PartTag, i, 0
+				default:
+					yield(PartInvalid, "")
+					return
+				}
+			case ':':
+				// ":" introduces the tag.
+				switch state {
+				case PartTag, PartBuild, PartDigest:
+					if !yieldValid(PartTag, s[i+1:j]) {
+						return
+					}
+					state, j, partLen = PartModel, i, 0
+				default:
+					yield(PartInvalid, "")
+					return
+				}
+			case '/':
+				// The first "/" met ends the model; a second one ends the
+				// namespace; a third is invalid.
+				switch state {
+				case PartModel, PartTag, PartBuild, PartDigest:
+					if !yieldValid(PartModel, s[i+1:j]) {
+						return
+					}
+					// NOTE(review): unlike every other transition, partLen
+					// is not reset here. The whole-string length check
+					// above appears to make the per-part check unreachable
+					// anyway. Confirm whether the reset was intended.
+					state, j = PartNamespace, i
+				case PartNamespace:
+					if !yieldValid(PartNamespace, s[i+1:j]) {
+						return
+					}
+					state, j, partLen = PartHost, i, 0
+				default:
+					yield(PartInvalid, "")
+					return
+				}
+			default:
+				// Reject ".." to prevent path traversal wherever a name is
+				// turned into a path.
+				if s[i] == '.' {
+					if numConsecutiveDots++; numConsecutiveDots > 1 {
+						yield(PartInvalid, "")
+						return
+					}
+				} else {
+					numConsecutiveDots = 0
+				}
+				if !isValidByteFor(state, s[i]) {
+					yield(PartInvalid, "")
+					return
+				}
+			}
+		}
+
+		// What remains at the front of s is the left-most part: the part
+		// named by state once a "/" has been crossed, otherwise the model.
+		if state <= PartNamespace {
+			yieldValid(state, s[:j])
+		} else {
+			yieldValid(PartModel, s[:j])
+		}
+	}
+}
+
+func (r Name) IsZero() bool {
+	return r.parts == [6]string{}
+}
+
+// IsValid reports whether the Name has a model part, which is the minimum
+// required of a valid Name.
+func (r Name) IsValid() bool {
+	// The parts were already validated when they were parsed, so the only
+	// question left here is whether a model part exists.
+	return len(r.parts[PartModel]) > 0
+}
+
+// isValidPart reports whether s is non-empty and consists only of bytes
+// that are valid for the given part kind.
+func isValidPart(kind PartKind, s string) bool {
+	if len(s) == 0 {
+		return false
+	}
+	for i := 0; i < len(s); i++ {
+		if !isValidByteFor(kind, s[i]) {
+			return false
+		}
+	}
+	return true
+}
+
+// isValidByteFor reports whether c may appear in a part of the given kind.
+// ASCII letters, digits, '_' and '-' are valid in every part; '.' is valid
+// in every part except the namespace.
+func isValidByteFor(kind PartKind, c byte) bool {
+	switch {
+	case c == '.':
+		return kind != PartNamespace
+	case c == '-', c == '_':
+		return true
+	case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z', '0' <= c && c <= '9':
+		return true
+	}
+	return false
+}
--- a/types/model/name_test.go
+++ b/types/model/name_test.go
@@ -0,0 +1,490 @@
+package model
+
+import (
+	"bytes"
+	"cmp"
+	"fmt"
+	"log/slog"
+	"slices"
+	"strings"
+	"testing"
+)
+
+// fields is a comparable, flattened view of a Name's parts, used to state
+// the expected result of parsing in table-driven tests.
+type fields struct {
+	host, namespace, model, tag, build string
+	digest                             string
+}
+
+func fieldsFromName(p Name) fields {
+	return fields{
+		host:      p.parts[PartHost],
+		namespace: p.parts[PartNamespace],
+		model:     p.parts[PartModel],
+		tag:       p.parts[PartTag],
+		build:     p.parts[PartBuild],
+		digest:    p.parts[PartDigest],
+	}
+}
+
+// testNames maps an input string to the parts ParseName is expected to
+// produce for it. An empty fields value means the input must be rejected
+// (parsed to the zero Name).
+var testNames = map[string]fields{
+	"mistral:latest":                 {model: "mistral", tag: "latest"},
+	"mistral":                        {model: "mistral"},
+	"mistral:30B":                    {model: "mistral", tag: "30B"},
+	"mistral:7b":                     {model: "mistral", tag: "7b"},
+	"mistral:7b+Q4_0":                {model: "mistral", tag: "7b", build: "Q4_0"},
+	"mistral+KQED":                   {model: "mistral", build: "KQED"},
+	"mistral.x-3:7b+Q4_0":            {model: "mistral.x-3", tag: "7b", build: "Q4_0"},
+	"mistral:7b+q4_0":                {model: "mistral", tag: "7b", build: "q4_0"},
+	"llama2":                         {model: "llama2"},
+	"user/model":                     {namespace: "user", model: "model"},
+	"example.com/ns/mistral:7b+Q4_0": {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "Q4_0"},
+	"example.com/ns/mistral:7b+X":    {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "X"},
+
+	// invalid digest
+	"mistral:latest@invalid256-": {},
+	"mistral:latest@-123":        {},
+	"mistral:latest@!-123":       {},
+	"mistral:latest@1-!":         {},
+	"mistral:latest@":            {},
+
+	// resolved
+	"x@sha123-1": {model: "x", digest: "sha123-1"},
+	"@sha456-2":  {digest: "sha456-2"},
+
+	"@@sha123-1": {},
+
+	// preserves case for build
+	"x+b": {model: "x", build: "b"},
+
+	// invalid (includes fuzzing trophies)
+	" / / : + ": {},
+	" / : + ":   {},
+	" : + ":     {},
+	" + ":       {},
+	" : ":       {},
+	" / ":       {},
+	" /":        {},
+	"/ ":        {},
+	"/":         {},
+	":":         {},
+	"+":         {},
+
+	// (".") in namespace is not allowed
+	"invalid.com/7b+x": {},
+
+	"invalid:7b+Q4_0:latest": {},
+	"in valid":               {},
+	"invalid/y/z/foo":        {},
+	"/0":                     {},
+	"0 /0":                   {},
+	"0 /":                    {},
+	"0/":                     {},
+	":/0":                    {},
+	"+0/00000":               {},
+	"0+.\xf2\x80\xf6\x9d00000\xe5\x99\xe6\xd900\xd90\xa60\x91\xdc0\xff\xbf\x99\xe800\xb9\xdc\xd6\xc300\x970\xfb\xfd0\xe0\x8a\xe1\xad\xd40\x9700\xa80\x980\xdd0000\xb00\x91000\xfe0\x89\x9b\x90\x93\x9f0\xe60\xf7\x84\xb0\x87\xa5\xff0\xa000\x9a\x85\xf6\x85\xfe\xa9\xf9\xe9\xde00\xf4\xe0\x8f\x81\xad\xde00\xd700\xaa\xe000000\xb1\xee0\x91": {},
+	"0//0":                        {},
+	"m+^^^":                       {},
+	"file:///etc/passwd":          {},
+	"file:///etc/passwd:latest":   {},
+	"file:///etc/passwd:latest+u": {},
+
+	":x": {},
+	"+x": {},
+	"x+": {},
+
+	// Disallow ("\.+") in any part to prevent path traversal anywhere
+	// we convert the name to a path.
+	"../etc/passwd":  {},
+	".../etc/passwd": {},
+	"./../passwd":    {},
+	"./0+..":         {},
+
+	// A name of exactly MaxNamePartLen bytes is accepted; one byte more is
+	// rejected.
+	strings.Repeat("a", MaxNamePartLen):   {model: strings.Repeat("a", MaxNamePartLen)},
+	strings.Repeat("a", MaxNamePartLen+1): {},
+}
+
+// TestNameConsecutiveDots tests that consecutive dots are not allowed in any
+// part, to avoid path traversal. There also are some tests in testNames, but
+// this test is more exhaustive and exists to emphasize the importance of
+// preventing path traversal.
+func TestNameConsecutiveDots(t *testing.T) {
+	for i := 1; i < 10; i++ {
+		s := strings.Repeat(".", i)
+		if i > 1 {
+			// Two or more dots in a row must be rejected.
+			if g := ParseName(s, "").String(); g != "" {
+				t.Errorf("ParseName(%q) = %q; want empty string", s, g)
+			}
+		} else {
+			// A single dot on its own is accepted as a model name.
+			if g := ParseName(s, "").String(); g != s {
+				t.Errorf("ParseName(%q) = %q; want %q", s, g, s)
+			}
+		}
+	}
+}
+
+// TestNameParts checks that Parts returns one element per part kind, from
+// the first kind through PartDigest.
+func TestNameParts(t *testing.T) {
+	var p Name
+	if w, g := int(PartDigest+1), len(p.Parts()); w != g {
+		t.Errorf("Parts() = %d; want %d", g, w)
+	}
+}
+
+// TestNamePartString checks that PartKind.String returns the name recorded
+// in kindNames for every known kind and "Unknown" for an out-of-range kind.
+func TestNamePartString(t *testing.T) {
+	if g := PartKind(-2).String(); g != "Unknown" {
+		t.Errorf("Unknown part = %q; want %q", g, "Unknown")
+	}
+	for kind, name := range kindNames {
+		if g := kind.String(); g != name {
+			t.Errorf("%s = %q; want %q", kind, g, name)
+		}
+	}
+}
+
+// TestParseName runs every entry of testNames through ParseName, with and
+// without an "http://" or "https://" prefix, and checks both the parsed
+// parts and that the result survives a String/ParseName round trip.
+func TestParseName(t *testing.T) {
+	for baseName, want := range testNames {
+		for _, prefix := range []string{"", "https://", "http://"} {
+			// We should get the same results with or without the
+			// http(s) prefixes
+			s := prefix + baseName
+
+			t.Run(s, func(t *testing.T) {
+				name := ParseName(s, "")
+				got := fieldsFromName(name)
+				if got != want {
+					t.Errorf("ParseName(%q) = %q; want %q", s, got, want)
+				}
+
+				// test round-trip
+				if !ParseName(name.String(), "").EqualFold(name) {
+					t.Errorf("ParseName(%q).String() = %s; want %s", s, name.String(), baseName)
+				}
+			})
+		}
+	}
+}
+
+// TestCompleteWithAndWithoutBuild checks IsComplete and IsCompleteNoBuild
+// against a table of names, and that parsing a name and calling IsComplete
+// does not allocate.
+func TestCompleteWithAndWithoutBuild(t *testing.T) {
+	cases := []struct {
+		in              string
+		complete        bool
+		completeNoBuild bool
+	}{
+		{"", false, false},
+		{"incomplete/mistral:7b+x", false, false},
+		{"incomplete/mistral:7b+Q4_0", false, false},
+		{"incomplete:7b+x", false, false},
+		{"complete.com/x/mistral:latest+Q4_0", true, true},
+		{"complete.com/x/mistral:latest", false, true},
+	}
+
+	for _, tt := range cases {
+		t.Run(tt.in, func(t *testing.T) {
+			p := ParseName(tt.in, "")
+			t.Logf("ParseName(%q) = %#v", tt.in, p)
+			if g := p.IsComplete(); g != tt.complete {
+				t.Errorf("Complete(%q) = %v; want %v", tt.in, g, tt.complete)
+			}
+			if g := p.IsCompleteNoBuild(); g != tt.completeNoBuild {
+				t.Errorf("CompleteNoBuild(%q) = %v; want %v", tt.in, g, tt.completeNoBuild)
+			}
+		})
+	}
+
+	// IsComplete inspects the parts array in place rather than calling
+	// Parts, so parsing a name and checking it should not allocate or let
+	// anything escape to the heap.
+	allocs := testing.AllocsPerRun(1000, func() {
+		keep(ParseName("complete.com/x/mistral:latest+Q4_0", "").IsComplete())
+	})
+	if allocs > 0 {
+		t.Errorf("Complete allocs = %v; want 0", allocs)
+	}
+}
+
+// TestNameLogValue checks that logging a Name through slog's text handler
+// emits the Name's GoString form as the attribute value.
+func TestNameLogValue(t *testing.T) {
+	cases := []string{
+		"example.com/library/mistral:latest+Q4_0",
+		"mistral:latest",
+		"mistral:7b+Q4_0",
+	}
+	for _, s := range cases {
+		t.Run(s, func(t *testing.T) {
+			var b bytes.Buffer
+			log := slog.New(slog.NewTextHandler(&b, nil))
+			name := ParseName(s, "")
+			log.Info("", "name", name)
+			want := fmt.Sprintf("name=%s", name.GoString())
+			got := b.String()
+			if !strings.Contains(got, want) {
+				t.Errorf("expected log output to contain %q; got %q", want, got)
+			}
+		})
+	}
+}
+
+// TestNameGoString checks the %#v formatting of parsed names: every missing
+// part must be shown as "?" and the case of present parts must be kept.
+func TestNameGoString(t *testing.T) {
+	cases := []struct {
+		name         string
+		in           string
+		wantString   string
+		wantGoString string // default is tt.in
+	}{
+		{
+			name:         "Complete Name",
+			in:           "example.com/library/mistral:latest+Q4_0",
+			wantGoString: "example.com/library/mistral:latest+Q4_0@?",
+		},
+		{
+			name:         "Short Name",
+			in:           "mistral:latest",
+			wantGoString: "?/?/mistral:latest+?@?",
+		},
+		{
+			name:         "Long Name",
+			in:           "library/mistral:latest",
+			wantGoString: "?/library/mistral:latest+?@?",
+		},
+		{
+			name:         "Case Preserved",
+			in:           "Library/Mistral:Latest",
+			wantGoString: "?/Library/Mistral:Latest+?@?",
+		},
+		{
+			name:         "With digest",
+			in:           "Library/Mistral:Latest@sha256-123456",
+			wantGoString: "?/Library/Mistral:Latest+?@sha256-123456",
+		},
+	}
+
+	for _, tt := range cases {
+		t.Run(tt.name, func(t *testing.T) {
+			p := ParseName(tt.in, "")
+			tt.wantGoString = cmp.Or(tt.wantGoString, tt.in)
+			if g := fmt.Sprintf("%#v", p); g != tt.wantGoString {
+				t.Errorf("GoString() = %q; want %q", g, tt.wantGoString)
+			}
+		})
+	}
+}
+
+// TestDisplayShortest checks DisplayShortest against a table of names and
+// masks, including case-insensitive matching, the default mask, and the
+// panic raised for a mask that is not a valid Name.
+func TestDisplayShortest(t *testing.T) {
+	cases := []struct {
+		in        string
+		mask      string
+		want      string
+		wantPanic bool
+	}{
+		{"example.com/library/mistral:latest+Q4_0", "example.com/library/_:latest", "mistral", false},
+		{"example.com/library/mistral:latest+Q4_0", "example.com/_/_:latest", "library/mistral", false},
+		{"example.com/library/mistral:latest+Q4_0", "", "example.com/library/mistral", false},
+		{"example.com/library/mistral:latest+Q4_0", "", "example.com/library/mistral", false},
+
+		// case-insensitive
+		{"Example.com/library/mistral:latest+Q4_0", "example.com/library/_:latest", "mistral", false},
+		{"example.com/Library/mistral:latest+Q4_0", "example.com/library/_:latest", "mistral", false},
+		{"example.com/library/Mistral:latest+Q4_0", "example.com/library/_:latest", "Mistral", false},
+		{"example.com/library/mistral:Latest+Q4_0", "example.com/library/_:latest", "mistral", false},
+		{"example.com/library/mistral:Latest+q4_0", "example.com/library/_:latest", "mistral", false},
+
+		// invalid mask
+		{"example.com/library/mistral:latest+Q4_0", "example.com/mistral", "", true},
+
+		// DefaultMask
+		{"registry.ollama.ai/library/mistral:latest+Q4_0", DefaultMask, "mistral", false},
+
+		// Auto-Fill
+		{"x", "example.com/library/_:latest", "x", false},
+		{"x", "example.com/library/_:latest+Q4_0", "x", false},
+		{"x/y:z", "a.com/library/_:latest+Q4_0", "x/y:z", false},
+		{"x/y:z", "a.com/library/_:latest+Q4_0", "x/y:z", false},
+	}
+
+	for _, tt := range cases {
+		t.Run("", func(t *testing.T) {
+			// For the cases that expect a panic, fail if none occurred.
+			defer func() {
+				if tt.wantPanic {
+					if recover() == nil {
+						t.Errorf("expected panic")
+					}
+				}
+			}()
+
+			p := ParseName(tt.in, "")
+			t.Logf("ParseName(%q) = %#v", tt.in, p)
+			if g := p.DisplayShortest(tt.mask); g != tt.want {
+				t.Errorf("got = %q; want %q", g, tt.want)
+			}
+		})
+	}
+}
+
+// TestParseNameAllocs checks that ParseName does not allocate.
+//
+// NOTE(review): in the input below "example.com" sits in the namespace
+// position, and '.' is rejected for namespaces, so this appears to measure
+// only the rejection path. Consider also covering a name that parses.
+func TestParseNameAllocs(t *testing.T) {
+	allocs := testing.AllocsPerRun(1000, func() {
+		keep(ParseName("example.com/mistral:7b+Q4_0", ""))
+	})
+	if allocs > 0 {
+		t.Errorf("ParseName allocs = %v; want 0", allocs)
+	}
+}
+
+// BenchmarkParseName measures the time and allocations of a single
+// ParseName call.
+//
+// NOTE(review): the input has a dotted namespace ("example.com"), which
+// ParseName appears to reject, so this likely benchmarks the rejection
+// path. Confirm that is the intent.
+func BenchmarkParseName(b *testing.B) {
+	b.ReportAllocs()
+
+	for range b.N {
+		keep(ParseName("example.com/mistral:7b+Q4_0", ""))
+	}
+}
+
+// FuzzParseName fuzzes ParseName and checks that any input containing ".."
+// is rejected, that rejected inputs yield the zero Name, that no part is
+// longer than MaxNamePartLen, and that accepted names round-trip through
+// String and ParseName.
+//
+// NOTE(review): the corpus files added with this test live under
+// testdata/fuzz/FuzzParseRef, while the go tool reads a fuzz target's corpus
+// from testdata/fuzz/<target name>. They are presumably not picked up for
+// FuzzParseName; verify and rename the directory if so.
+func FuzzParseName(f *testing.F) {
+	f.Add("example.com/mistral:7b+Q4_0")
+	f.Add("example.com/mistral:7b+q4_0")
+	f.Add("example.com/mistral:7b+x")
+	f.Add("x/y/z:8n+I")
+	f.Add(":x")
+	f.Add("@sha256-123456")
+	f.Add("example.com/mistral:latest+Q4_0@sha256-123456")
+	f.Add(":@!@")
+	f.Add("...")
+	f.Fuzz(func(t *testing.T, s string) {
+		r0 := ParseName(s, "")
+
+		if strings.Contains(s, "..") && !r0.IsZero() {
+			t.Fatalf("non-zero value for path with '..': %q", s)
+		}
+
+		// A rejected input must be exactly the zero Name; nothing further
+		// can be checked for it.
+		if !r0.IsValid() && !r0.IsResolved() {
+			if !r0.EqualFold(Name{}) {
+				t.Errorf("expected invalid path to be zero value; got %#v", r0)
+			}
+			t.Skipf("invalid path: %q", s)
+		}
+
+		for _, p := range r0.Parts() {
+			if len(p) > MaxNamePartLen {
+				t.Errorf("part too long: %q", p)
+			}
+		}
+
+		// NOTE(review): ParseName strips a leading "http://" or "https://",
+		// so an accepted input carrying such a prefix would presumably fail
+		// this comparison. Confirm whether the fuzzer should strip it too.
+		if !strings.EqualFold(r0.String(), s) {
+			t.Errorf("String() did not round-trip with case insensitivity: %q\ngot  = %q\nwant = %q", s, r0.String(), s)
+		}
+
+		r1 := ParseName(r0.String(), "")
+		if !r0.EqualFold(r1) {
+			t.Errorf("round-trip mismatch: %+v != %+v", r0, r1)
+		}
+	})
+}
+
+// TestFill checks that Fill keeps the parts dst already has and takes the
+// rest from src, including the case where dst is empty.
+func TestFill(t *testing.T) {
+	cases := []struct {
+		dst  string
+		src  string
+		want string
+	}{
+		{"mistral", "o.com/library/PLACEHOLDER:latest+Q4_0", "o.com/library/mistral:latest+Q4_0"},
+		{"o.com/library/mistral", "PLACEHOLDER:latest+Q4_0", "o.com/library/mistral:latest+Q4_0"},
+		{"", "o.com/library/mistral:latest+Q4_0", "o.com/library/mistral:latest+Q4_0"},
+	}
+
+	for _, tt := range cases {
+		t.Run(tt.dst, func(t *testing.T) {
+			r := Fill(ParseName(tt.dst, ""), ParseName(tt.src, ""))
+			if r.String() != tt.want {
+				t.Errorf("Fill(%q, %q) = %q; want %q", tt.dst, tt.src, r, tt.want)
+			}
+		})
+	}
+}
+
+// TestNameStringAllocs checks that String allocates at most once per call;
+// the one allowed allocation is presumably the buffer that backs the
+// returned string.
+func TestNameStringAllocs(t *testing.T) {
+	name := ParseName("example.com/ns/mistral:latest+Q4_0", "")
+	allocs := testing.AllocsPerRun(1000, func() {
+		keep(name.String())
+	})
+	// The failure message states the same bound the condition enforces.
+	if allocs > 1 {
+		t.Errorf("String allocs = %v; want <= 1", allocs)
+	}
+}
+
+// ExampleFill shows a bare model name being completed from a fully
+// qualified set of defaults.
+func ExampleFill() {
+	defaults := ParseName("registry.ollama.com/library/PLACEHOLDER:latest+Q4_0", "")
+	r := Fill(ParseName("mistral", ""), defaults)
+	fmt.Println(r)
+
+	// Output:
+	// registry.ollama.com/library/mistral:latest+Q4_0
+}
+
+// ExampleName_MapHash shows that names differing only in letter case hash to
+// the same map key, while a name with different parts gets its own key.
+func ExampleName_MapHash() {
+	m := map[uint64]bool{}
+
+	// key 1
+	m[ParseName("mistral:latest+q4", "").MapHash()] = true
+	m[ParseName("miSTRal:latest+Q4", "").MapHash()] = true
+	m[ParseName("mistral:LATest+Q4", "").MapHash()] = true
+
+	// key 2
+	m[ParseName("mistral:LATest", "").MapHash()] = true
+
+	fmt.Println(len(m))
+	// Output:
+	// 2
+}
+
+// ExampleName_CompareFold_sort shows CompareFold used as the comparison
+// function for slices.SortFunc; the names are ordered without regard to
+// letter case.
+func ExampleName_CompareFold_sort() {
+	names := []Name{
+		ParseName("mistral:latest", ""),
+		ParseName("mistRal:7b+q4", ""),
+		ParseName("MIstral:7b", ""),
+	}
+
+	slices.SortFunc(names, Name.CompareFold)
+
+	for _, n := range names {
+		fmt.Println(n)
+	}
+
+	// Output:
+	// MIstral:7b
+	// mistRal:7b+q4
+	// mistral:latest
+}
+
+// ExampleName_completeAndResolved shows that IsComplete and IsResolved are
+// independent: a name can be either, both, or neither.
+func ExampleName_completeAndResolved() {
+	for _, s := range []string{
+		"x/y/z:latest+q4_0@sha123-1",
+		"x/y/z:latest+q4_0",
+		"@sha123-1",
+	} {
+		name := ParseName(s, "")
+		fmt.Printf("complete:%v resolved:%v  digest:%s\n", name.IsComplete(), name.IsResolved(), name.Digest())
+	}
+
+	// Output:
+	// complete:true resolved:true  digest:sha123-1
+	// complete:true resolved:false  digest:
+	// complete:false resolved:true  digest:sha123-1
+}
+
+// ExampleName_DisplayShortest shows how successively less specific masks
+// leave more of the name in the displayed string, and that an empty mask
+// selects DefaultMask.
+func ExampleName_DisplayShortest() {
+	name := ParseName("example.com/jmorganca/mistral:latest+Q4_0", "")
+
+	fmt.Println(name.DisplayShortest("example.com/jmorganca/_:latest"))
+	fmt.Println(name.DisplayShortest("example.com/_/_:latest"))
+	fmt.Println(name.DisplayShortest("example.com/_/_:_"))
+	fmt.Println(name.DisplayShortest("_/_/_:_"))
+
+	// Default
+	name = ParseName("registry.ollama.ai/library/mistral:latest+Q4_0", "")
+	fmt.Println(name.DisplayShortest(""))
+
+	// Output:
+	// mistral
+	// jmorganca/mistral
+	// jmorganca/mistral:latest
+	// example.com/jmorganca/mistral:latest
+	// mistral
+}
+
+// keep returns v unchanged. The tests and benchmarks wrap results in it so
+// that the value they compute is used.
+func keep[T any](v T) T {
+	return v
+}
--- a/types/model/testdata/fuzz/FuzzParseRef/1d43ee52085cb4aa
+++ b/types/model/testdata/fuzz/FuzzParseRef/1d43ee52085cb4aa
@@ -0,0 +1,2 @@
+go test fuzz v1
+string("/0")
--- a/types/model/testdata/fuzz/FuzzParseRef/27fd759314f0e6d6
+++ b/types/model/testdata/fuzz/FuzzParseRef/27fd759314f0e6d6
@@ -0,0 +1,2 @@
+go test fuzz v1
+string("0//0")
--- a/types/model/testdata/fuzz/FuzzParseRef/3e3b70dba384074d
+++ b/types/model/testdata/fuzz/FuzzParseRef/3e3b70dba384074d
@@ -0,0 +1,2 @@
+go test fuzz v1
+string("0 /0")
--- a/types/model/testdata/fuzz/FuzzParseRef/71f1fdff711b6dab
+++ b/types/model/testdata/fuzz/FuzzParseRef/71f1fdff711b6dab
@@ -0,0 +1,2 @@
+go test fuzz v1
+string("+0/00000")
--- a/types/model/testdata/fuzz/FuzzParseRef/82c2975c430ac608
+++ b/types/model/testdata/fuzz/FuzzParseRef/82c2975c430ac608
@@ -0,0 +1,2 @@
+go test fuzz v1
+string(":")
--- a/types/model/testdata/fuzz/FuzzParseRef/b51b1c875e61a948
+++ b/types/model/testdata/fuzz/FuzzParseRef/b51b1c875e61a948
@@ -0,0 +1,2 @@
+go test fuzz v1
+string("0+.\xf2\x80\xf6\x9d00000\xe5\x99\xe6\xd900\xd90\xa60\x91\xdc0\xff\xbf\x99\xe800\xb9\xdc\xd6\xc300\x970\xfb\xfd0\xe0\x8a\xe1\xad\xd40\x9700\xa80\x980\xdd0000\xb00\x91000\xfe0\x89\x9b\x90\x93\x9f0\xe60\xf7\x84\xb0\x87\xa5\xff0\xa000\x9a\x85\xf6\x85\xfe\xa9\xf9\xe9\xde00\xf4\xe0\x8f\x81\xad\xde00\xd700\xaa\xe000000\xb1\xee0\x91")
--- a/types/structs/structs.go
+++ b/types/structs/structs.go
@@ -0,0 +1,15 @@
+// Copyright (c) Tailscale Inc & AUTHORS
+// SPDX-License-Identifier: BSD-3-Clause
+
+// Package structs contains the Incomparable type.
+package structs
+
+// Incomparable is a zero-width incomparable type. If added as the
+// first field in a struct, it marks that struct as not comparable
+// (can't do == or be a map key) and usually doesn't add any width to
+// the struct (unless the struct has only small fields).
+//
+// By making a struct incomparable, you can prevent misuse (prevent
+// people from using ==), but also you can shrink generated binaries,
+// as the compiler can omit equality funcs from the binary.
+//
+// It is incomparable because its element type is a func, and func values
+// cannot be compared with ==; the array length of zero is what keeps it
+// from occupying space.
+type Incomparable [0]func()
Author	SHA1	Message	Date
Blake Mizerany	5cddcd079a	...	2024-04-11 00:57:08 -07:00
Blake Mizerany	0efb7931c7	Revert "types/model: remove (*Digest).Scan and Digest.Value (#3589 )" This reverts commit `42f2cc408e`.	2024-04-11 00:45:07 -07:00
Blake Mizerany	42f2cc408e	types/model: remove (*Digest).Scan and Digest.Value (#3589 )	2024-04-11 00:37:26 -07:00
Blake Mizerany	9446b795b5	types/model: remove DisplayLong (#3587 )	2024-04-10 16:55:12 -07:00
Blake Mizerany	62f8cda3b3	types/model: remove MarshalText/UnmarshalText from Digest (#3586 )	2024-04-10 16:52:49 -07:00
Blake Mizerany	6a1de23175	types/model: init with Name and Digest types (#3541 )	2024-04-10 16:30:05 -07:00
Blake Mizerany	a7b431e743	server: provide helpful workaround hint when stalling on pull (#3584 ) This is a quick fix to help users who are stuck on the "pull" step at 99%. In the near future we're introducing a new registry client that should/will hopefully be smarter. In the meantime, this should unblock the users hitting issue #1736.	2024-04-10 16:24:37 -07:00
Michael Yang	5a25f93522	Merge pull request #3478 from ollama/mxyng/tensor-layer refactor tensor query	2024-04-10 12:45:03 -07:00
Michael Yang	7e33a017c0	partial offloading	2024-04-10 11:37:20 -07:00
Michael Yang	8b2c10061c	refactor tensor query	2024-04-10 11:37:20 -07:00
Michael Yang	c5c451ca3b	Merge pull request #3579 from ollama/mxyng/fix-ci fix ci	2024-04-10 11:37:01 -07:00
Michael Yang	2b4ca6cf36	fix ci	2024-04-10 11:35:12 -07:00
Eli Bendersky	ad90b9ab3d	api: start adding documentation to package api (#2878 ) * api: start adding documentation to package api Updates #2840 * Fix lint typo report	2024-04-10 13:31:55 -04:00
Eli Bendersky	4340f8eba4	examples: start adding Go examples using api/ (#2879 ) We can have the same examples as e.g. https://github.com/ollama/ollama-python/tree/main/examples here. Using consistent naming and renaming the existing example to have -http- since it uses direct HTTP requests rather than api/ Updates #2840	2024-04-10 13:26:45 -04:00