Mirror of https://github.com/ollama/ollama.git (synced 2026-04-27 11:15:40 +02:00)
Compare commits
22 Commits
pdevine/sh...v0.1.33-rc
SHA1:
4e1ff6dcbb
8589d752ac
de4ded68b0
9b5a3c5991
00b0699c75
993cf8bf55
7bb7cb8a60
b123be5b71
ddf5c09a9b
5f73c08729
f503a848c2
36a6daccab
ceb0e26e5e
284e02bed0
3450a57d4a
592dae31c8
2010cbc5fa
ac0801eced
ad66e5b060
ade4b55520
a6d62e0617
0d6687f84c
11 .github/workflows/release.yaml vendored
@@ -311,29 +311,18 @@ jobs:
      - uses: actions/download-artifact@v4
        with:
          name: generate-windows-cpu
          path: |
            llm/build
            dist/windows-amd64
      - uses: actions/download-artifact@v4
        with:
          name: generate-windows-cuda
          path: |
            llm/build
            dist/windows-amd64
      - uses: actions/download-artifact@v4
        with:
          name: windows-cuda-deps
          path: dist/deps
      - uses: actions/download-artifact@v4
        with:
          name: windows-rocm-deps
          path: dist/deps
      - uses: actions/download-artifact@v4
        with:
          name: generate-windows-rocm
          path: |
            llm/build
            dist/windows-amd64
      - run: dir llm/build
      - run: |
          $gopath=(get-command go).source | split-path -parent
@@ -396,8 +396,10 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
 func DefaultOptions() Options {
 	return Options{
 		// options set on request to runner
-		NumPredict: -1,
-		NumKeep:    0,
+		NumPredict: -1,
+
+		// set a minimal num_keep to avoid issues on context shifts
+		NumKeep:     4,
 		Temperature: 0.8,
 		TopK:        40,
 		TopP:        0.9,
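A note on what this buys: num_keep pins tokens at the head of the window when the runner shifts context. A minimal sketch of that truncation logic (hypothetical helper, not code from this change; assumes the shift keeps the first numKeep tokens plus the most recent remainder):

    package main

    import "fmt"

    // contextShift sketches how a runner might truncate an overflowing token
    // window: the first numKeep tokens are pinned, the newest tokens fill the rest.
    // With num_keep=0 a shift could drop the entire preamble (e.g. the BOS token
    // and the start of the system prompt); num_keep=4 keeps at least the head intact.
    func contextShift(tokens []int, numCtx, numKeep int) []int {
        if len(tokens) <= numCtx {
            return tokens
        }
        out := append([]int{}, tokens[:numKeep]...)
        return append(out, tokens[len(tokens)-(numCtx-numKeep):]...)
    }

    func main() {
        fmt.Println(contextShift([]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, 8, 4))
        // [1 2 3 4 7 8 9 10]
    }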
183 cmd/cmd.go
@@ -17,6 +17,7 @@ import (
 	"os"
 	"os/signal"
 	"path/filepath"
+	"regexp"
 	"runtime"
 	"strings"
 	"syscall"
@@ -53,8 +54,6 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 	p := progress.NewProgress(os.Stderr)
 	defer p.Stop()

-	bars := make(map[string]*progress.Bar)
-
 	modelfile, err := os.ReadFile(filename)
 	if err != nil {
 		return err
@@ -95,95 +94,16 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 		return err
 	}

-	// TODO make this work w/ adapters
 	if fi.IsDir() {
-		tf, err := os.CreateTemp("", "ollama-tf")
+		// this is likely a safetensors or pytorch directory
+		// TODO make this work w/ adapters
+		tempfile, err := tempZipFiles(path)
 		if err != nil {
 			return err
 		}
-		defer os.RemoveAll(tf.Name())
+		defer os.RemoveAll(tempfile)
-
-		zf := zip.NewWriter(tf)
-
-		files := []string{}
-
-		tfiles, err := filepath.Glob(filepath.Join(path, "pytorch_model-*.bin"))
-		if err != nil {
-			return err
-		} else if len(tfiles) == 0 {
-			tfiles, err = filepath.Glob(filepath.Join(path, "model-*.safetensors"))
-			if err != nil {
-				return err
-			}
-		}
-
-		files = append(files, tfiles...)
-
-		if len(files) == 0 {
-			return fmt.Errorf("no models were found in '%s'", path)
-		}
-
-		// add the safetensor/torch config file + tokenizer
-		files = append(files, filepath.Join(path, "config.json"))
-		files = append(files, filepath.Join(path, "params.json"))
-		files = append(files, filepath.Join(path, "added_tokens.json"))
-		files = append(files, filepath.Join(path, "tokenizer.model"))
-
-		for _, fn := range files {
-			f, err := os.Open(fn)
-
-			// just skip whatever files aren't there
-			if os.IsNotExist(err) {
-				if strings.HasSuffix(fn, "tokenizer.model") {
-					// try the parent dir before giving up
-					parentDir := filepath.Dir(path)
-					newFn := filepath.Join(parentDir, "tokenizer.model")
-					f, err = os.Open(newFn)
-					if os.IsNotExist(err) {
-						continue
-					} else if err != nil {
-						return err
-					}
-				} else {
-					continue
-				}
-			} else if err != nil {
-				return err
-			}
-
-			fi, err := f.Stat()
-			if err != nil {
-				return err
-			}
-
-			h, err := zip.FileInfoHeader(fi)
-			if err != nil {
-				return err
-			}
-
-			h.Name = filepath.Base(fn)
-			h.Method = zip.Store
-
-			w, err := zf.CreateHeader(h)
-			if err != nil {
-				return err
-			}
-
-			_, err = io.Copy(w, f)
-			if err != nil {
-				return err
-			}
-		}
-
-		if err := zf.Close(); err != nil {
-			return err
-		}
-
-		if err := tf.Close(); err != nil {
-			return err
-		}
-		path = tf.Name()
+		path = tempfile
 	}

 	digest, err := createBlob(cmd, client, path)
@@ -191,10 +111,17 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 			return err
 		}

-		modelfile = bytes.ReplaceAll(modelfile, []byte(c.Args), []byte("@"+digest))
+		name := c.Name
+		if c.Name == "model" {
+			name = "from"
+		}
+
+		re := regexp.MustCompile(fmt.Sprintf(`(?im)^(%s)\s+%s\s*$`, name, c.Args))
+		modelfile = re.ReplaceAll(modelfile, []byte("$1 @"+digest))
 		}
 	}

+	bars := make(map[string]*progress.Bar)
 	fn := func(resp api.ProgressResponse) error {
 		if resp.Digest != "" {
 			spinner.Stop()
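The move from bytes.ReplaceAll to an anchored regular expression means only a whole "FROM <arg>" (or other command) line is rewritten to reference the uploaded blob, not every incidental occurrence of the argument elsewhere in the Modelfile. A standalone sketch (made-up Modelfile content; regexp.QuoteMeta is added here for safety and is not in the diff, which interpolates c.Args directly):

    package main

    import (
        "fmt"
        "regexp"
    )

    func main() {
        modelfile := []byte("FROM ./model\n# from ./model, see notes\n")
        // (?im): case-insensitive and multi-line, so ^...$ anchor to each line.
        re := regexp.MustCompile(fmt.Sprintf(`(?im)^(%s)\s+%s\s*$`, "from", regexp.QuoteMeta("./model")))
        fmt.Printf("%s", re.ReplaceAll(modelfile, []byte("$1 @sha256:abc123")))
        // Output:
        // FROM @sha256:abc123
        // # from ./model, see notes
    }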
@@ -228,6 +155,88 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 	return nil
 }

+func tempZipFiles(path string) (string, error) {
+	tempfile, err := os.CreateTemp("", "ollama-tf")
+	if err != nil {
+		return "", err
+	}
+	defer tempfile.Close()
+
+	zipfile := zip.NewWriter(tempfile)
+	defer zipfile.Close()
+
+	tfiles, err := filepath.Glob(filepath.Join(path, "pytorch_model-*.bin"))
+	if err != nil {
+		return "", err
+	} else if len(tfiles) == 0 {
+		tfiles, err = filepath.Glob(filepath.Join(path, "model-*.safetensors"))
+		if err != nil {
+			return "", err
+		}
+	}
+
+	files := []string{}
+	files = append(files, tfiles...)
+
+	if len(files) == 0 {
+		return "", fmt.Errorf("no models were found in '%s'", path)
+	}
+
+	// add the safetensor/torch config file + tokenizer
+	files = append(files, filepath.Join(path, "config.json"))
+	files = append(files, filepath.Join(path, "params.json"))
+	files = append(files, filepath.Join(path, "added_tokens.json"))
+	files = append(files, filepath.Join(path, "tokenizer.model"))
+
+	for _, fn := range files {
+		f, err := os.Open(fn)
+
+		// just skip whatever files aren't there
+		if os.IsNotExist(err) {
+			if strings.HasSuffix(fn, "tokenizer.model") {
+				// try the parent dir before giving up
+				parentDir := filepath.Dir(path)
+				newFn := filepath.Join(parentDir, "tokenizer.model")
+				f, err = os.Open(newFn)
+				if os.IsNotExist(err) {
+					continue
+				} else if err != nil {
+					return "", err
+				}
+			} else {
+				continue
+			}
+		} else if err != nil {
+			return "", err
+		}
+
+		fi, err := f.Stat()
+		if err != nil {
+			return "", err
+		}
+
+		h, err := zip.FileInfoHeader(fi)
+		if err != nil {
+			return "", err
+		}
+
+		h.Name = filepath.Base(fn)
+		h.Method = zip.Store
+
+		w, err := zipfile.CreateHeader(h)
+		if err != nil {
+			return "", err
+		}
+
+		_, err = io.Copy(w, f)
+		if err != nil {
+			return "", err
+		}
+	}
+
+	return tempfile.Name(), nil
+}
+
 func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, error) {
 	bin, err := os.Open(path)
 	if err != nil {
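Worth noting in the extracted helper: h.Method = zip.Store writes each entry uncompressed. That is presumably deliberate — model weights are high-entropy and barely compress, and the archive is only a container for upload. A tiny standalone demonstration of a stored (uncompressed) entry:

    package main

    import (
        "archive/zip"
        "fmt"
        "os"
    )

    func main() {
        f, err := os.CreateTemp("", "store-demo-*.zip")
        if err != nil {
            panic(err)
        }
        defer os.Remove(f.Name())

        zw := zip.NewWriter(f)
        // zip.Store skips deflate entirely, matching tempZipFiles above.
        w, err := zw.CreateHeader(&zip.FileHeader{Name: "weights.bin", Method: zip.Store})
        if err != nil {
            panic(err)
        }
        if _, err := w.Write(make([]byte, 1<<20)); err != nil {
            panic(err)
        }
        if err := zw.Close(); err != nil {
            panic(err)
        }
        fmt.Println("wrote uncompressed archive:", f.Name())
    }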
@@ -140,7 +140,7 @@ func AMDGetGPUInfo() []GpuInfo {
 	}

 	if int(major) < RocmComputeMin {
-		slog.Warn(fmt.Sprintf("amdgpu too old gfx%d%d%d", major, minor, patch), "gpu", gpuID)
+		slog.Warn(fmt.Sprintf("amdgpu too old gfx%d%d%x", major, minor, patch), "gpu", gpuID)
 		continue
 	}
@@ -266,7 +266,7 @@ func AMDGetGPUInfo() []GpuInfo {
 		}
 		slog.Debug("rocm supported GPUs", "types", supported)
 	}
-	gfx := fmt.Sprintf("gfx%d%d%d", gpuInfo.Major, gpuInfo.Minor, gpuInfo.Patch)
+	gfx := fmt.Sprintf("gfx%d%d%x", gpuInfo.Major, gpuInfo.Minor, gpuInfo.Patch)
 	if !slices.Contains[[]string, string](supported, gfx) {
 		slog.Warn("amdgpu is not supported", "gpu", gpuInfo.ID, "gpu_type", gfx, "library", libDir, "supported_types", supported)
 		// TODO - consider discrete markdown just for ROCM troubleshooting?
@@ -149,13 +149,16 @@ func AMDGetGPUInfo() []GpuInfo {
 		}
 	}
 	if patch != "" {
-		gpuInfo.Patch, err = strconv.Atoi(patch)
+		// Patch rev is hex; e.g. gfx90a
+		p, err := strconv.ParseInt(patch, 16, 0)
 		if err != nil {
 			slog.Info("failed to parse version", "version", gfx, "error", err)
+		} else {
+			gpuInfo.Patch = int(p)
 		}
 	}
 	if gpuInfo.Major < RocmComputeMin {
-		slog.Warn(fmt.Sprintf("amdgpu [%s] too old gfx%d%d%d", gpuInfo.ID, gpuInfo.Major, gpuInfo.Minor, gpuInfo.Patch))
+		slog.Warn(fmt.Sprintf("amdgpu [%s] too old gfx%d%d%x", gpuInfo.ID, gpuInfo.Major, gpuInfo.Minor, gpuInfo.Patch))
 		continue
 	}
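The hex distinction matters for parts like gfx90a, where the patch revision is a hex digit rather than a decimal one. A standalone sketch contrasting the two parses:

    package main

    import (
        "fmt"
        "strconv"
    )

    func main() {
        patch := "a" // patch revision as reported for a gfx90a GPU

        // Old behavior: decimal parse fails on hex digits.
        if _, err := strconv.Atoi(patch); err != nil {
            fmt.Println("Atoi:", err) // Atoi: strconv.Atoi: parsing "a": invalid syntax
        }

        // New behavior: parse as base-16, then print with %x to round-trip "gfx90a".
        p, err := strconv.ParseInt(patch, 16, 0)
        if err != nil {
            panic(err)
        }
        fmt.Printf("gfx%d%d%x\n", 9, 0, p) // gfx90a
    }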
@@ -21,7 +21,7 @@ init_vars() {
 	# TODO - add additional optimization flags...
 	CMAKE_DEFS="-DCMAKE_BUILD_TYPE=Release -DLLAMA_SERVER_VERBOSE=off ${CMAKE_DEFS}"
 fi
 case $(uname -s) in
 	"Darwin")
 		LIB_EXT="dylib"
 		WHOLE_ARCHIVE="-Wl,-force_load"
@@ -165,11 +165,11 @@ if [ -d "${CUDA_LIB_DIR}" ]; then
 fi
 if [ "${ARCH}" == "arm64" ]; then
     echo "ARM CPU detected - disabling unsupported AVX instructions"

+    # ARM-based CPUs such as M1 and Tegra do not support AVX extensions.
+    #
     # CUDA compute < 6.0 lacks proper FP16 support on ARM.
     # Disabling has minimal performance effect while maintaining compatibility.
     ARM64_DEFS="-DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_CUDA_F16=off"
 fi
 # Users building from source can tune the exact flags we pass to cmake for configuring llama.cpp
@@ -5,7 +5,6 @@ import (
 	"log/slog"
 	"os"
 	"strconv"
-	"strings"

 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/format"
@@ -100,8 +99,22 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
 		return 0, 0
 	}

-	var layerCount int
 	layers := ggml.Tensors().Layers()
+
+	var memoryLayerOutput uint64
+	for k, v := range layers {
+		if k == "output" || k == "output_norm" {
+			memoryLayerOutput += v.size()
+		}
+	}
+
+	if gpus[0].Library == "metal" && opts.UseMMap {
+		// memory is preallocated for output tensors
+		memoryRequiredTotal += memoryLayerOutput
+		memoryRequiredPartial += memoryLayerOutput
+	}
+
+	var layerCount int
 	for i := 0; i < int(ggml.KV().BlockCount()); i++ {
 		memoryLayer := layers[fmt.Sprintf("blk.%d", i)].size()
@@ -115,15 +128,11 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
 		}
 	}

-	var memoryLayerOutput uint64
-	for k, v := range layers {
-		if !strings.HasPrefix(k, "blk.") {
-			memoryLayerOutput += v.size()
-		}
+	if gpus[0].Library != "metal" || !opts.UseMMap {
+		// memory was not preallocated for output tensors
+		memoryRequiredTotal += memoryLayerOutput
 	}

-	memoryRequiredTotal += memoryLayerOutput
-
 	if memoryAvailable > memoryRequiredTotal {
 		layerCount = int(ggml.KV().BlockCount()) + 1
 		memoryRequiredPartial = memoryRequiredTotal
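Net effect of these two memory.go hunks: output-tensor memory is identified by the explicit "output"/"output_norm" keys instead of "anything not prefixed blk.", the accounting is hoisted above the per-block loop, and on Metal with mmap enabled the preallocated output tensors are charged to both the total and partial estimates up front rather than added to the total only at the end.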
45 llm/patches/04-metal.diff Normal file
@@ -0,0 +1,45 @@
diff --git a/ggml-metal.m b/ggml-metal.m
index 0207b787..b5e9884b 100644
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -1396,27 +1396,23 @@ static enum ggml_status ggml_metal_graph_compute(
                 // to the matrix-vector kernel
                 int ne11_mm_min = 1;

-#if 0
                 // the numbers below are measured on M2 Ultra for 7B and 13B models
                 // these numbers do not translate to other devices or model sizes
                 // TODO: need to find a better approach
-                if ([ctx->device.name isEqualToString:@"Apple M2 Ultra"]) {
-                    switch (src0t) {
-                        case GGML_TYPE_F16:  ne11_mm_min = 2;  break;
-                        case GGML_TYPE_Q8_0: ne11_mm_min = 7;  break;
-                        case GGML_TYPE_Q2_K: ne11_mm_min = 15; break;
-                        case GGML_TYPE_Q3_K: ne11_mm_min = 7;  break;
-                        case GGML_TYPE_Q4_0:
-                        case GGML_TYPE_Q4_1: ne11_mm_min = 15; break;
-                        case GGML_TYPE_Q4_K: ne11_mm_min = 11; break;
-                        case GGML_TYPE_Q5_0: // not tested yet
-                        case GGML_TYPE_Q5_1: ne11_mm_min = 13; break; // not tested yet
-                        case GGML_TYPE_Q5_K: ne11_mm_min = 7;  break;
-                        case GGML_TYPE_Q6_K: ne11_mm_min = 7;  break;
-                        default:             ne11_mm_min = 1;  break;
-                    }
+                switch (src0t) {
+                    case GGML_TYPE_F16:  ne11_mm_min = 2;  break;
+                    case GGML_TYPE_Q8_0: ne11_mm_min = 7;  break;
+                    case GGML_TYPE_Q2_K: ne11_mm_min = 15; break;
+                    case GGML_TYPE_Q3_K: ne11_mm_min = 7;  break;
+                    case GGML_TYPE_Q4_0:
+                    case GGML_TYPE_Q4_1: ne11_mm_min = 15; break;
+                    case GGML_TYPE_Q4_K: ne11_mm_min = 11; break;
+                    case GGML_TYPE_Q5_0: // not tested yet
+                    case GGML_TYPE_Q5_1: ne11_mm_min = 13; break; // not tested yet
+                    case GGML_TYPE_Q5_K: ne11_mm_min = 7;  break;
+                    case GGML_TYPE_Q6_K: ne11_mm_min = 7;  break;
+                    default:             ne11_mm_min = 1;  break;
                 }
-#endif

                 // for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs
                 // AMD GPU and older A-chips will reuse matrix-vector multiplication kernel
@@ -560,6 +560,13 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
 		return err
 	}
 	defer s.sem.Release(1)

+	// only allow maximum 10 "context shifts" to avoid infinite generation
+	if req.Options.NumPredict < 0 || req.Options.NumPredict > 10*s.options.NumCtx {
+		req.Options.NumPredict = 10 * s.options.NumCtx
+		slog.Debug("setting token limit to 10x num_ctx", "num_ctx", s.options.NumCtx, "num_predict", req.Options.NumPredict)
+	}
+
 	request := map[string]any{
 		"prompt": req.Prompt,
 		"stream": true,
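Worked example: with the default num_ctx of 2048, a request that asks for unlimited generation (num_predict = -1) is now capped at 10 × 2048 = 20480 tokens, so a model stuck repeating itself eventually stops instead of generating forever.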
@@ -29,6 +29,7 @@ import (
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/llm"
 	"github.com/ollama/ollama/parser"
+	"github.com/ollama/ollama/types/model"
 	"github.com/ollama/ollama/version"
 )
@@ -701,36 +702,32 @@ func convertModel(name, path string, fn func(resp api.ProgressResponse)) (string
 	return path, nil
 }

-func CopyModel(src, dest string) error {
-	srcModelPath := ParseModelPath(src)
-	srcPath, err := srcModelPath.GetManifestPath()
+func CopyModel(src, dst model.Name) error {
+	manifests, err := GetManifestPath()
 	if err != nil {
 		return err
 	}

-	destModelPath := ParseModelPath(dest)
-	destPath, err := destModelPath.GetManifestPath()
-	if err != nil {
-		return err
-	}
-	if err := os.MkdirAll(filepath.Dir(destPath), 0o755); err != nil {
+	dstpath := filepath.Join(manifests, dst.FilepathNoBuild())
+	if err := os.MkdirAll(filepath.Dir(dstpath), 0o755); err != nil {
 		return err
 	}

-	// copy the file
-	input, err := os.ReadFile(srcPath)
+	srcpath := filepath.Join(manifests, src.FilepathNoBuild())
+	srcfile, err := os.Open(srcpath)
 	if err != nil {
-		fmt.Println("Error reading file:", err)
 		return err
 	}
+	defer srcfile.Close()

-	err = os.WriteFile(destPath, input, 0o644)
+	dstfile, err := os.Create(dstpath)
 	if err != nil {
-		fmt.Println("Error reading file:", err)
 		return err
 	}
+	defer dstfile.Close()

-	return nil
+	_, err = io.Copy(dstfile, srcfile)
+	return err
 }

 func deleteUnusedLayers(skipModelPath *ModelPath, deleteMap map[string]struct{}, dryRun bool) error {
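Design note on the rewrite: the old implementation buffered the whole manifest with os.ReadFile and printed a misleading "Error reading file" message even for write failures; streaming with io.Copy avoids the intermediate buffer and returns the error unmodified, and addressing manifests via model.Name.FilepathNoBuild drops the ParseModelPath round-trip entirely.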
@@ -29,6 +29,7 @@ import (
 	"github.com/ollama/ollama/llm"
 	"github.com/ollama/ollama/openai"
 	"github.com/ollama/ollama/parser"
+	"github.com/ollama/ollama/types/model"
 	"github.com/ollama/ollama/version"
 )
@@ -788,34 +789,34 @@ func (s *Server) ListModelsHandler(c *gin.Context) {
 }

 func (s *Server) CopyModelHandler(c *gin.Context) {
-	var req api.CopyRequest
-	err := c.ShouldBindJSON(&req)
-	switch {
-	case errors.Is(err, io.EOF):
+	var r api.CopyRequest
+	if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
 		return
-	case err != nil:
+	} else if err != nil {
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
 		return
 	}

-	if req.Source == "" || req.Destination == "" {
-		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "source add destination are required"})
+	src := model.ParseName(r.Source)
+	if !src.IsValid() {
+		_ = c.Error(fmt.Errorf("source %q is invalid", r.Source))
+	}
+
+	dst := model.ParseName(r.Destination)
+	if !dst.IsValid() {
+		_ = c.Error(fmt.Errorf("destination %q is invalid", r.Destination))
+	}
+
+	if len(c.Errors) > 0 {
+		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": c.Errors.Errors()})
 		return
 	}

-	if err := ParseModelPath(req.Destination).Validate(); err != nil {
-		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
-		return
-	}
-
-	if err := CopyModel(req.Source, req.Destination); err != nil {
-		if os.IsNotExist(err) {
-			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Source)})
-		} else {
-			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
-		}
-		return
+	if err := CopyModel(src, dst); errors.Is(err, os.ErrNotExist) {
+		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model %q not found", r.Source)})
+	} else if err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 	}
 }
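The handler now accumulates validation failures on gin's per-request error list instead of bailing at the first one, so a request with both a bad source and a bad destination reports both problems at once. A minimal standalone sketch of that pattern (hypothetical route, port, and messages):

    package main

    import (
        "fmt"
        "net/http"

        "github.com/gin-gonic/gin"
    )

    func main() {
        r := gin.New()
        r.POST("/api/copy", func(c *gin.Context) {
            // Record each validation failure...
            _ = c.Error(fmt.Errorf("source %q is invalid", "bad//name"))
            _ = c.Error(fmt.Errorf("destination %q is invalid", ""))

            // ...then report them all together, as the new handler does.
            if len(c.Errors) > 0 {
                c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": c.Errors.Errors()})
                return
            }
            c.Status(http.StatusOK)
        })
        _ = r.Run(":8080")
    }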
@@ -23,7 +23,6 @@ import (
 type LlmRequest struct {
 	ctx             context.Context //nolint:containedctx
 	model           *Model
-	ggml            *llm.GGML // TODO - how large is this, and do we need to free it after we've finished loading?
 	opts            api.Options
 	sessionDuration time.Duration
 	successCh       chan *runnerRef
@@ -39,7 +38,7 @@ type Scheduler struct {
 	loaded   map[string]*runnerRef
 	loadedMu sync.Mutex

-	loadFn      func(req *LlmRequest, gpus gpu.GpuInfoList)
+	loadFn      func(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList)
 	newServerFn func(gpus gpu.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options) (llm.LlamaServer, error)
 	getGpuFn    func() gpu.GpuInfoList
 }
@@ -47,6 +46,7 @@ type Scheduler struct {
 // TODO set this to zero after a release or two, to enable multiple models by default
 var loadedMax = 1 // Maximum runners; < 1 maps to as many as will fit in VRAM (unlimited for CPU runners)
 var maxQueuedRequests = 10 // TODO configurable
+var numParallel = 1

 func InitScheduler(ctx context.Context) *Scheduler {
 	maxRunners := os.Getenv("OLLAMA_MAX_LOADED_MODELS")
@@ -58,6 +58,14 @@ func InitScheduler(ctx context.Context) *Scheduler {
 			loadedMax = m
 		}
 	}
+	if onp := os.Getenv("OLLAMA_NUM_PARALLEL"); onp != "" {
+		p, err := strconv.Atoi(onp)
+		if err != nil || p <= 0 {
+			slog.Error("invalid parallel setting, must be greater than zero", "OLLAMA_NUM_PARALLEL", onp, "error", err)
+		} else {
+			numParallel = p
+		}
+	}

 	sched := &Scheduler{
 		pendingReqCh: make(chan *LlmRequest, maxQueuedRequests),
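Usage note: starting the server with OLLAMA_NUM_PARALLEL=4 makes each loaded runner serve four requests concurrently; as GetRunner below shows, the KV cache is then sized at numParallel × num_ctx, so parallelism trades VRAM for throughput.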
@@ -74,20 +82,16 @@ func InitScheduler(ctx context.Context) *Scheduler {
 // context must be canceled to decrement ref count and release the runner
 func (s *Scheduler) GetRunner(c context.Context, model *Model, opts api.Options, sessionDuration time.Duration) (chan *runnerRef, chan error) {
-	ggml, err := llm.LoadModel(model.ModelPath)
+	// context split across parallel threads
+	opts.NumCtx = opts.NumCtx * numParallel
 	req := &LlmRequest{
 		ctx:             c,
 		model:           model,
-		ggml:            ggml,
 		opts:            opts,
 		sessionDuration: sessionDuration,
 		successCh:       make(chan *runnerRef),
 		errCh:           make(chan error, 1),
 	}
-	if err != nil {
-		req.errCh <- err
-		return req.successCh, req.errCh
-	}
 	select {
 	case s.pendingReqCh <- req:
 	default:
@@ -130,28 +134,39 @@ func (s *Scheduler) processPending(ctx context.Context) {
 				pending.useLoadedRunner(runner, s.finishedReqCh)
 				break
 			}
-		} else if loadedCount == 0 {
-			slog.Debug("loading first model", "model", pending.model.ModelPath)
-			gpus := s.getGpuFn()
-			g := pickBestFitGPUs(pending, gpus)
-			if g != nil {
-				gpus = g
-			}
-			s.loadFn(pending, gpus)
-			break
 		} else if loadedMax > 0 && loadedCount >= loadedMax {
 			slog.Debug("max runners achieved, unloading one to make room", "runner_count", loadedCount)
 			runnerToExpire = s.findRunnerToUnload(pending)
 		} else {
-			// More than one loaded model, so we have to see if the new one fits
+			// Either no models are loaded or below loadedMax
 			// Get a refreshed GPU list
 			gpus := s.getGpuFn()
+
+			// Load model for fitting
+			ggml, err := llm.LoadModel(pending.model.ModelPath)
+			if err != nil {
+				pending.errCh <- err
+				break
+			}
+
+			// No models loaded. Load the model but prefer the best fit.
+			if loadedCount == 0 {
+				slog.Debug("loading first model", "model", pending.model.ModelPath)
+				g := pickBestFitGPUs(pending, ggml, gpus)
+				if g != nil {
+					gpus = g
+				}
+				s.loadFn(pending, ggml, gpus)
+				break
+			}
+
+			// More than one loaded model, so we have to see if the new one fits
 			// Update free memory from currently loaded models
 			s.updateFreeSpace(gpus)
-			gpus = pickBestFitGPUs(pending, gpus)
+			gpus = pickBestFitGPUs(pending, ggml, gpus)
 			if gpus != nil {
 				slog.Debug("new model fits with existing models, loading")
-				s.loadFn(pending, gpus)
+				s.loadFn(pending, ggml, gpus)
 				break
 			}
 			runnerToExpire = s.findRunnerToUnload(pending)
@@ -282,8 +297,8 @@ func (pending *LlmRequest) useLoadedRunner(runner *runnerRef, finished chan *Llm
 	}()
 }

-func (s *Scheduler) load(req *LlmRequest, gpus gpu.GpuInfoList) {
-	llama, err := s.newServerFn(gpus, req.model.ModelPath, req.ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts)
+func (s *Scheduler) load(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList) {
+	llama, err := s.newServerFn(gpus, req.model.ModelPath, ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts)
 	if err != nil {
 		// some older models are not compatible with newer versions of llama.cpp
 		// show a generalized compatibility error until there is a better way to
@@ -417,16 +432,21 @@ func (runner *runnerRef) needsReload(ctx context.Context, req *LlmRequest) bool
 	slog.Debug("evaluating already loaded", "model", req.model.ModelPath)
 	runner.refMu.Lock()
 	defer runner.refMu.Unlock()
-	// Ignore the NumGPU settings for comparison
-	optsExisting := runner.Options.Runner
-	optsExisting.NumGPU = -1
-	optsNew := req.opts.Runner
-	optsNew.NumGPU = -1

 	timeout := 10 * time.Second
 	if runner.loading {
 		timeout = 2 * time.Minute // Initial load can take a long time for big models on slow systems...
 	}
-	ctx, cancel := context.WithTimeout(ctx, timeout) // BUG -
+
+	// Don't reload runner if num_gpu=-1 was provided
+	optsExisting := runner.Options.Runner
+	optsNew := req.opts.Runner
+	if optsNew.NumGPU < 0 {
+		optsExisting.NumGPU = -1
+		optsNew.NumGPU = -1
+	}
+
+	ctx, cancel := context.WithTimeout(ctx, timeout)
 	defer cancel()
 	if !reflect.DeepEqual(runner.adapters, req.model.AdapterPaths) || // have the adapters changed?
 		!reflect.DeepEqual(runner.projectors, req.model.ProjectorPaths) || // have the projectors changed?
@@ -434,6 +454,7 @@ func (runner *runnerRef) needsReload(ctx context.Context, req *LlmRequest) bool
 		runner.llama.Ping(ctx) != nil {
 		return true
 	}

 	return false
 }
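Behavioral consequence (exercised by the TestNeedsReload additions further down): an explicit num_gpu in a new request is now compared against the loaded runner's setting and can trigger a reload, while num_gpu=-1 (auto) is still normalized away so it matches whatever is already loaded.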
@@ -454,7 +475,7 @@ func (a ByDuration) Less(i, j int) bool {
 // pickBestFitGPUs will try to find the optimal placement of the model in the available GPUs where the model fully fits
 // If the model can not be fit fully within the available GPU(s) nil is returned
-func pickBestFitGPUs(req *LlmRequest, gpus gpu.GpuInfoList) gpu.GpuInfoList {
+func pickBestFitGPUs(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList) gpu.GpuInfoList {
 	var estimatedVRAM uint64
 	for _, gl := range gpus.ByLibrary() {
 		var ok bool
@@ -466,7 +487,7 @@ func pickBestFitGPUs(req *LlmRequest, gpus gpu.GpuInfoList) gpu.GpuInfoList {
 		// First attempt to fit the model into a single GPU
 		for _, g := range sgl {
-			if ok, estimatedVRAM = llm.PredictServerFit([]gpu.GpuInfo{g}, req.ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts); ok {
+			if ok, estimatedVRAM = llm.PredictServerFit([]gpu.GpuInfo{g}, ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts); ok {
 				slog.Debug("new model will fit in available VRAM in single GPU, loading", "model", req.model.ModelPath, "gpu", g.ID, "available", g.FreeMemory, "required", format.HumanBytes2(estimatedVRAM))
 				return []gpu.GpuInfo{g}
 			}
@@ -477,7 +498,7 @@ func pickBestFitGPUs(req *LlmRequest, gpus gpu.GpuInfoList) gpu.GpuInfoList {
 		// - try subsets of GPUs instead of just falling back to 1 or all in a family

 		// Now try all the GPUs
-		if ok, estimatedVRAM = llm.PredictServerFit(gl, req.ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts); ok {
+		if ok, estimatedVRAM = llm.PredictServerFit(gl, ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts); ok {
 			slog.Debug("new model will fit in available VRAM, loading", "model", req.model.ModelPath, "library", gl[0].Library, "required", format.HumanBytes2(estimatedVRAM))
 			return gl
 		}
@@ -47,6 +47,7 @@ func TestLoad(t *testing.T) {
 	ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond)
 	defer done()
 	s := InitScheduler(ctx)
+	var ggml *llm.GGML // value not used in tests
 	req := &LlmRequest{
 		ctx:   ctx,
 		model: &Model{ModelPath: "foo"},
@@ -59,7 +60,7 @@ func TestLoad(t *testing.T) {
 		return nil, fmt.Errorf("something failed to load model blah")
 	}
 	gpus := gpu.GpuInfoList{}
-	s.load(req, gpus)
+	s.load(req, ggml, gpus)
 	require.Len(t, req.successCh, 0)
 	require.Len(t, req.errCh, 1)
 	require.Len(t, s.loaded, 0)
@@ -70,7 +71,7 @@ func TestLoad(t *testing.T) {
 	s.newServerFn = func(gpus gpu.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options) (llm.LlamaServer, error) {
 		return server, nil
 	}
-	s.load(req, gpus)
+	s.load(req, ggml, gpus)
 	select {
 	case err := <-req.errCh:
 		require.NoError(t, err)
@@ -82,7 +83,7 @@ func TestLoad(t *testing.T) {
 	req.model.ModelPath = "dummy_model_path"
 	server.waitResp = fmt.Errorf("wait failure")
-	s.load(req, gpus)
+	s.load(req, ggml, gpus)
 	select {
 	case err := <-req.errCh:
 		require.Contains(t, err.Error(), "wait failure")
@@ -101,6 +102,7 @@ type bundle struct {
 	ctxDone func()
 	srv     *mockLlm
 	req     *LlmRequest
+	ggml    *llm.GGML
 }

 func (scenario *bundle) newServer(gpus gpu.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options) (llm.LlamaServer, error) {
@@ -132,14 +134,15 @@ func newScenario(t *testing.T, ctx context.Context, modelName string, estimatedV
 		{Name: "blk.0.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: &bytes.Reader{}},
 	})
 	assert.Nil(t, err)

 	fname := f.Name()
 	model := &Model{Name: modelName, ModelPath: fname}
-	ggml, err := llm.LoadModel(model.ModelPath)
+	scenario.ggml, err = llm.LoadModel(model.ModelPath)
 	require.NoError(t, err)

 	scenario.req = &LlmRequest{
 		ctx:             scenario.ctx,
 		model:           model,
-		ggml:            ggml,
 		sessionDuration: 5 * time.Millisecond,
 		successCh:       make(chan *runnerRef, 1),
 		errCh:           make(chan error, 1),
@@ -157,13 +160,13 @@ func TestRequests(t *testing.T) {
 	scenario1a.req.sessionDuration = 0
 	scenario1b := newScenario(t, ctx, "ollama-model-1", 11)
 	scenario1b.req.model = scenario1a.req.model
-	scenario1b.req.ggml = scenario1a.req.ggml
+	scenario1b.ggml = scenario1a.ggml
 	scenario1b.req.sessionDuration = 0

 	// simple reload of same model
 	scenario2a := newScenario(t, ctx, "ollama-model-1", 20)
 	scenario2a.req.model = scenario1a.req.model
-	scenario2a.req.ggml = scenario1a.req.ggml
+	scenario2a.ggml = scenario1a.ggml

 	// Multiple loaded models
 	scenario3a := newScenario(t, ctx, "ollama-model-3a", 1*format.GigaByte)
@@ -322,13 +325,14 @@ func TestGetRunner(t *testing.T) {
 	successCh1c, errCh1c := s.GetRunner(scenario1c.ctx, scenario1c.req.model, scenario1c.req.opts, scenario1c.req.sessionDuration)
 	require.Len(t, s.pendingReqCh, 0)
 	require.Len(t, successCh1c, 0)
-	require.Len(t, errCh1c, 0)
-
-	time.Sleep(5 * time.Millisecond)
-	require.Len(t, s.loaded, 0)
+	require.Len(t, errCh1c, 1)
 	err = <-errCh1c
 	require.Contains(t, err.Error(), "bad path")
 	scenario1b.ctxDone()
+
+	time.Sleep(5 * time.Millisecond)
+	require.Len(t, s.loaded, 0)
 }

 // TODO - add one scenario that triggers the bogus finished event with positive ref count
@@ -366,7 +370,9 @@ func TestPrematureExpired(t *testing.T) {
 	require.LessOrEqual(t, len(s.finishedReqCh), 1)
 	time.Sleep(10 * time.Millisecond)
 	require.Len(t, s.finishedReqCh, 0)
+	s.loadedMu.Lock()
 	require.Len(t, s.loaded, 0)
+	s.loadedMu.Unlock()

 	// also shouldn't happen in real life
 	s.finishedReqCh <- scenario1a.req
@@ -426,7 +432,6 @@ func TestUpdateFreeSpace(t *testing.T) {
 	s.updateFreeSpace(gpus)
 	require.Equal(t, uint64(850), gpus[0].FreeMemory)
 	require.Equal(t, uint64(1850), gpus[1].FreeMemory)
-
 }

 func TestFindRunnerToUnload(t *testing.T) {
@@ -485,6 +490,9 @@ func TestNeedsReload(t *testing.T) {
 	require.False(t, resp)
 	req.opts.NumGPU = 99
 	resp = runner.needsReload(ctx, req)
 	require.True(t, resp)
+	req.opts.NumGPU = -1
+	resp = runner.needsReload(ctx, req)
+	require.False(t, resp)
 }
@@ -150,7 +150,7 @@ type Name struct {
 // For any valid s, the fill string is used to fill in missing parts of the
 // Name. The fill string must be a valid Name with the exception that any part
 // may be the string ("?"), which will not be considered for filling.
-func ParseName(s, fill string) Name {
+func ParseNameFill(s, fill string) Name {
 	var r Name
 	parts(s)(func(kind PartKind, part string) bool {
 		if kind == PartDigest && !ParseDigest(part).IsValid() {
@@ -170,6 +170,13 @@ func ParseName(s, fill string) Name {
 		return Name{}
 	}

+// ParseName parses s into a Name, and returns the result of filling it
+// with FillDefault. The input string must be a valid string representation
+// of a model
+func ParseName(s string) Name {
+	return ParseNameFill(s, "")
+}
+
 func parseMask(s string) Name {
 	var r Name
 	parts(s)(func(kind PartKind, part string) bool {
@@ -187,7 +194,7 @@ func parseMask(s string) Name {
 }

 func MustParseName(s, fill string) Name {
-	r := ParseName(s, fill)
+	r := ParseNameFill(s, fill)
 	if !r.IsValid() {
 		panic("invalid Name: " + s)
 	}
@@ -579,7 +586,11 @@ func (r Name) IsValid() bool {
 // it trims any leading "/" and then calls [ParseName] with fill.
 func ParseNameFromURLPath(s, fill string) Name {
 	s = strings.TrimPrefix(s, "/")
-	return ParseName(s, fill)
+	return ParseNameFill(s, fill)
 }

+func ParseNameFromURLPathFill(s, fill string) Name {
+	return ParseNameFill(s, fill)
+}
+
 // URLPath returns a complete, canonicalized, relative URL path using the parts of a
@@ -119,11 +119,11 @@ func TestNameConsecutiveDots(t *testing.T) {
 	for i := 1; i < 10; i++ {
 		s := strings.Repeat(".", i)
 		if i > 1 {
-			if g := ParseName(s, FillNothing).DisplayLong(); g != "" {
+			if g := ParseNameFill(s, FillNothing).DisplayLong(); g != "" {
 				t.Errorf("ParseName(%q) = %q; want empty string", s, g)
 			}
 		} else {
-			if g := ParseName(s, FillNothing).DisplayLong(); g != s {
+			if g := ParseNameFill(s, FillNothing).DisplayLong(); g != s {
 				t.Errorf("ParseName(%q) = %q; want %q", s, g, s)
 			}
 		}
@@ -156,14 +156,14 @@ func TestParseName(t *testing.T) {
 		s := prefix + baseName

 		t.Run(s, func(t *testing.T) {
-			name := ParseName(s, FillNothing)
+			name := ParseNameFill(s, FillNothing)
 			got := fieldsFromName(name)
 			if got != want {
 				t.Errorf("ParseName(%q) = %q; want %q", s, got, want)
 			}

 			// test round-trip
-			if !ParseName(name.DisplayLong(), FillNothing).EqualFold(name) {
+			if !ParseNameFill(name.DisplayLong(), FillNothing).EqualFold(name) {
 				t.Errorf("ParseName(%q).String() = %s; want %s", s, name.DisplayLong(), baseName)
 			}
 		})
@@ -188,7 +188,7 @@ func TestParseNameFill(t *testing.T) {
 	for _, tt := range cases {
 		t.Run(tt.in, func(t *testing.T) {
-			name := ParseName(tt.in, tt.fill)
+			name := ParseNameFill(tt.in, tt.fill)
 			if g := name.DisplayLong(); g != tt.want {
 				t.Errorf("ParseName(%q, %q) = %q; want %q", tt.in, tt.fill, g, tt.want)
 			}
@@ -201,7 +201,7 @@ func TestParseNameFill(t *testing.T) {
 				t.Fatal("expected panic")
 			}
 		}()
-		ParseName("x", "^")
+		ParseNameFill("x", "^")
 	})
 }
@@ -212,7 +212,7 @@ func TestParseNameHTTPDoublePrefixStrip(t *testing.T) {
 	}
 	for _, s := range cases {
 		t.Run(s, func(t *testing.T) {
-			name := ParseName(s, FillNothing)
+			name := ParseNameFill(s, FillNothing)
 			if name.IsValid() {
 				t.Errorf("expected invalid path; got %#v", name)
 			}
@@ -237,7 +237,7 @@ func TestCompleteWithAndWithoutBuild(t *testing.T) {
 	for _, tt := range cases {
 		t.Run(tt.in, func(t *testing.T) {
-			p := ParseName(tt.in, FillNothing)
+			p := ParseNameFill(tt.in, FillNothing)
 			t.Logf("ParseName(%q) = %#v", tt.in, p)
 			if g := p.IsComplete(); g != tt.complete {
 				t.Errorf("Complete(%q) = %v; want %v", tt.in, g, tt.complete)
@@ -252,7 +252,7 @@ func TestCompleteWithAndWithoutBuild(t *testing.T) {
 	// inlined when used in Complete, preventing any allocations or
 	// escaping to the heap.
 	allocs := testing.AllocsPerRun(1000, func() {
-		keep(ParseName("complete.com/x/mistral:latest+Q4_0", FillNothing).IsComplete())
+		keep(ParseNameFill("complete.com/x/mistral:latest+Q4_0", FillNothing).IsComplete())
 	})
 	if allocs > 0 {
 		t.Errorf("Complete allocs = %v; want 0", allocs)
@@ -269,7 +269,7 @@ func TestNameLogValue(t *testing.T) {
 		t.Run(s, func(t *testing.T) {
 			var b bytes.Buffer
 			log := slog.New(slog.NewTextHandler(&b, nil))
-			name := ParseName(s, FillNothing)
+			name := ParseNameFill(s, FillNothing)
 			log.Info("", "name", name)
 			want := fmt.Sprintf("name=%s", name.GoString())
 			got := b.String()
@@ -316,7 +316,7 @@ func TestNameGoString(t *testing.T) {
 	for _, tt := range cases {
 		t.Run(tt.name, func(t *testing.T) {
-			p := ParseName(tt.in, FillNothing)
+			p := ParseNameFill(tt.in, FillNothing)
 			tt.wantGoString = cmp.Or(tt.wantGoString, tt.in)
 			if g := fmt.Sprintf("%#v", p); g != tt.wantGoString {
 				t.Errorf("GoString() = %q; want %q", g, tt.wantGoString)
@@ -326,7 +326,7 @@ func TestNameGoString(t *testing.T) {
 }

 func TestDisplayLongest(t *testing.T) {
-	g := ParseName("example.com/library/mistral:latest+Q4_0", FillNothing).DisplayLongest()
+	g := ParseNameFill("example.com/library/mistral:latest+Q4_0", FillNothing).DisplayLongest()
 	if g != "example.com/library/mistral:latest" {
 		t.Errorf("got = %q; want %q", g, "example.com/library/mistral:latest")
 	}
@@ -377,7 +377,7 @@ func TestDisplayShortest(t *testing.T) {
 		}
 	}()

-	p := ParseName(tt.in, FillNothing)
+	p := ParseNameFill(tt.in, FillNothing)
 	t.Logf("ParseName(%q) = %#v", tt.in, p)
 	if g := p.DisplayShortest(tt.mask); g != tt.want {
 		t.Errorf("got = %q; want %q", g, tt.want)
@@ -388,7 +388,7 @@ func TestDisplayShortest(t *testing.T) {
 func TestParseNameAllocs(t *testing.T) {
 	allocs := testing.AllocsPerRun(1000, func() {
-		keep(ParseName("example.com/mistral:7b+Q4_0", FillNothing))
+		keep(ParseNameFill("example.com/mistral:7b+Q4_0", FillNothing))
 	})
 	if allocs > 0 {
 		t.Errorf("ParseName allocs = %v; want 0", allocs)
@@ -399,7 +399,7 @@ func BenchmarkParseName(b *testing.B) {
 	b.ReportAllocs()

 	for range b.N {
-		keep(ParseName("example.com/mistral:7b+Q4_0", FillNothing))
+		keep(ParseNameFill("example.com/mistral:7b+Q4_0", FillNothing))
 	}
 }
@@ -430,7 +430,7 @@ func FuzzParseName(f *testing.F) {
 	f.Add(":@!@")
 	f.Add("...")
 	f.Fuzz(func(t *testing.T, s string) {
-		r0 := ParseName(s, FillNothing)
+		r0 := ParseNameFill(s, FillNothing)

 		if strings.Contains(s, "..") && !r0.IsZero() {
 			t.Fatalf("non-zero value for path with '..': %q", s)
@@ -453,7 +453,7 @@ func FuzzParseName(f *testing.F) {
 			t.Errorf("String() did not round-trip with case insensitivity: %q\ngot = %q\nwant = %q", s, r0.DisplayLong(), s)
 		}

-		r1 := ParseName(r0.DisplayLong(), FillNothing)
+		r1 := ParseNameFill(r0.DisplayLong(), FillNothing)
 		if !r0.EqualFold(r1) {
 			t.Errorf("round-trip mismatch: %+v != %+v", r0, r1)
 		}
@@ -461,7 +461,7 @@ func FuzzParseName(f *testing.F) {
 }

 func TestNameStringAllocs(t *testing.T) {
-	name := ParseName("example.com/ns/mistral:latest+Q4_0", FillNothing)
+	name := ParseNameFill("example.com/ns/mistral:latest+Q4_0", FillNothing)
 	allocs := testing.AllocsPerRun(1000, func() {
 		keep(name.DisplayLong())
 	})
@@ -483,7 +483,7 @@ func TestNamePath(t *testing.T) {
 	}
 	for _, tt := range cases {
 		t.Run(tt.in, func(t *testing.T) {
-			p := ParseName(tt.in, FillNothing)
+			p := ParseNameFill(tt.in, FillNothing)
 			t.Logf("ParseName(%q) = %#v", tt.in, p)
 			if g := p.DisplayURLPath(); g != tt.want {
 				t.Errorf("got = %q; want %q", g, tt.want)
@@ -526,7 +526,7 @@ func TestNameFilepath(t *testing.T) {
 	}
 	for _, tt := range cases {
 		t.Run(tt.in, func(t *testing.T) {
-			p := ParseName(tt.in, FillNothing)
+			p := ParseNameFill(tt.in, FillNothing)
 			t.Logf("ParseName(%q) = %#v", tt.in, p)
 			g := p.Filepath()
 			g = filepath.ToSlash(g)
@@ -587,7 +587,7 @@ func TestParseNameFilepath(t *testing.T) {
 		t.Run(tt.in, func(t *testing.T) {
 			in := strings.ReplaceAll(tt.in, "/", string(filepath.Separator))
 			fill := cmp.Or(tt.fill, FillNothing)
-			want := ParseName(tt.want, fill)
+			want := ParseNameFill(tt.want, fill)
 			if g := ParseNameFromFilepath(in, fill); !g.EqualFold(want) {
 				t.Errorf("got = %q; want %q", g.DisplayLong(), tt.want)
 			}
@@ -645,12 +645,12 @@ func ExampleName_MapHash() {
 	m := map[uint64]bool{}

 	// key 1
-	m[ParseName("mistral:latest+q4", FillNothing).MapHash()] = true
-	m[ParseName("miSTRal:latest+Q4", FillNothing).MapHash()] = true
-	m[ParseName("mistral:LATest+Q4", FillNothing).MapHash()] = true
+	m[ParseNameFill("mistral:latest+q4", FillNothing).MapHash()] = true
+	m[ParseNameFill("miSTRal:latest+Q4", FillNothing).MapHash()] = true
+	m[ParseNameFill("mistral:LATest+Q4", FillNothing).MapHash()] = true

 	// key 2
-	m[ParseName("mistral:LATest", FillNothing).MapHash()] = true
+	m[ParseNameFill("mistral:LATest", FillNothing).MapHash()] = true

 	fmt.Println(len(m))
 	// Output:
@@ -659,9 +659,9 @@ func ExampleName_MapHash() {
 func ExampleName_CompareFold_sort() {
 	names := []Name{
-		ParseName("mistral:latest", FillNothing),
-		ParseName("mistRal:7b+q4", FillNothing),
-		ParseName("MIstral:7b", FillNothing),
+		ParseNameFill("mistral:latest", FillNothing),
+		ParseNameFill("mistRal:7b+q4", FillNothing),
+		ParseNameFill("MIstral:7b", FillNothing),
 	}

 	slices.SortFunc(names, Name.CompareFold)
@@ -682,7 +682,7 @@ func ExampleName_completeAndResolved() {
 		"x/y/z:latest+q4_0",
 		"@sha123-abc",
 	} {
-		name := ParseName(s, FillNothing)
+		name := ParseNameFill(s, FillNothing)
 		fmt.Printf("complete:%v resolved:%v digest:%s\n", name.IsComplete(), name.IsResolved(), name.Digest())
 	}
@@ -693,7 +693,7 @@ func ExampleName_completeAndResolved() {
 }

 func ExampleName_DisplayShortest() {
-	name := ParseName("example.com/jmorganca/mistral:latest+Q4_0", FillNothing)
+	name := ParseNameFill("example.com/jmorganca/mistral:latest+Q4_0", FillNothing)

 	fmt.Println(name.DisplayShortest("example.com/jmorganca/_:latest"))
 	fmt.Println(name.DisplayShortest("example.com/_/_:latest"))
@@ -701,7 +701,7 @@ func ExampleName_DisplayShortest() {
 	fmt.Println(name.DisplayShortest("_/_/_:_"))

 	// Default
-	name = ParseName("registry.ollama.ai/library/mistral:latest+Q4_0", FillNothing)
+	name = ParseNameFill("registry.ollama.ai/library/mistral:latest+Q4_0", FillNothing)
 	fmt.Println(name.DisplayShortest(""))

 	// Output: