Mirror of https://github.com/ollama/ollama.git (synced 2026-04-27 19:25:55 +02:00)

Compare commits: v0.4.2-rc0 ... v0.4.3-rc0 (33 commits)
Commits:

- c4b34f2a2a
- c3ff916431
- 3fc1dc0e6f
- 7121dfa309
- 5f68fcab12
- ecf41eed05
- b8c66d3307
- 303f4bc79e
- d2a25206b1
- 2f0a8c8778
- bfd30f4286
- 0ef17ede89
- 909a88c5c0
- f602ab4de4
- 807ace5b1f
- 4b8a2e341a
- e66c29261a
- 712d63c3f0
- 6cdf27d154
- 5c18e66384
- 35096a7eff
- 81d55d3e4d
- a14f76491d
- 760cfa27e5
- c9a5aca3da
- d5da2ab7e8
- 1c04117114
- 8b4b243f5f
- b42a596425
- 4759d879f2
- d875e99e46
- 8a35bb926e
- a0ea067b63
Dockerfile (10 changed lines)
@@ -234,17 +234,13 @@ COPY --from=container-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-am
 COPY --from=runners-cuda-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/

 FROM --platform=linux/arm64 ubuntu:22.04 AS runtime-arm64
-COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ /lib/
-COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ /lib/
 RUN apt-get update && \
     apt-get install -y ca-certificates && \
     apt-get clean && rm -rf /var/lib/apt/lists/*
 COPY --from=container-build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/
-COPY --from=cpu-build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
-COPY --from=cuda-11-build-runner-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
-COPY --from=cuda-12-build-runner-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
-COPY --from=cuda-build-jetpack5-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
-COPY --from=cuda-build-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
+COPY --from=runners-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
+COPY --from=runners-jetpack5-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ /lib/
+COPY --from=runners-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ /lib/

 # ROCm libraries larger so we keep it distinct from the CPU/CUDA image
README.md (15 changed lines)
@@ -333,8 +333,13 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [ARGO](https://github.com/xark-argo/argo) (Locally download and run Ollama and Huggingface models with RAG on Mac/Windows/Linux)
 - [G1](https://github.com/bklieger-groq/g1) (Prototype of using prompting strategies to improve the LLM's reasoning through o1-like reasoning chains.)
 - [Ollama App](https://github.com/JHubi1/ollama-app) (Modern and easy-to-use multi-platform client for Ollama)
 - [Perfect Memory AI](https://www.perfectmemory.ai/) (Productivity AI assists personalized by what you have seen on your screen, heard and said in the meetings)
-- [Reddit Rate]((https://github.com/rapidarchitect/reddit_analyzer)) (Search and Rate Reddit topics with a weighted summation)
+- [Hexabot](https://github.com/hexastack/hexabot) (A conversational AI builder)
+- [Reddit Rate](https://github.com/rapidarchitect/reddit_analyzer) (Search and Rate Reddit topics with a weighted summation)
+- [OpenTalkGpt](https://github.com/adarshM84/OpenTalkGpt)
+- [VT](https://github.com/vinhnx/vt.ai) (A minimal multimodal AI chat app, with dynamic conversation routing. Supports local models via Ollama)
+- [Witsy](https://github.com/nbonamy/witsy) (An AI Desktop application avaiable for Mac/Windows/Linux)
+- [Abbey](https://github.com/US-Artificial-Intelligence/abbey) (A configurable AI interface server with notebooks, document storage, and YouTube support)

 ### Terminal

@@ -361,6 +366,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Ollama eBook Summary](https://github.com/cognitivetech/ollama-ebook-summary/)
 - [Ollama Mixture of Experts (MOE) in 50 lines of code](https://github.com/rapidarchitect/ollama_moe)
 - [vim-intelligence-bridge](https://github.com/pepo-ec/vim-intelligence-bridge) Simple interaction of "Ollama" with the Vim editor
 - [SwollamaCLI](https://github.com/marcusziade/Swollama) bundled with the Swollama Swift package. [Demo](https://github.com/marcusziade/Swollama?tab=readme-ov-file#cli-usage)
+- [aichat](https://github.com/sigoden/aichat) All-in-one LLM CLI tool featuring Shell Assistant, Chat-REPL, RAG, AI tools & agents, with access to OpenAI, Claude, Gemini, Ollama, Groq, and more.

 ### Apple Vision Pro

@@ -412,13 +418,18 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Portkey](https://portkey.ai/docs/welcome/integration-guides/ollama)
 - [PromptingTools.jl](https://github.com/svilupp/PromptingTools.jl) with an [example](https://svilupp.github.io/PromptingTools.jl/dev/examples/working_with_ollama)
 - [LlamaScript](https://github.com/Project-Llama/llamascript)
 - [llm-axe](https://github.com/emirsahin1/llm-axe) (Python Toolkit for Building LLM Powered Apps)
 - [Gollm](https://docs.gollm.co/examples/ollama-example)
+- [Gollama for Golang](https://github.com/jonathanhecl/gollama)
 - [Ollamaclient for Golang](https://github.com/xyproto/ollamaclient)
 - [High-level function abstraction in Go](https://gitlab.com/tozd/go/fun)
 - [Ollama PHP](https://github.com/ArdaGnsrn/ollama-php)
 - [Agents-Flex for Java](https://github.com/agents-flex/agents-flex) with [example](https://github.com/agents-flex/agents-flex/tree/main/agents-flex-llm/agents-flex-llm-ollama/src/test/java/com/agentsflex/llm/ollama)
+- [Ollama for Swift](https://github.com/mattt/ollama-swift)
 - [Swollama for Swift](https://github.com/marcusziade/Swollama) with [DocC](https://marcusziade.github.io/Swollama/documentation/swollama/)
+- [GoLamify](https://github.com/prasad89/golamify)
+- [Ollama for Haskell](https://github.com/tusharad/ollama-haskell)
+- [multi-llm-ts](https://github.com/nbonamy/multi-llm-ts) (A Typescript/JavaScript library allowing access to different LLM in unified API)

 ### Mobile

@@ -432,6 +443,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Raycast extension](https://github.com/MassimilianoPasquini97/raycast_ollama)
 - [Discollama](https://github.com/mxyng/discollama) (Discord bot inside the Ollama discord channel)
 - [Continue](https://github.com/continuedev/continue)
+- [Vibe](https://github.com/thewh1teagle/vibe) (Transcribe and analyze meetings with Ollama)
 - [Obsidian Ollama plugin](https://github.com/hinterdupfinger/obsidian-ollama)
 - [Logseq Ollama plugin](https://github.com/omagdy7/ollama-logseq)
 - [NotesOllama](https://github.com/andersrex/notesollama) (Apple Notes Ollama plugin)

@@ -462,6 +474,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [QodeAssist](https://github.com/Palm1r/QodeAssist) (AI-powered coding assistant plugin for Qt Creator)
 - [Obsidian Quiz Generator plugin](https://github.com/ECuiDev/obsidian-quiz-generator)
 - [TextCraft](https://github.com/suncloudsmoon/TextCraft) (Copilot in Word alternative using Ollama)
+- [Alfred Ollama](https://github.com/zeitlings/alfred-ollama) (Alfred Workflow)

 ### Supported backends

@@ -361,7 +361,7 @@ func (t *winTray) showMenu() error {
 	boolRet, _, err = pTrackPopupMenu.Call(
 		uintptr(t.menus[0]),
-		TPM_BOTTOMALIGN|TPM_LEFTALIGN,
+		TPM_BOTTOMALIGN|TPM_LEFTALIGN|TPM_RIGHTBUTTON,
 		uintptr(p.X),
 		uintptr(p.Y),
 		0,

@@ -67,6 +67,7 @@ const (
 	SW_HIDE = 0
 	TPM_BOTTOMALIGN = 0x0020
 	TPM_LEFTALIGN = 0x0000
+	TPM_RIGHTBUTTON = 0x0002
 	WM_CLOSE = 0x0010
 	WM_USER = 0x0400
 	WS_CAPTION = 0x00C00000
docs/api.md (74 changed lines)
@@ -830,10 +830,30 @@ Create a model from a [`Modelfile`](./modelfile.md). It is recommended to set `m
 ### Parameters

-- `name`: name of the model to create
+- `model`: name of the model to create
 - `modelfile` (optional): contents of the Modelfile
 - `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects
 - `path` (optional): path to the Modelfile
+- `quantize` (optional): quantize a non-quantized (e.g. float16) model
+
+#### Quantization types
+
+| Type | Recommended |
+| --- | :-: |
+| q2_K | |
+| q3_K_L | |
+| q3_K_M | |
+| q3_K_S | |
+| q4_0 | |
+| q4_1 | |
+| q4_K_M | * |
+| q4_K_S | |
+| q5_0 | |
+| q5_1 | |
+| q5_K_M | |
+| q5_K_S | |
+| q6_K | |
+| q8_0 | * |

 ### Examples

@@ -845,14 +865,14 @@ Create a new model from a `Modelfile`.
 ```shell
 curl http://localhost:11434/api/create -d '{
-  "name": "mario",
+  "model": "mario",
   "modelfile": "FROM llama3\nSYSTEM You are mario from Super Mario Bros."
 }'
 ```

 ##### Response

-A stream of JSON objects. Notice that the final JSON object shows a `"status": "success"`.
+A stream of JSON objects is returned:

 ```json
 {"status":"reading model metadata"}

@@ -868,13 +888,43 @@ A stream of JSON objects. Notice that the final JSON object shows a `"status": "
 {"status":"success"}
 ```

+#### Quantize a model
+
+Quantize a non-quantized model.
+
+##### Request
+
+```shell
+curl http://localhost:11434/api/create -d '{
+  "model": "llama3.1:quantized",
+  "modelfile": "FROM llama3.1:8b-instruct-fp16",
+  "quantize": "q4_K_M"
+}'
+```
+
+##### Response
+
+A stream of JSON objects is returned:
+
+```
+{"status":"quantizing F16 model to Q4_K_M"}
+{"status":"creating new layer sha256:667b0c1932bc6ffc593ed1d03f895bf2dc8dc6df21db3042284a6f4416b06a29"}
+{"status":"using existing layer sha256:11ce4ee3e170f6adebac9a991c22e22ab3f8530e154ee669954c4bc73061c258"}
+{"status":"using existing layer sha256:0ba8f0e314b4264dfd19df045cde9d4c394a52474bf92ed6a3de22a4ca31a177"}
+{"status":"using existing layer sha256:56bb8bd477a519ffa694fc449c2413c6f0e1d3b1c88fa7e3c9d88d3ae49d4dcb"}
+{"status":"creating new layer sha256:455f34728c9b5dd3376378bfb809ee166c145b0b4c1f1a6feca069055066ef9a"}
+{"status":"writing manifest"}
+{"status":"success"}
+```
+
 ### Check if a Blob Exists

 ```shell
 HEAD /api/blobs/:digest
 ```

-Ensures that the file blob used for a FROM or ADAPTER field exists on the server. This is checking your Ollama server and not Ollama.ai.
+Ensures that the file blob used for a FROM or ADAPTER field exists on the server. This is checking your Ollama server and not ollama.com.

 #### Query Parameters

@@ -979,7 +1029,7 @@ Show information about a model including details, modelfile, template, parameter
 ### Parameters

-- `name`: name of the model to show
+- `model`: name of the model to show
 - `verbose`: (optional) if set to `true`, returns full data for verbose response fields

 ### Examples

@@ -988,7 +1038,7 @@ Show information about a model including details, modelfile, template, parameter
 ```shell
 curl http://localhost:11434/api/show -d '{
-  "name": "llama3.2"
+  "model": "llama3.2"
 }'
 ```

@@ -1068,7 +1118,7 @@ Delete a model and its data.
 ### Parameters

-- `name`: model name to delete
+- `model`: model name to delete

 ### Examples

@@ -1076,7 +1126,7 @@ Delete a model and its data.
 ```shell
 curl -X DELETE http://localhost:11434/api/delete -d '{
-  "name": "llama3:13b"
+  "model": "llama3:13b"
 }'
 ```

@@ -1094,7 +1144,7 @@ Download a model from the ollama library. Cancelled pulls are resumed from where
 ### Parameters

-- `name`: name of the model to pull
+- `model`: name of the model to pull
 - `insecure`: (optional) allow insecure connections to the library. Only use this if you are pulling from your own library during development.
 - `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects

@@ -1104,7 +1154,7 @@ Download a model from the ollama library. Cancelled pulls are resumed from where
 ```shell
 curl http://localhost:11434/api/pull -d '{
-  "name": "llama3.2"
+  "model": "llama3.2"
 }'
 ```

@@ -1166,7 +1216,7 @@ Upload a model to a model library. Requires registering for ollama.ai and adding
 ### Parameters

-- `name`: name of the model to push in the form of `<namespace>/<model>:<tag>`
+- `model`: name of the model to push in the form of `<namespace>/<model>:<tag>`
 - `insecure`: (optional) allow insecure connections to the library. Only use this if you are pushing to your library during development.
 - `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects

@@ -1176,7 +1226,7 @@ Upload a model to a model library. Requires registering for ollama.ai and adding
 ```shell
 curl http://localhost:11434/api/push -d '{
-  "name": "mattw/pygmalion:latest"
+  "model": "mattw/pygmalion:latest"
 }'
 ```
@@ -50,6 +50,9 @@ sudo systemctl restart docker
 docker run -d --gpus=all -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
 ```

+> [!NOTE]
+> If you're running on an NVIDIA JetPack system, Ollama can't automatically discover the correct JetPack version. Pass the environment variable JETSON_JETPACK=5 or JETSON_JETPACK=6 to the container to select version 5 or 6.
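As a concrete illustration of that note (mine, not part of the diff), the variable would be passed with Docker's standard `-e` flag, mirroring the run command shown above:

```shell
# Hypothetical invocation for a JetPack 6 device; image, volume, and port
# are the same as in the documented command above.
docker run -d --gpus=all -e JETSON_JETPACK=6 \
  -v ollama:/root/.ollama -p 11434:11434 \
  --name ollama ollama/ollama
```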
 ### AMD GPU

 To run Ollama using Docker with AMD GPUs, use the `rocm` tag and the following command:
@@ -81,7 +81,7 @@ If you have a GGUF based model or adapter it is possible to import it into Ollam
 * converting a Safetensors adapter with the `convert_lora_to_gguf.py` from Llama.cpp; or
 * downloading a model or adapter from a place such as HuggingFace

-To import a GGUF model, create a `Modelfile` containg:
+To import a GGUF model, create a `Modelfile` containing:

 ```dockerfile
 FROM /path/to/file.gguf
@@ -112,6 +112,21 @@ sudo systemctl status ollama
 > https://www.amd.com/en/support/linux-drivers for best support of your Radeon
 > GPU.

+## Customizing
+
+To customize the installation of Ollama, you can edit the systemd service file or the environment variables by running:
+
+```
+sudo systemctl edit ollama
+```
+
+Alternatively, create an override file manually in `/etc/systemd/system/ollama.service.d/override.conf`:
+
+```ini
+[Service]
+Environment="OLLAMA_DEBUG=1"
+```
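A side note, not part of the diff: when the override file is created by hand rather than through `systemctl edit`, systemd must re-read its configuration before a restart picks up the change:

```shell
sudo systemctl daemon-reload
sudo systemctl restart ollama
```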
 ## Updating

 Update Ollama by running the install script again:

@@ -129,7 +144,7 @@ sudo tar -C /usr -xzf ollama-linux-amd64.tgz
 ## Installing specific versions

 Use `OLLAMA_VERSION` environment variable with the install script to install a specific version of Ollama, including pre-releases. You can find the version numbers in the [releases page](https://github.com/ollama/ollama/releases).

 For example:
@@ -10,7 +10,38 @@ import (
 	"github.com/ollama/ollama/api"
 )

+func TestLongInputContext(t *testing.T) {
+	// Setting NUM_PARALLEL to 1 ensures the allocated context is exactly what
+	// we asked for and there is nothing extra that we could spill over into
+	t.Setenv("OLLAMA_NUM_PARALLEL", "1")
+
+	// Longer needed for small footprint GPUs
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
+	defer cancel()
+	// Set up the test data
+	req := api.GenerateRequest{
+		Model:  "llama2",
+		Prompt: "Oh, don’t speak to me of Austria. Perhaps I don’t understand things, but Austria never has wished, and does not wish, for war. She is betraying us! Russia alone must save Europe. Our gracious sovereign recognizes his high vocation and will be true to it. That is the one thing I have faith in! Our good and wonderful sovereign has to perform the noblest role on earth, and he is so virtuous and noble that God will not forsake him. He will fulfill his vocation and crush the hydra of revolution, which has become more terrible than ever in the person of this murderer and villain! We alone must avenge the blood of the just one.... Whom, I ask you, can we rely on?... England with her commercial spirit will not and cannot understand the Emperor Alexander’s loftiness of soul. She has refused to evacuate Malta. She wanted to find, and still seeks, some secret motive in our actions. What answer did Novosíltsev get? None. The English have not understood and cannot understand the self-abnegation of our Emperor who wants nothing for himself, but only desires the good of mankind. And what have they promised? Nothing! And what little they have promised they will not perform! Prussia has always declared that Buonaparte is invincible, and that all Europe is powerless before him.... And I don’t believe a word that Hardenburg says, or Haugwitz either. This famous Prussian neutrality is just a trap. I have faith only in God and the lofty destiny of our adored monarch. He will save Europe! What country is this referring to?",
+		Stream: &stream,
+		Options: map[string]interface{}{
+			"temperature": 0,
+			"seed":        123,
+			"num_ctx":     128,
+		},
+	}
+	client, _, cleanup := InitServerConnection(ctx, t)
+	defer cleanup()
+	if err := PullIfMissing(ctx, client, req.Model); err != nil {
+		t.Fatalf("PullIfMissing failed: %v", err)
+	}
+	DoGenerate(ctx, t, client, req, []string{"russia", "germany", "france", "england", "austria", "prussia"}, 120*time.Second, 10*time.Second)
+}
+
 func TestContextExhaustion(t *testing.T) {
+	// Setting NUM_PARALLEL to 1 ensures the allocated context is exactly what
+	// we asked for and there is nothing extra that we could spill over into
+	t.Setenv("OLLAMA_NUM_PARALLEL", "1")
+
 	// Longer needed for small footprint GPUs
 	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
 	defer cancel()
@@ -157,9 +157,7 @@ type Context struct {
 	numThreads int
 }

-func (c *Context) KvCacheClear() {
-	C.llama_kv_cache_clear(c.c)
-}
+var ErrKvCacheFull = errors.New("could not find a kv cache slot")

 func (c *Context) Decode(batch *Batch) error {
 	// Positive return values does not mean a fatal error, but rather a warning.

@@ -173,7 +171,7 @@ func (c *Context) Decode(batch *Batch) error {
 	}

 	if code > 0 {
-		return fmt.Errorf("could not find a KV slot for the batch - try reducing the size of the batch or increase the context. code: %d", code)
+		return ErrKvCacheFull
 	}

 	return nil

@@ -195,6 +193,14 @@ func (c *Context) KvCacheSeqCp(srcSeqId int, dstSeqId int, p0 int, p1 int) {
 	C.llama_kv_cache_seq_cp(c.c, C.int(srcSeqId), C.int(dstSeqId), C.int(p0), C.int(p1))
 }

+func (c *Context) KvCacheClear() {
+	C.llama_kv_cache_clear(c.c)
+}
+
+func (c *Context) KvCacheDefrag() {
+	C.llama_kv_cache_defrag(c.c)
+}
+
 // Get the embeddings for a sequence id
 func (c *Context) GetEmbeddingsSeq(seqId int) []float32 {
 	embeddings := unsafe.Pointer(C.llama_get_embeddings_seq(c.c, C.int(seqId)))

@@ -384,6 +390,8 @@ func (b *Batch) Add(token int, embed []float32, pos int, logits bool, seqIds ...
 	if logits {
 		unsafe.Slice(b.c.logits, b.allocSize())[b.c.n_tokens] = 1
+	} else {
+		unsafe.Slice(b.c.logits, b.allocSize())[b.c.n_tokens] = 0
 	}

 	b.c.n_tokens += 1
@@ -203,7 +203,11 @@ func countCommonPrefix(a []input, b []input) int {
 // the newest half into that space (saving numKeep inputs at the beginning).
 //
 // Assumes that at least 1 entry can be freed up by shifting (i.e. numKeep < numCtx)
-func (c *InputCache) ShiftCacheSlot(slot *InputCacheSlot, numKeep int) {
+func (c *InputCache) ShiftCacheSlot(slot *InputCacheSlot, numKeep int) error {
+	if numKeep >= c.numCtx {
+		return fmt.Errorf("unable to shift context - keep exceeds context (keep: %v context: %v)", numKeep, c.numCtx)
+	}
+
 	targetFree := (c.numCtx - numKeep) / 2
 	targetFree = max(targetFree, 1)

@@ -211,18 +215,22 @@ func (c *InputCache) ShiftCacheSlot(slot *InputCacheSlot, numKeep int) {
 	discard := targetFree - currentFree

 	if discard <= 0 {
-		return
+		return nil
 	}

-	slog.Debug("context limit hit - shifting", "limit", c.numCtx, "input", len(slot.Inputs),
+	slog.Debug("context limit hit - shifting", "id", slot.Id, "limit", c.numCtx, "input", len(slot.Inputs),
 		"keep", numKeep, "discard", discard)

 	// TODO (jessegross): KV cache removal can fail for certain types of models
-	c.lc.KvCacheSeqRm(slot.Id, numKeep, numKeep+discard)
+	if !c.lc.KvCacheSeqRm(slot.Id, numKeep, numKeep+discard) {
+		return fmt.Errorf("unable to remove old kv cache entries (id: %v, keep: %v discard: %v)", slot.Id, numKeep, discard)
+	}
 	c.lc.KvCacheSeqAdd(slot.Id, numKeep+discard, len(slot.Inputs), -discard)

 	for i := numKeep + discard; i < len(slot.Inputs); i++ {
 		slot.Inputs[i-discard] = slot.Inputs[i]
 	}
 	slot.Inputs = slot.Inputs[:len(slot.Inputs)-discard]
+
+	return nil
 }
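To make the shift arithmetic above concrete, here is a standalone sketch with invented numbers (not part of the diff):

```go
package main

import "fmt"

func main() {
	// Hypothetical values: an 8-token context with 2 tokens pinned by numKeep.
	numCtx, numKeep := 8, 2
	used := 8 // the slot is full

	targetFree := max((numCtx-numKeep)/2, 1) // (8-2)/2 = 3
	currentFree := numCtx - used             // 0
	discard := targetFree - currentFree      // 3

	// The slot keeps inputs [0:2), drops [2:5), and slides [5:8) down by 3.
	fmt.Println("discard:", discard)
}
```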
@@ -45,6 +45,9 @@ type Sequence struct {
 	// prompt inputs left to evaluate
 	inputs []input

+	// inputs that have been added to a batch but not yet submitted to Decode
+	pendingInputs []input
+
 	// tokens that have been generated but not returned yet (e.g. for stop sequences)
 	pendingResponses []string

@@ -119,7 +122,10 @@ func (s *Server) NewSequence(prompt string, images []ImageData, params NewSequen
 	params.numKeep = min(params.numKeep, s.cache.numCtx-1)

 	if len(inputs) > s.cache.numCtx {
-		slog.Warn("input exceeds context length", "prompt", len(inputs), "limit", s.cache.numCtx)
+		slog.Warn("truncating input prompt", "limit", s.cache.numCtx, "prompt", len(inputs), "numKeep", params.numKeep)
+		newInputs := inputs[:params.numKeep]
+		newInputs = append(newInputs, inputs[len(inputs)-s.cache.numCtx+params.numKeep:]...)
+		inputs = newInputs
 	}

 	var sc *llama.SamplingContext
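A quick sanity check on the truncation slice math above, again with invented numbers rather than anything from the diff:

```go
package main

import "fmt"

func main() {
	// Hypothetical sizes: an 11-token prompt, an 8-token context, numKeep = 2.
	inputs := []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
	numCtx, numKeep := 8, 2

	// Keep the first numKeep tokens plus the newest numCtx-numKeep tokens,
	// the same slicing the new NewSequence code performs.
	newInputs := inputs[:numKeep]
	newInputs = append(newInputs, inputs[len(inputs)-numCtx+numKeep:]...)

	fmt.Println(newInputs) // [0 1 5 6 7 8 9 10]: back down to numCtx tokens
}
```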
@@ -324,7 +330,11 @@ func (s *Server) run(ctx context.Context) {
 		case <-ctx.Done():
 			return
 		default:
-			s.processBatch(tokenBatch, embedBatch)
+			err := s.processBatch(tokenBatch, embedBatch)
+			if err != nil {
+				panic(err)
+			}

 			tokenBatch.Clear()
 			embedBatch.Clear()
 		}

@@ -338,7 +348,7 @@ func (s *Server) run(ctx context.Context) {
 // these should instead be handled by the handlers
 // it should only be responsible for accepting tokens or embeddings and
 // processing batches as fast as possible
-func (s *Server) processBatch(tokenBatch *llama.Batch, embedBatch *llama.Batch) {
+func (s *Server) processBatch(tokenBatch *llama.Batch, embedBatch *llama.Batch) error {
 	s.mu.Lock()
 	for s.allNil() {
 		s.cond.Wait() // Wait until an item is added

@@ -363,14 +373,13 @@ func (s *Server) processBatch(tokenBatch *llama.Batch, embedBatch *llama.Batch)
 			continue
 		}

-		var numInputsProcessed int
-		shifted := false
-
 		for i, input := range seq.inputs {
-			if len(seq.cache.Inputs)+1 > s.cache.numCtx {
-				if !shifted {
-					s.cache.ShiftCacheSlot(seq.cache, seq.numKeep)
-					shifted = true
+			if len(seq.cache.Inputs)+len(seq.pendingInputs)+1 > s.cache.numCtx {
+				if len(seq.pendingInputs) == 0 {
+					err := s.cache.ShiftCacheSlot(seq.cache, seq.numKeep)
+					if err != nil {
+						return err
+					}
 				} else {
 					break
 				}

@@ -399,27 +408,30 @@ func (s *Server) processBatch(tokenBatch *llama.Batch, embedBatch *llama.Batch)
 			}

 			crossAttention = seq.crossAttention
-			batch.Add(input.token, input.embed, len(seq.cache.Inputs), i+1 == len(seq.inputs), seq.cache.Id)
-			seq.cache.Inputs = append(seq.cache.Inputs, input)
-			numInputsProcessed++
-		}
-
-		if numInputsProcessed > 0 {
-			seq.inputs = seq.inputs[numInputsProcessed:]
+			batch.Add(input.token, input.embed, len(seq.cache.Inputs)+len(seq.pendingInputs), i+1 == len(seq.inputs), seq.cache.Id)
+			seq.pendingInputs = append(seq.pendingInputs, input)
 			seq.iBatch = batch.NumTokens() - 1
 		}
+
+		seq.inputs = seq.inputs[len(seq.pendingInputs):]
 	}

 	if batch == nil || batch.NumTokens() == 0 {
-		return
+		return nil
 	}

 	s.lc.SetCrossAttention(crossAttention)

 	err := s.lc.Decode(batch)
 	if err != nil {
-		slog.Error("failed to decode batch", "error", err)
-		return
+		if errors.Is(err, llama.ErrKvCacheFull) {
+			slog.Debug("defragmenting kv cache")
+			s.cache.lc.KvCacheDefrag()
+			err = s.lc.Decode(batch)
+		}
+		if err != nil {
+			return fmt.Errorf("failed to decode batch: %w", err)
+		}
 	}

 	if crossAttention {

@@ -434,6 +446,12 @@ func (s *Server) processBatch(tokenBatch *llama.Batch, embedBatch *llama.Batch)
 			continue
 		}

+		// After calling Decode, pending inputs are now in the cache
+		if len(seq.pendingInputs) > 0 {
+			seq.cache.Inputs = append(seq.cache.Inputs, seq.pendingInputs...)
+			seq.pendingInputs = []input{}
+		}
+
 		// don't sample prompt processing
 		if len(seq.inputs) != 0 {
 			continue

@@ -446,7 +464,7 @@ func (s *Server) processBatch(tokenBatch *llama.Batch, embedBatch *llama.Batch)
 		// if done processing the prompt, generate an embedding and return
 		if seq.embeddingOnly {
-			embed := s.lc.GetEmbeddingsSeq(i)
+			embed := s.lc.GetEmbeddingsSeq(seq.cache.Id)
 			if embed == nil {
 				embed = s.lc.GetEmbeddingsIth(seq.iBatch)
 			}

@@ -516,6 +534,8 @@ func (s *Server) processBatch(tokenBatch *llama.Batch, embedBatch *llama.Batch)
 			s.removeSequence(i, "connection")
 		}
 	}
+
+	return nil
 }

 // TODO (jmorganca): use structs from the api package to avoid duplication
@@ -838,13 +838,15 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
 	}

 	if err := scanner.Err(); err != nil {
-		if strings.Contains(err.Error(), "unexpected EOF") {
+		if strings.Contains(err.Error(), "unexpected EOF") || strings.Contains(err.Error(), "forcibly closed") {
 			s.Close()
-			msg := ""
+			var msg string
 			if s.status != nil && s.status.LastErrMsg != "" {
 				msg = s.status.LastErrMsg
+			} else {
+				msg = err.Error()
 			}
-			return fmt.Errorf("an unknown error was encountered while running the model %s", msg)
+			return fmt.Errorf("an error was encountered while running the model: %s", msg)
 		}

 		return fmt.Errorf("error reading llm response: %v", err)

@@ -1092,7 +1094,9 @@ func (s *llmServer) EstimatedTotal() uint64 {
 func (s *llmServer) EstimatedVRAMByGPU(gpuID string) uint64 {
 	for i, gpu := range s.gpus {
 		if gpu.ID == gpuID {
-			return s.estimate.GPUSizes[i]
+			if i < len(s.estimate.GPUSizes) {
+				return s.estimate.GPUSizes[i]
+			}
 		}
 	}
 	return 0
@@ -27,6 +27,7 @@ var errorPrefixes = []string{
 	"\"ERR\"",
 	"error loading model",
 	"GGML_ASSERT",
+	"Deepseek2 does not support K-shift",
 }

 func (w *StatusWriter) Write(b []byte) (int, error) {
@@ -4,9 +4,12 @@
 set -eu

+red="$( (/usr/bin/tput bold || :; /usr/bin/tput setaf 1 || :) 2>&-)"
+plain="$( (/usr/bin/tput sgr0 || :) 2>&-)"
+
 status() { echo ">>> $*" >&2; }
-error() { echo "ERROR $*"; exit 1; }
-warning() { echo "WARNING: $*"; }
+error() { echo "${red}ERROR:${plain} $*"; exit 1; }
+warning() { echo "${red}WARNING:${plain} $*"; }

 TEMP_DIR=$(mktemp -d)
 cleanup() { rm -rf $TEMP_DIR; }

@@ -93,6 +96,22 @@ else
     fi
 fi

+# Check for NVIDIA JetPack systems with additional downloads
+if [ -f /etc/nv_tegra_release ] ; then
+    if grep R36 /etc/nv_tegra_release > /dev/null ; then
+        status "Downloading JetPack 6 components"
+        curl --fail --show-error --location --progress-bar \
+            "https://ollama.com/download/ollama-linux-${ARCH}-jetpack6.tgz${VER_PARAM}" | \
+            $SUDO tar -xzf - -C "$OLLAMA_INSTALL_DIR"
+    elif grep R35 /etc/nv_tegra_release > /dev/null ; then
+        status "Downloading JetPack 5 components"
+        curl --fail --show-error --location --progress-bar \
+            "https://ollama.com/download/ollama-linux-${ARCH}-jetpack5.tgz${VER_PARAM}" | \
+            $SUDO tar -xzf - -C "$OLLAMA_INSTALL_DIR"
+    else
+        warning "Unsupported JetPack version detected. GPU may not be supported"
+    fi
+fi
+
 install_success() {
     status 'The Ollama API is now available at 127.0.0.1:11434.'
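For orientation (my note, and an assumption about the file layout rather than anything in the diff): the R36/R35 greps above key off the release tag that JetPack images write at the top of /etc/nv_tegra_release, which looks roughly like:

```shell
# First line of /etc/nv_tegra_release on a JetPack 5 board (illustrative only;
# revision, board, and date fields vary by image):
# R35 (release), REVISION: 4.1, BOARD: t186ref, EABI: aarch64
```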
@@ -146,6 +165,12 @@ EOF
 start_service() { $SUDO systemctl restart ollama; }
 trap start_service EXIT
 ;;
+*)
+    warning "systemd is not running"
+    if [ "$IS_WSL2" = true ]; then
+        warning "see https://learn.microsoft.com/en-us/windows/wsl/systemd#how-to-enable-systemd to enable it"
+    fi
+    ;;
 esac
 }

@@ -163,6 +188,13 @@ if [ "$IS_WSL2" = true ]; then
     exit 0
 fi

+# Don't attempt to install drivers on Jetson systems
+if [ -f /etc/nv_tegra_release ] ; then
+    status "NVIDIA JetPack ready."
+    install_success
+    exit 0
+fi
+
 # Install GPU dependencies on Linux
 if ! available lspci && ! available lshw; then
     warning "Unable to detect NVIDIA/AMD GPU. Install lspci or lshw to automatically detect and install GPU dependencies."
@@ -13,6 +13,7 @@ import (
 	"io"
 	"log"
 	"log/slog"
+	"net"
 	"net/http"
 	"net/url"
 	"os"

@@ -1071,6 +1072,21 @@ func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.UR
 	return nil, errUnauthorized
 }

+// testMakeRequestDialContext specifies the dial function for the http client in
+// makeRequest. It can be used to resolve hosts in model names to local
+// addresses for testing. For example, the model name ("example.com/my/model")
+// can be directed to push/pull from "127.0.0.1:1234".
+//
+// This is not safe to set across goroutines. It should be set in
+// the main test goroutine, and not by tests marked to run in parallel with
+// t.Parallel().
+//
+// It should be cleared after use, otherwise it will affect other tests.
+//
+// Ideally we would have some set this up the stack, but the code is not
+// structured in a way that makes this easy, so this will have to do for now.
+var testMakeRequestDialContext func(ctx context.Context, network, addr string) (net.Conn, error)
+
 func makeRequest(ctx context.Context, method string, requestURL *url.URL, headers http.Header, body io.Reader, regOpts *registryOptions) (*http.Response, error) {
 	if requestURL.Scheme != "http" && regOpts != nil && regOpts.Insecure {
 		requestURL.Scheme = "http"

@@ -1105,6 +1121,9 @@ func makeRequest(ctx context.Context, method string, requestURL *url.URL, header
 	}

 	resp, err := (&http.Client{
+		Transport: &http.Transport{
+			DialContext: testMakeRequestDialContext,
+		},
 		CheckRedirect: regOpts.CheckRedirect,
 	}).Do(req)
 	if err != nil {
@@ -32,7 +32,7 @@ func TestChatPrompt(t *testing.T) {
 	mllamaModel := Model{Template: tmpl, ProjectorPaths: []string{"vision"}, Config: ConfigV2{ModelFamilies: []string{"mllama"}}}

 	createImg := func(width, height int) ([]byte, error) {
-		img := image.NewRGBA(image.Rect(0, 0, 5, 5))
+		img := image.NewRGBA(image.Rect(0, 0, width, height))
 		var buf bytes.Buffer

 		if err := png.Encode(&buf, img); err != nil {
@@ -507,7 +507,7 @@ func (s *Server) EmbeddingsHandler(c *gin.Context) {
 	embedding, err := r.Embedding(c.Request.Context(), req.Prompt)
 	if err != nil {
 		slog.Info(fmt.Sprintf("embedding generation failed: %v", err))
-		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
+		c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Errorf("failed to generate embedding: %v", err)})
 		return
 	}

@@ -540,7 +540,8 @@ func (s *Server) PullHandler(c *gin.Context) {
 		return
 	}

-	if err := checkNameExists(name); err != nil {
+	name, err = getExistingName(name)
+	if err != nil {
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
 		return
 	}

@@ -621,19 +622,20 @@ func (s *Server) PushHandler(c *gin.Context) {
 	streamResponse(c, ch)
 }

-func checkNameExists(name model.Name) error {
-	names, err := Manifests(true)
+// getExistingName returns the original, on disk name if the input name is a
+// case-insensitive match, otherwise it returns the input name.
+func getExistingName(n model.Name) (model.Name, error) {
+	var zero model.Name
+	existing, err := Manifests(true)
 	if err != nil {
-		return err
+		return zero, err
 	}

-	for n := range names {
-		if strings.EqualFold(n.Filepath(), name.Filepath()) && n != name {
-			return errors.New("a model with that name already exists")
+	for e := range existing {
+		if n.EqualFold(e) {
+			return e, nil
 		}
 	}

-	return nil
+	return n, nil
 }

 func (s *Server) CreateHandler(c *gin.Context) {

@@ -652,7 +654,8 @@ func (s *Server) CreateHandler(c *gin.Context) {
 		return
 	}

-	if err := checkNameExists(name); err != nil {
+	name, err := getExistingName(name)
+	if err != nil {
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
 		return
 	}

@@ -958,14 +961,19 @@ func (s *Server) CopyHandler(c *gin.Context) {
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("source %q is invalid", r.Source)})
 		return
 	}
+	src, err := getExistingName(src)
+	if err != nil {
+		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
+		return
+	}

 	dst := model.ParseName(r.Destination)
 	if !dst.IsValid() {
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("destination %q is invalid", r.Destination)})
 		return
 	}

-	if err := checkNameExists(dst); err != nil {
+	dst, err = getExistingName(dst)
+	if err != nil {
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
 		return
 	}
@@ -7,13 +7,18 @@ import (
 	"encoding/json"
 	"fmt"
 	"io"
+	"io/fs"
 	"math"
+	"math/rand/v2"
+	"net"
 	"net/http"
+	"net/http/httptest"
 	"os"
 	"path/filepath"
 	"sort"
 	"strings"
 	"testing"
+	"unicode"

 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/llm"

@@ -473,83 +478,129 @@ func Test_Routes(t *testing.T) {
 	}
 }

-func TestCase(t *testing.T) {
+func casingShuffle(s string) string {
+	rr := []rune(s)
+	for i := range rr {
+		if rand.N(2) == 0 {
+			rr[i] = unicode.ToUpper(rr[i])
+		} else {
+			rr[i] = unicode.ToLower(rr[i])
+		}
+	}
+	return string(rr)
+}
+
+func TestManifestCaseSensitivity(t *testing.T) {
 	t.Setenv("OLLAMA_MODELS", t.TempDir())

-	cases := []string{
-		"mistral",
-		"llama3:latest",
-		"library/phi3:q4_0",
-		"registry.ollama.ai/library/gemma:q5_K_M",
-		// TODO: host:port currently fails on windows (#4107)
-		// "localhost:5000/alice/bob:latest",
-	}
+	r := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+		io.WriteString(w, `{}`) //nolint:errcheck
+	}))
+	defer r.Close()
+
+	nameUsed := make(map[string]bool)
+	name := func() string {
+		const fqmn = "example/namespace/model:tag"
+		for {
+			v := casingShuffle(fqmn)
+			if nameUsed[v] {
+				continue
+			}
+			nameUsed[v] = true
+			return v
+		}
+	}
+
+	wantStableName := name()
+
+	// checkManifestList tests that there is strictly one manifest in the
+	// models directory, and that the manifest is for the model under test.
+	checkManifestList := func() {
+		t.Helper()
+
+		mandir := filepath.Join(os.Getenv("OLLAMA_MODELS"), "manifests/")
+		var entries []string
+		t.Logf("dir entries:")
+		fsys := os.DirFS(mandir)
+		err := fs.WalkDir(fsys, ".", func(path string, info fs.DirEntry, err error) error {
+			if err != nil {
+				return err
+			}
+			t.Logf(" %s", fs.FormatDirEntry(info))
+			if info.IsDir() {
+				return nil
+			}
+			path = strings.TrimPrefix(path, mandir)
+			entries = append(entries, path)
+			return nil
+		})
+		if err != nil {
+			t.Fatalf("failed to walk directory: %v", err)
+		}
+
+		if len(entries) != 1 {
+			t.Errorf("len(got) = %d, want 1", len(entries))
+			return // do not use Fatal so following steps run
+		}
+
+		g := entries[0] // raw path
+		g = filepath.ToSlash(g)
+		w := model.ParseName(wantStableName).Filepath()
+		w = filepath.ToSlash(w)
+		if g != w {
+			t.Errorf("\ngot: %s\nwant: %s", g, w)
+		}
+	}
+
+	checkOK := func(w *httptest.ResponseRecorder) {
+		t.Helper()
+		if w.Code != http.StatusOK {
+			t.Errorf("code = %d, want 200", w.Code)
+			t.Logf("body: %s", w.Body.String())
+		}
+	}

 	var s Server
-	for _, tt := range cases {
-		t.Run(tt, func(t *testing.T) {
-			w := createRequest(t, s.CreateHandler, api.CreateRequest{
-				Name:      tt,
-				Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
-				Stream:    &stream,
-			})
-
-			if w.Code != http.StatusOK {
-				t.Fatalf("expected status 200 got %d", w.Code)
-			}
-
-			expect, err := json.Marshal(map[string]string{"error": "a model with that name already exists"})
-			if err != nil {
-				t.Fatal(err)
-			}
-
-			t.Run("create", func(t *testing.T) {
-				w = createRequest(t, s.CreateHandler, api.CreateRequest{
-					Name:      strings.ToUpper(tt),
-					Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
-					Stream:    &stream,
-				})
-
-				if w.Code != http.StatusBadRequest {
-					t.Fatalf("expected status 500 got %d", w.Code)
-				}
-
-				if !bytes.Equal(w.Body.Bytes(), expect) {
-					t.Fatalf("expected error %s got %s", expect, w.Body.String())
-				}
-			})
-
-			t.Run("pull", func(t *testing.T) {
-				w := createRequest(t, s.PullHandler, api.PullRequest{
-					Name:   strings.ToUpper(tt),
-					Stream: &stream,
-				})
-
-				if w.Code != http.StatusBadRequest {
-					t.Fatalf("expected status 500 got %d", w.Code)
-				}
-
-				if !bytes.Equal(w.Body.Bytes(), expect) {
-					t.Fatalf("expected error %s got %s", expect, w.Body.String())
-				}
-			})
-
-			t.Run("copy", func(t *testing.T) {
-				w := createRequest(t, s.CopyHandler, api.CopyRequest{
-					Source:      tt,
-					Destination: strings.ToUpper(tt),
-				})
-
-				if w.Code != http.StatusBadRequest {
-					t.Fatalf("expected status 500 got %d", w.Code)
-				}
-
-				if !bytes.Equal(w.Body.Bytes(), expect) {
-					t.Fatalf("expected error %s got %s", expect, w.Body.String())
-				}
-			})
-		})
+	testMakeRequestDialContext = func(ctx context.Context, _, _ string) (net.Conn, error) {
+		var d net.Dialer
+		return d.DialContext(ctx, "tcp", r.Listener.Addr().String())
 	}
+	t.Cleanup(func() { testMakeRequestDialContext = nil })
+
+	t.Logf("creating")
+	checkOK(createRequest(t, s.CreateHandler, api.CreateRequest{
+		// Start with the stable name, and later use a case-shuffled
+		// version.
+		Name: wantStableName,
+
+		Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
+		Stream:    &stream,
+	}))
+	checkManifestList()
+
+	t.Logf("creating (again)")
+	checkOK(createRequest(t, s.CreateHandler, api.CreateRequest{
+		Name:      name(),
+		Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
+		Stream:    &stream,
+	}))
+	checkManifestList()
+
+	t.Logf("pulling")
+	checkOK(createRequest(t, s.PullHandler, api.PullRequest{
+		Name:     name(),
+		Stream:   &stream,
+		Insecure: true,
+	}))
+	checkManifestList()
+
+	t.Logf("copying")
+	checkOK(createRequest(t, s.CopyHandler, api.CopyRequest{
+		Source:      name(),
+		Destination: name(),
+	}))
+	checkManifestList()
 }

 func TestShow(t *testing.T) {
@@ -298,6 +298,13 @@ func (n Name) LogValue() slog.Value {
 	return slog.StringValue(n.String())
 }

+func (n Name) EqualFold(o Name) bool {
+	return strings.EqualFold(n.Host, o.Host) &&
+		strings.EqualFold(n.Namespace, o.Namespace) &&
+		strings.EqualFold(n.Model, o.Model) &&
+		strings.EqualFold(n.Tag, o.Tag)
+}
+
 func isValidLen(kind partKind, s string) bool {
 	switch kind {
 	case kindHost:
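A brief usage sketch of the `EqualFold` helper added above (my own illustration, not from the diff):

```go
package main

import (
	"fmt"

	"github.com/ollama/ollama/types/model"
)

func main() {
	// Two casings of the same fully qualified model name.
	a := model.ParseName("registry.ollama.ai/library/llama3:latest")
	b := model.ParseName("registry.ollama.ai/Library/LLAMA3:Latest")

	fmt.Println(a.EqualFold(b)) // true: every part matches case-insensitively
	fmt.Println(a == b)         // false: the raw casing differs
}
```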