mirror of
https://github.com/ollama/ollama.git
synced 2026-04-26 18:55:53 +02:00
Compare commits
97 Commits
v0.1.33-rc
...
mxyng/spli
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
dc474f9b83 | ||
|
|
41ae232e10 | ||
|
|
122b35c784 | ||
|
|
3244a25c79 | ||
|
|
b535afe35c | ||
|
|
fd071eab8b | ||
|
|
da0bb5d772 | ||
|
|
1909e624ce | ||
|
|
1d8c850f38 | ||
|
|
e9ae607ece | ||
|
|
93707fa3f2 | ||
|
|
94c369095f | ||
|
|
9164b0161b | ||
|
|
bf4fc25f7b | ||
|
|
5b806d8d24 | ||
|
|
cb1e072643 | ||
|
|
45b6a12e45 | ||
|
|
68755f1f5e | ||
|
|
997a455039 | ||
|
|
88775e1ff9 | ||
|
|
8867e744ff | ||
|
|
4fd064bea6 | ||
|
|
59fbceedcc | ||
|
|
321d57e1a0 | ||
|
|
ba26c7aa00 | ||
|
|
63c763685f | ||
|
|
34a4a94f13 | ||
|
|
f4a73d57a4 | ||
|
|
948114e3e3 | ||
|
|
a3e60d9058 | ||
|
|
5ea844964e | ||
|
|
bd8eed57fc | ||
|
|
9cf0f2e973 | ||
|
|
176ad3aa6e | ||
|
|
4d08363580 | ||
|
|
8907bf51d2 | ||
|
|
abe614c705 | ||
|
|
238715037d | ||
|
|
c0a00f68ae | ||
|
|
f0c454ab57 | ||
|
|
b9f74ff3d6 | ||
|
|
fcf4d60eee | ||
|
|
e33d5c2dbc | ||
|
|
18d9a7e1f1 | ||
|
|
8488388cbd | ||
|
|
588901f449 | ||
|
|
0a7fdbe533 | ||
|
|
5950c176ca | ||
|
|
23d23409a0 | ||
|
|
9009bedf13 | ||
|
|
d4ac57e240 | ||
|
|
7b59d1770f | ||
|
|
95ead8ffba | ||
|
|
7aa08a77ca | ||
|
|
7e432cdfac | ||
|
|
586672f490 | ||
|
|
b03408de74 | ||
|
|
1e6a28bf5b | ||
|
|
d6e3b64582 | ||
|
|
114c932a8e | ||
|
|
7f7103de06 | ||
|
|
c631a9c726 | ||
|
|
8fd9e56804 | ||
|
|
8a65717f55 | ||
|
|
6d3152a98a | ||
|
|
b438d485f1 | ||
|
|
204349b17b | ||
|
|
86e67fc4a9 | ||
|
|
2bed62926e | ||
|
|
aad8d128a0 | ||
|
|
ec1acbb867 | ||
|
|
e4859c4563 | ||
|
|
8e30eb26bd | ||
|
|
0b5c589ca2 | ||
|
|
65fadddc85 | ||
|
|
ed5fb088c4 | ||
|
|
f81f308118 | ||
|
|
b1390a7b37 | ||
|
|
11d83386a5 | ||
|
|
bb31def011 | ||
|
|
41e03ede95 | ||
|
|
7fea1ecdf6 | ||
|
|
054894271d | ||
|
|
6fef042f0b | ||
|
|
5c0c2d1d09 | ||
|
|
37f9c8ad99 | ||
|
|
2a80f55e2a | ||
|
|
421c878a2d | ||
|
|
36666c2142 | ||
|
|
85801317d1 | ||
|
|
2ed0d65948 | ||
|
|
d459dc4ad1 | ||
|
|
40bc4622ef | ||
|
|
c0f818a07a | ||
|
|
8671fdeda6 | ||
|
|
2619850fb4 | ||
|
|
8feb97dc0d |
2
.github/workflows/release.yaml
vendored
2
.github/workflows/release.yaml
vendored
@@ -331,8 +331,6 @@ jobs:
|
|||||||
$env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
|
$env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
|
||||||
$env:PATH="$gopath;$env:PATH"
|
$env:PATH="$gopath;$env:PATH"
|
||||||
$env:OLLAMA_SKIP_GENERATE="1"
|
$env:OLLAMA_SKIP_GENERATE="1"
|
||||||
$env:NVIDIA_DIR=$(resolve-path ".\dist\deps")
|
|
||||||
$env:HIP_PATH=$(resolve-path ".\dist\deps")
|
|
||||||
& .\scripts\build_windows.ps1
|
& .\scripts\build_windows.ps1
|
||||||
- uses: actions/upload-artifact@v4
|
- uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
|
|||||||
15
.github/workflows/test.yaml
vendored
15
.github/workflows/test.yaml
vendored
@@ -1,5 +1,15 @@
|
|||||||
name: test
|
name: test
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
# For PRs, later CI runs preempt previous ones. e.g. a force push on a PR
|
||||||
|
# cancels running CI jobs and starts all new ones.
|
||||||
|
#
|
||||||
|
# For non-PR pushes, concurrency.group needs to be unique for every distinct
|
||||||
|
# CI run we want to have happen. Use run_id, which in practice means all
|
||||||
|
# non-PR CI runs will be allowed to run without preempting each other.
|
||||||
|
group: ${{ github.workflow }}-$${{ github.pull_request.number || github.run_id }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
on:
|
on:
|
||||||
pull_request:
|
pull_request:
|
||||||
paths:
|
paths:
|
||||||
@@ -21,7 +31,9 @@ jobs:
|
|||||||
- id: changes
|
- id: changes
|
||||||
run: |
|
run: |
|
||||||
changed() {
|
changed() {
|
||||||
git diff-tree -r --no-commit-id --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} \
|
git diff-tree -r --no-commit-id --name-only \
|
||||||
|
$(git merge-base ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }}) \
|
||||||
|
${{ github.event.pull_request.head.sha }} \
|
||||||
| xargs python3 -c "import sys; print(any([x.startswith('$1') for x in sys.argv[1:]]))"
|
| xargs python3 -c "import sys; print(any([x.startswith('$1') for x in sys.argv[1:]]))"
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -283,7 +295,6 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
go-version-file: go.mod
|
go-version-file: go.mod
|
||||||
cache: true
|
cache: true
|
||||||
- run: go get
|
|
||||||
- run: |
|
- run: |
|
||||||
case ${{ matrix.arch }} in
|
case ${{ matrix.arch }} in
|
||||||
amd64) echo ARCH=x86_64 ;;
|
amd64) echo ARCH=x86_64 ;;
|
||||||
|
|||||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -11,4 +11,5 @@ ggml-metal.metal
|
|||||||
.idea
|
.idea
|
||||||
test_data
|
test_data
|
||||||
*.crt
|
*.crt
|
||||||
llm/build
|
llm/build
|
||||||
|
__debug_bin*
|
||||||
10
README.md
10
README.md
@@ -1,5 +1,5 @@
|
|||||||
<div align="center">
|
<div align="center">
|
||||||
<img alt="ollama" height="200px" src="https://github.com/ollama/ollama/assets/3325447/0d0b44e2-8f4a-4e99-9b52-a5c1c741c8f7">
|
<img alt="ollama" height="200px" src="https://github.com/ollama/ollama/assets/3325447/0d0b44e2-8f4a-4e99-9b52-a5c1c741c8f7">
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
# Ollama
|
# Ollama
|
||||||
@@ -51,7 +51,7 @@ Here are some example models that can be downloaded:
|
|||||||
| ------------------ | ---------- | ----- | ------------------------------ |
|
| ------------------ | ---------- | ----- | ------------------------------ |
|
||||||
| Llama 3 | 8B | 4.7GB | `ollama run llama3` |
|
| Llama 3 | 8B | 4.7GB | `ollama run llama3` |
|
||||||
| Llama 3 | 70B | 40GB | `ollama run llama3:70b` |
|
| Llama 3 | 70B | 40GB | `ollama run llama3:70b` |
|
||||||
| Phi-3 | 3,8B | 2.3GB | `ollama run phi3` |
|
| Phi-3 | 3.8B | 2.3GB | `ollama run phi3` |
|
||||||
| Mistral | 7B | 4.1GB | `ollama run mistral` |
|
| Mistral | 7B | 4.1GB | `ollama run mistral` |
|
||||||
| Neural Chat | 7B | 4.1GB | `ollama run neural-chat` |
|
| Neural Chat | 7B | 4.1GB | `ollama run neural-chat` |
|
||||||
| Starling | 7B | 4.1GB | `ollama run starling-lm` |
|
| Starling | 7B | 4.1GB | `ollama run starling-lm` |
|
||||||
@@ -173,7 +173,7 @@ I'm a basic program that prints the famous "Hello, world!" message to the consol
|
|||||||
The image features a yellow smiley face, which is likely the central focus of the picture.
|
The image features a yellow smiley face, which is likely the central focus of the picture.
|
||||||
```
|
```
|
||||||
|
|
||||||
### Pass in prompt as arguments
|
### Pass the prompt as an argument
|
||||||
|
|
||||||
```
|
```
|
||||||
$ ollama run llama3 "Summarize this file: $(cat README.md)"
|
$ ollama run llama3 "Summarize this file: $(cat README.md)"
|
||||||
@@ -294,7 +294,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
|||||||
- [RAGFlow: Open-source Retrieval-Augmented Generation engine based on deep document understanding](https://github.com/infiniflow/ragflow)
|
- [RAGFlow: Open-source Retrieval-Augmented Generation engine based on deep document understanding](https://github.com/infiniflow/ragflow)
|
||||||
- [chat: chat web app for teams](https://github.com/swuecho/chat)
|
- [chat: chat web app for teams](https://github.com/swuecho/chat)
|
||||||
- [Lobe Chat](https://github.com/lobehub/lobe-chat) with [Integrating Doc](https://lobehub.com/docs/self-hosting/examples/ollama)
|
- [Lobe Chat](https://github.com/lobehub/lobe-chat) with [Integrating Doc](https://lobehub.com/docs/self-hosting/examples/ollama)
|
||||||
- [Ollama RAG Chatbot: Local Chat with multiples PDFs using Ollama and RAG.](https://github.com/datvodinh/rag-chatbot.git)
|
- [Ollama RAG Chatbot: Local Chat with multiple PDFs using Ollama and RAG.](https://github.com/datvodinh/rag-chatbot.git)
|
||||||
|
|
||||||
### Terminal
|
### Terminal
|
||||||
|
|
||||||
@@ -384,4 +384,4 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
|||||||
- [Discord-Ollama Chat Bot](https://github.com/kevinthedang/discord-ollama) (Generalized TypeScript Discord Bot w/ Tuning Documentation)
|
- [Discord-Ollama Chat Bot](https://github.com/kevinthedang/discord-ollama) (Generalized TypeScript Discord Bot w/ Tuning Documentation)
|
||||||
|
|
||||||
### Supported backends
|
### Supported backends
|
||||||
- [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov.
|
- [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov.
|
||||||
@@ -18,6 +18,7 @@ import (
|
|||||||
"net/url"
|
"net/url"
|
||||||
"os"
|
"os"
|
||||||
"runtime"
|
"runtime"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/ollama/ollama/format"
|
"github.com/ollama/ollama/format"
|
||||||
@@ -57,12 +58,36 @@ func checkError(resp *http.Response, body []byte) error {
|
|||||||
// If the variable is not specified, a default ollama host and port will be
|
// If the variable is not specified, a default ollama host and port will be
|
||||||
// used.
|
// used.
|
||||||
func ClientFromEnvironment() (*Client, error) {
|
func ClientFromEnvironment() (*Client, error) {
|
||||||
|
ollamaHost, err := GetOllamaHost()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return &Client{
|
||||||
|
base: &url.URL{
|
||||||
|
Scheme: ollamaHost.Scheme,
|
||||||
|
Host: net.JoinHostPort(ollamaHost.Host, ollamaHost.Port),
|
||||||
|
},
|
||||||
|
http: http.DefaultClient,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
type OllamaHost struct {
|
||||||
|
Scheme string
|
||||||
|
Host string
|
||||||
|
Port string
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetOllamaHost() (OllamaHost, error) {
|
||||||
defaultPort := "11434"
|
defaultPort := "11434"
|
||||||
|
|
||||||
scheme, hostport, ok := strings.Cut(os.Getenv("OLLAMA_HOST"), "://")
|
hostVar := os.Getenv("OLLAMA_HOST")
|
||||||
|
hostVar = strings.TrimSpace(strings.Trim(strings.TrimSpace(hostVar), "\"'"))
|
||||||
|
|
||||||
|
scheme, hostport, ok := strings.Cut(hostVar, "://")
|
||||||
switch {
|
switch {
|
||||||
case !ok:
|
case !ok:
|
||||||
scheme, hostport = "http", os.Getenv("OLLAMA_HOST")
|
scheme, hostport = "http", hostVar
|
||||||
case scheme == "http":
|
case scheme == "http":
|
||||||
defaultPort = "80"
|
defaultPort = "80"
|
||||||
case scheme == "https":
|
case scheme == "https":
|
||||||
@@ -82,12 +107,14 @@ func ClientFromEnvironment() (*Client, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return &Client{
|
if portNum, err := strconv.ParseInt(port, 10, 32); err != nil || portNum > 65535 || portNum < 0 {
|
||||||
base: &url.URL{
|
return OllamaHost{}, ErrInvalidHostPort
|
||||||
Scheme: scheme,
|
}
|
||||||
Host: net.JoinHostPort(host, port),
|
|
||||||
},
|
return OllamaHost{
|
||||||
http: http.DefaultClient,
|
Scheme: scheme,
|
||||||
|
Host: host,
|
||||||
|
Port: port,
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,12 @@
|
|||||||
package api
|
package api
|
||||||
|
|
||||||
import "testing"
|
import (
|
||||||
|
"fmt"
|
||||||
|
"net"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
func TestClientFromEnvironment(t *testing.T) {
|
func TestClientFromEnvironment(t *testing.T) {
|
||||||
type testCase struct {
|
type testCase struct {
|
||||||
@@ -40,4 +46,40 @@ func TestClientFromEnvironment(t *testing.T) {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
hostTestCases := map[string]*testCase{
|
||||||
|
"empty": {value: "", expect: "127.0.0.1:11434"},
|
||||||
|
"only address": {value: "1.2.3.4", expect: "1.2.3.4:11434"},
|
||||||
|
"only port": {value: ":1234", expect: ":1234"},
|
||||||
|
"address and port": {value: "1.2.3.4:1234", expect: "1.2.3.4:1234"},
|
||||||
|
"hostname": {value: "example.com", expect: "example.com:11434"},
|
||||||
|
"hostname and port": {value: "example.com:1234", expect: "example.com:1234"},
|
||||||
|
"zero port": {value: ":0", expect: ":0"},
|
||||||
|
"too large port": {value: ":66000", err: ErrInvalidHostPort},
|
||||||
|
"too small port": {value: ":-1", err: ErrInvalidHostPort},
|
||||||
|
"ipv6 localhost": {value: "[::1]", expect: "[::1]:11434"},
|
||||||
|
"ipv6 world open": {value: "[::]", expect: "[::]:11434"},
|
||||||
|
"ipv6 no brackets": {value: "::1", expect: "[::1]:11434"},
|
||||||
|
"ipv6 + port": {value: "[::1]:1337", expect: "[::1]:1337"},
|
||||||
|
"extra space": {value: " 1.2.3.4 ", expect: "1.2.3.4:11434"},
|
||||||
|
"extra quotes": {value: "\"1.2.3.4\"", expect: "1.2.3.4:11434"},
|
||||||
|
"extra space+quotes": {value: " \" 1.2.3.4 \" ", expect: "1.2.3.4:11434"},
|
||||||
|
"extra single quotes": {value: "'1.2.3.4'", expect: "1.2.3.4:11434"},
|
||||||
|
}
|
||||||
|
|
||||||
|
for k, v := range hostTestCases {
|
||||||
|
t.Run(k, func(t *testing.T) {
|
||||||
|
t.Setenv("OLLAMA_HOST", v.value)
|
||||||
|
|
||||||
|
oh, err := GetOllamaHost()
|
||||||
|
if err != v.err {
|
||||||
|
t.Fatalf("expected %s, got %s", v.err, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err == nil {
|
||||||
|
host := net.JoinHostPort(oh.Host, oh.Port)
|
||||||
|
assert.Equal(t, v.expect, host, fmt.Sprintf("%s: expected %s, got %s", k, v.expect, host))
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -309,6 +309,7 @@ func (m *Metrics) Summary() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var ErrInvalidOpts = errors.New("invalid options")
|
var ErrInvalidOpts = errors.New("invalid options")
|
||||||
|
var ErrInvalidHostPort = errors.New("invalid port specified in OLLAMA_HOST")
|
||||||
|
|
||||||
func (opts *Options) FromMap(m map[string]interface{}) error {
|
func (opts *Options) FromMap(m map[string]interface{}) error {
|
||||||
valueOpts := reflect.ValueOf(opts).Elem() // names of the fields in the options struct
|
valueOpts := reflect.ValueOf(opts).Elem() // names of the fields in the options struct
|
||||||
|
|||||||
@@ -43,37 +43,36 @@ func getCLIFullPath(command string) string {
|
|||||||
return command
|
return command
|
||||||
}
|
}
|
||||||
|
|
||||||
func SpawnServer(ctx context.Context, command string) (chan int, error) {
|
func start(ctx context.Context, command string) (*exec.Cmd, error) {
|
||||||
done := make(chan int)
|
|
||||||
|
|
||||||
logDir := filepath.Dir(ServerLogFile)
|
|
||||||
_, err := os.Stat(logDir)
|
|
||||||
if errors.Is(err, os.ErrNotExist) {
|
|
||||||
if err := os.MkdirAll(logDir, 0o755); err != nil {
|
|
||||||
return done, fmt.Errorf("create ollama server log dir %s: %v", logDir, err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
cmd := getCmd(ctx, getCLIFullPath(command))
|
cmd := getCmd(ctx, getCLIFullPath(command))
|
||||||
// send stdout and stderr to a file
|
|
||||||
stdout, err := cmd.StdoutPipe()
|
stdout, err := cmd.StdoutPipe()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return done, fmt.Errorf("failed to spawn server stdout pipe %s", err)
|
return nil, fmt.Errorf("failed to spawn server stdout pipe: %w", err)
|
||||||
}
|
}
|
||||||
stderr, err := cmd.StderrPipe()
|
stderr, err := cmd.StderrPipe()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return done, fmt.Errorf("failed to spawn server stderr pipe %s", err)
|
return nil, fmt.Errorf("failed to spawn server stderr pipe: %w", err)
|
||||||
}
|
|
||||||
stdin, err := cmd.StdinPipe()
|
|
||||||
if err != nil {
|
|
||||||
return done, fmt.Errorf("failed to spawn server stdin pipe %s", err)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO - rotation
|
// TODO - rotation
|
||||||
logFile, err := os.OpenFile(ServerLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0755)
|
logFile, err := os.OpenFile(ServerLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0755)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return done, fmt.Errorf("failed to create server log %w", err)
|
return nil, fmt.Errorf("failed to create server log: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
logDir := filepath.Dir(ServerLogFile)
|
||||||
|
_, err = os.Stat(logDir)
|
||||||
|
if err != nil {
|
||||||
|
if !errors.Is(err, os.ErrNotExist) {
|
||||||
|
return nil, fmt.Errorf("stat ollama server log dir %s: %v", logDir, err)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := os.MkdirAll(logDir, 0o755); err != nil {
|
||||||
|
return nil, fmt.Errorf("create ollama server log dir %s: %v", logDir, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
defer logFile.Close()
|
defer logFile.Close()
|
||||||
io.Copy(logFile, stdout) //nolint:errcheck
|
io.Copy(logFile, stdout) //nolint:errcheck
|
||||||
@@ -117,19 +116,33 @@ func SpawnServer(ctx context.Context, command string) (chan int, error) {
|
|||||||
|
|
||||||
// run the command and wait for it to finish
|
// run the command and wait for it to finish
|
||||||
if err := cmd.Start(); err != nil {
|
if err := cmd.Start(); err != nil {
|
||||||
return done, fmt.Errorf("failed to start server %w", err)
|
return nil, fmt.Errorf("failed to start server %w", err)
|
||||||
}
|
}
|
||||||
if cmd.Process != nil {
|
if cmd.Process != nil {
|
||||||
slog.Info(fmt.Sprintf("started ollama server with pid %d", cmd.Process.Pid))
|
slog.Info(fmt.Sprintf("started ollama server with pid %d", cmd.Process.Pid))
|
||||||
}
|
}
|
||||||
slog.Info(fmt.Sprintf("ollama server logs %s", ServerLogFile))
|
slog.Info(fmt.Sprintf("ollama server logs %s", ServerLogFile))
|
||||||
|
|
||||||
|
return cmd, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func SpawnServer(ctx context.Context, command string) (chan int, error) {
|
||||||
|
done := make(chan int)
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
// Keep the server running unless we're shutting down the app
|
// Keep the server running unless we're shutting down the app
|
||||||
crashCount := 0
|
crashCount := 0
|
||||||
for {
|
for {
|
||||||
|
slog.Info("starting server...")
|
||||||
|
cmd, err := start(ctx, command)
|
||||||
|
if err != nil {
|
||||||
|
crashCount++
|
||||||
|
slog.Error(fmt.Sprintf("failed to start server %s", err))
|
||||||
|
time.Sleep(500 * time.Millisecond * time.Duration(crashCount))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
cmd.Wait() //nolint:errcheck
|
cmd.Wait() //nolint:errcheck
|
||||||
stdin.Close()
|
|
||||||
var code int
|
var code int
|
||||||
if cmd.ProcessState != nil {
|
if cmd.ProcessState != nil {
|
||||||
code = cmd.ProcessState.ExitCode()
|
code = cmd.ProcessState.ExitCode()
|
||||||
@@ -143,15 +156,12 @@ func SpawnServer(ctx context.Context, command string) (chan int, error) {
|
|||||||
default:
|
default:
|
||||||
crashCount++
|
crashCount++
|
||||||
slog.Warn(fmt.Sprintf("server crash %d - exit code %d - respawning", crashCount, code))
|
slog.Warn(fmt.Sprintf("server crash %d - exit code %d - respawning", crashCount, code))
|
||||||
time.Sleep(500 * time.Millisecond)
|
time.Sleep(500 * time.Millisecond * time.Duration(crashCount))
|
||||||
if err := cmd.Start(); err != nil {
|
break
|
||||||
slog.Error(fmt.Sprintf("failed to restart server %s", err))
|
|
||||||
// Keep trying, but back off if we keep failing
|
|
||||||
time.Sleep(time.Duration(crashCount) * time.Second)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
return done, nil
|
return done, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -88,16 +88,12 @@ DialogFontSize=12
|
|||||||
[Files]
|
[Files]
|
||||||
Source: ".\app.exe"; DestDir: "{app}"; DestName: "{#MyAppExeName}" ; Flags: ignoreversion 64bit
|
Source: ".\app.exe"; DestDir: "{app}"; DestName: "{#MyAppExeName}" ; Flags: ignoreversion 64bit
|
||||||
Source: "..\ollama.exe"; DestDir: "{app}"; Flags: ignoreversion 64bit
|
Source: "..\ollama.exe"; DestDir: "{app}"; Flags: ignoreversion 64bit
|
||||||
Source: "..\dist\windows-amd64\*.dll"; DestDir: "{app}"; Flags: ignoreversion 64bit
|
Source: "..\dist\windows-{#ARCH}\*.dll"; DestDir: "{app}"; Flags: ignoreversion 64bit
|
||||||
Source: "..\dist\windows-amd64\ollama_runners\*"; DestDir: "{app}\ollama_runners"; Flags: ignoreversion 64bit recursesubdirs
|
Source: "..\dist\windows-{#ARCH}\ollama_runners\*"; DestDir: "{app}\ollama_runners"; Flags: ignoreversion 64bit recursesubdirs
|
||||||
Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion
|
Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion
|
||||||
Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion
|
Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion
|
||||||
; Assumes v5.7, may need adjustments for v6
|
#if DirExists("..\dist\windows-amd64\rocm")
|
||||||
#if GetEnv("HIP_PATH") != ""
|
Source: "..\dist\windows-amd64\rocm\*"; DestDir: "{app}\rocm\"; Flags: ignoreversion recursesubdirs
|
||||||
Source: "{#GetEnv('HIP_PATH')}\bin\hipblas.dll"; DestDir: "{app}\rocm\"; Flags: ignoreversion
|
|
||||||
Source: "{#GetEnv('HIP_PATH')}\bin\rocblas.dll"; DestDir: "{app}\rocm\"; Flags: ignoreversion
|
|
||||||
; amdhip64.dll dependency comes from the driver and must be installed already
|
|
||||||
Source: "{#GetEnv('HIP_PATH')}\bin\rocblas\library\*"; DestDir: "{app}\rocm\rocblas\library\"; Flags: ignoreversion
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
@@ -133,7 +129,7 @@ SetupAppRunningError=Another Ollama installer is running.%n%nPlease cancel or fi
|
|||||||
|
|
||||||
|
|
||||||
;FinishedHeadingLabel=Run your first model
|
;FinishedHeadingLabel=Run your first model
|
||||||
;FinishedLabel=%nRun this command in a PowerShell or cmd terminal.%n%n%n ollama run llama2
|
;FinishedLabel=%nRun this command in a PowerShell or cmd terminal.%n%n%n ollama run llama3
|
||||||
;ClickFinish=%n
|
;ClickFinish=%n
|
||||||
|
|
||||||
[Registry]
|
[Registry]
|
||||||
|
|||||||
36
auth/auth.go
36
auth/auth.go
@@ -10,12 +10,44 @@ import (
|
|||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
|
||||||
"golang.org/x/crypto/ssh"
|
"golang.org/x/crypto/ssh"
|
||||||
)
|
)
|
||||||
|
|
||||||
const defaultPrivateKey = "id_ed25519"
|
const defaultPrivateKey = "id_ed25519"
|
||||||
|
|
||||||
|
func keyPath() (string, error) {
|
||||||
|
home, err := os.UserHomeDir()
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
return filepath.Join(home, ".ollama", defaultPrivateKey), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetPublicKey() (string, error) {
|
||||||
|
keyPath, err := keyPath()
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
privateKeyFile, err := os.ReadFile(keyPath)
|
||||||
|
if err != nil {
|
||||||
|
slog.Info(fmt.Sprintf("Failed to load private key: %v", err))
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
privateKey, err := ssh.ParsePrivateKey(privateKeyFile)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey())
|
||||||
|
|
||||||
|
return strings.TrimSpace(string(publicKey)), nil
|
||||||
|
}
|
||||||
|
|
||||||
func NewNonce(r io.Reader, length int) (string, error) {
|
func NewNonce(r io.Reader, length int) (string, error) {
|
||||||
nonce := make([]byte, length)
|
nonce := make([]byte, length)
|
||||||
if _, err := io.ReadFull(r, nonce); err != nil {
|
if _, err := io.ReadFull(r, nonce); err != nil {
|
||||||
@@ -26,13 +58,11 @@ func NewNonce(r io.Reader, length int) (string, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func Sign(ctx context.Context, bts []byte) (string, error) {
|
func Sign(ctx context.Context, bts []byte) (string, error) {
|
||||||
home, err := os.UserHomeDir()
|
keyPath, err := keyPath()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
||||||
keyPath := filepath.Join(home, ".ollama", defaultPrivateKey)
|
|
||||||
|
|
||||||
privateKeyFile, err := os.ReadFile(keyPath)
|
privateKeyFile, err := os.ReadFile(keyPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
slog.Info(fmt.Sprintf("Failed to load private key: %v", err))
|
slog.Info(fmt.Sprintf("Failed to load private key: %v", err))
|
||||||
|
|||||||
207
cmd/cmd.go
207
cmd/cmd.go
@@ -32,10 +32,13 @@ import (
|
|||||||
"golang.org/x/term"
|
"golang.org/x/term"
|
||||||
|
|
||||||
"github.com/ollama/ollama/api"
|
"github.com/ollama/ollama/api"
|
||||||
|
"github.com/ollama/ollama/auth"
|
||||||
"github.com/ollama/ollama/format"
|
"github.com/ollama/ollama/format"
|
||||||
"github.com/ollama/ollama/parser"
|
"github.com/ollama/ollama/parser"
|
||||||
"github.com/ollama/ollama/progress"
|
"github.com/ollama/ollama/progress"
|
||||||
"github.com/ollama/ollama/server"
|
"github.com/ollama/ollama/server"
|
||||||
|
"github.com/ollama/ollama/types/errtypes"
|
||||||
|
"github.com/ollama/ollama/types/model"
|
||||||
"github.com/ollama/ollama/version"
|
"github.com/ollama/ollama/version"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -54,12 +57,13 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
|
|||||||
p := progress.NewProgress(os.Stderr)
|
p := progress.NewProgress(os.Stderr)
|
||||||
defer p.Stop()
|
defer p.Stop()
|
||||||
|
|
||||||
modelfile, err := os.ReadFile(filename)
|
modelfile, err := os.Open(filename)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
defer modelfile.Close()
|
||||||
|
|
||||||
commands, err := parser.Parse(bytes.NewReader(modelfile))
|
commands, err := parser.Parse(modelfile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -73,10 +77,10 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
|
|||||||
spinner := progress.NewSpinner(status)
|
spinner := progress.NewSpinner(status)
|
||||||
p.Add(status, spinner)
|
p.Add(status, spinner)
|
||||||
|
|
||||||
for _, c := range commands {
|
for i := range commands {
|
||||||
switch c.Name {
|
switch commands[i].Name {
|
||||||
case "model", "adapter":
|
case "model", "adapter":
|
||||||
path := c.Args
|
path := commands[i].Args
|
||||||
if path == "~" {
|
if path == "~" {
|
||||||
path = home
|
path = home
|
||||||
} else if strings.HasPrefix(path, "~/") {
|
} else if strings.HasPrefix(path, "~/") {
|
||||||
@@ -88,7 +92,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fi, err := os.Stat(path)
|
fi, err := os.Stat(path)
|
||||||
if errors.Is(err, os.ErrNotExist) && c.Name == "model" {
|
if errors.Is(err, os.ErrNotExist) && commands[i].Name == "model" {
|
||||||
continue
|
continue
|
||||||
} else if err != nil {
|
} else if err != nil {
|
||||||
return err
|
return err
|
||||||
@@ -111,13 +115,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
name := c.Name
|
commands[i].Args = "@"+digest
|
||||||
if c.Name == "model" {
|
|
||||||
name = "from"
|
|
||||||
}
|
|
||||||
|
|
||||||
re := regexp.MustCompile(fmt.Sprintf(`(?im)^(%s)\s+%s\s*$`, name, c.Args))
|
|
||||||
modelfile = re.ReplaceAll(modelfile, []byte("$1 @"+digest))
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -147,7 +145,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
|
|||||||
|
|
||||||
quantization, _ := cmd.Flags().GetString("quantization")
|
quantization, _ := cmd.Flags().GetString("quantization")
|
||||||
|
|
||||||
request := api.CreateRequest{Name: args[0], Modelfile: string(modelfile), Quantization: quantization}
|
request := api.CreateRequest{Name: args[0], Modelfile: parser.Format(commands), Quantization: quantization}
|
||||||
if err := client.Create(cmd.Context(), &request, fn); err != nil {
|
if err := client.Create(cmd.Context(), &request, fn); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -165,71 +163,97 @@ func tempZipFiles(path string) (string, error) {
|
|||||||
zipfile := zip.NewWriter(tempfile)
|
zipfile := zip.NewWriter(tempfile)
|
||||||
defer zipfile.Close()
|
defer zipfile.Close()
|
||||||
|
|
||||||
tfiles, err := filepath.Glob(filepath.Join(path, "pytorch_model-*.bin"))
|
detectContentType := func(path string) (string, error) {
|
||||||
if err != nil {
|
f, err := os.Open(path)
|
||||||
return "", err
|
|
||||||
} else if len(tfiles) == 0 {
|
|
||||||
tfiles, err = filepath.Glob(filepath.Join(path, "model-*.safetensors"))
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
}
|
defer f.Close()
|
||||||
|
|
||||||
files := []string{}
|
var b bytes.Buffer
|
||||||
files = append(files, tfiles...)
|
b.Grow(512)
|
||||||
|
|
||||||
if len(files) == 0 {
|
if _, err := io.CopyN(&b, f, 512); err != nil && !errors.Is(err, io.EOF) {
|
||||||
return "", fmt.Errorf("no models were found in '%s'", path)
|
|
||||||
}
|
|
||||||
|
|
||||||
// add the safetensor/torch config file + tokenizer
|
|
||||||
files = append(files, filepath.Join(path, "config.json"))
|
|
||||||
files = append(files, filepath.Join(path, "params.json"))
|
|
||||||
files = append(files, filepath.Join(path, "added_tokens.json"))
|
|
||||||
files = append(files, filepath.Join(path, "tokenizer.model"))
|
|
||||||
|
|
||||||
for _, fn := range files {
|
|
||||||
f, err := os.Open(fn)
|
|
||||||
|
|
||||||
// just skip whatever files aren't there
|
|
||||||
if os.IsNotExist(err) {
|
|
||||||
if strings.HasSuffix(fn, "tokenizer.model") {
|
|
||||||
// try the parent dir before giving up
|
|
||||||
parentDir := filepath.Dir(path)
|
|
||||||
newFn := filepath.Join(parentDir, "tokenizer.model")
|
|
||||||
f, err = os.Open(newFn)
|
|
||||||
if os.IsNotExist(err) {
|
|
||||||
continue
|
|
||||||
} else if err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
} else if err != nil {
|
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
contentType, _, _ := strings.Cut(http.DetectContentType(b.Bytes()), ";")
|
||||||
|
return contentType, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
glob := func(pattern, contentType string) ([]string, error) {
|
||||||
|
matches, err := filepath.Glob(pattern)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, safetensor := range matches {
|
||||||
|
if ct, err := detectContentType(safetensor); err != nil {
|
||||||
|
return nil, err
|
||||||
|
} else if ct != contentType {
|
||||||
|
return nil, fmt.Errorf("invalid content type: expected %s for %s", ct, safetensor)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return matches, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var files []string
|
||||||
|
if st, _ := glob(filepath.Join(path, "model*.safetensors"), "application/octet-stream"); len(st) > 0 {
|
||||||
|
// safetensors files might be unresolved git lfs references; skip if they are
|
||||||
|
// covers model-x-of-y.safetensors, model.fp32-x-of-y.safetensors, model.safetensors
|
||||||
|
files = append(files, st...)
|
||||||
|
} else if pt, _ := glob(filepath.Join(path, "pytorch_model*.bin"), "application/zip"); len(pt) > 0 {
|
||||||
|
// pytorch files might also be unresolved git lfs references; skip if they are
|
||||||
|
// covers pytorch_model-x-of-y.bin, pytorch_model.fp32-x-of-y.bin, pytorch_model.bin
|
||||||
|
files = append(files, pt...)
|
||||||
|
} else if pt, _ := glob(filepath.Join(path, "consolidated*.pth"), "application/octet-stream"); len(pt) > 0 {
|
||||||
|
// pytorch files might also be unresolved git lfs references; skip if they are
|
||||||
|
// covers consolidated.x.pth, consolidated.pth
|
||||||
|
files = append(files, pt...)
|
||||||
|
} else {
|
||||||
|
return "", errors.New("no safetensors or torch files found")
|
||||||
|
}
|
||||||
|
|
||||||
|
// add configuration files, json files are detected as text/plain
|
||||||
|
js, err := glob(filepath.Join(path, "*.json"), "text/plain")
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
files = append(files, js...)
|
||||||
|
|
||||||
|
if tks, _ := glob(filepath.Join(path, "tokenizer.model"), "application/octet-stream"); len(tks) > 0 {
|
||||||
|
// add tokenizer.model if it exists, tokenizer.json is automatically picked up by the previous glob
|
||||||
|
// tokenizer.model might be a unresolved git lfs reference; error if it is
|
||||||
|
files = append(files, tks...)
|
||||||
|
} else if tks, _ := glob(filepath.Join(path, "**/tokenizer.model"), "text/plain"); len(tks) > 0 {
|
||||||
|
// some times tokenizer.model is in a subdirectory (e.g. meta-llama/Meta-Llama-3-8B)
|
||||||
|
files = append(files, tks...)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, file := range files {
|
||||||
|
f, err := os.Open(file)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
fi, err := f.Stat()
|
fi, err := f.Stat()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
||||||
h, err := zip.FileInfoHeader(fi)
|
zfi, err := zip.FileInfoHeader(fi)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
||||||
h.Name = filepath.Base(fn)
|
zf, err := zipfile.CreateHeader(zfi)
|
||||||
h.Method = zip.Store
|
|
||||||
|
|
||||||
w, err := zipfile.CreateHeader(h)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
||||||
_, err = io.Copy(w, f)
|
if _, err := io.Copy(zf, f); err != nil {
|
||||||
if err != nil {
|
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -331,6 +355,47 @@ func RunHandler(cmd *cobra.Command, args []string) error {
|
|||||||
return generateInteractive(cmd, opts)
|
return generateInteractive(cmd, opts)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func errFromUnknownKey(unknownKeyErr error) error {
|
||||||
|
// find SSH public key in the error message
|
||||||
|
sshKeyPattern := `ssh-\w+ [^\s"]+`
|
||||||
|
re := regexp.MustCompile(sshKeyPattern)
|
||||||
|
matches := re.FindStringSubmatch(unknownKeyErr.Error())
|
||||||
|
|
||||||
|
if len(matches) > 0 {
|
||||||
|
serverPubKey := matches[0]
|
||||||
|
|
||||||
|
localPubKey, err := auth.GetPublicKey()
|
||||||
|
if err != nil {
|
||||||
|
return unknownKeyErr
|
||||||
|
}
|
||||||
|
|
||||||
|
if runtime.GOOS == "linux" && serverPubKey != localPubKey {
|
||||||
|
// try the ollama service public key
|
||||||
|
svcPubKey, err := os.ReadFile("/usr/share/ollama/.ollama/id_ed25519.pub")
|
||||||
|
if err != nil {
|
||||||
|
return unknownKeyErr
|
||||||
|
}
|
||||||
|
localPubKey = strings.TrimSpace(string(svcPubKey))
|
||||||
|
}
|
||||||
|
|
||||||
|
// check if the returned public key matches the local public key, this prevents adding a remote key to the user's account
|
||||||
|
if serverPubKey != localPubKey {
|
||||||
|
return unknownKeyErr
|
||||||
|
}
|
||||||
|
|
||||||
|
var msg strings.Builder
|
||||||
|
msg.WriteString(unknownKeyErr.Error())
|
||||||
|
msg.WriteString("\n\nYour ollama key is:\n")
|
||||||
|
msg.WriteString(localPubKey)
|
||||||
|
msg.WriteString("\nAdd your key at:\n")
|
||||||
|
msg.WriteString("https://ollama.com/settings/keys")
|
||||||
|
|
||||||
|
return errors.New(msg.String())
|
||||||
|
}
|
||||||
|
|
||||||
|
return unknownKeyErr
|
||||||
|
}
|
||||||
|
|
||||||
func PushHandler(cmd *cobra.Command, args []string) error {
|
func PushHandler(cmd *cobra.Command, args []string) error {
|
||||||
client, err := api.ClientFromEnvironment()
|
client, err := api.ClientFromEnvironment()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -378,6 +443,20 @@ func PushHandler(cmd *cobra.Command, args []string) error {
|
|||||||
|
|
||||||
request := api.PushRequest{Name: args[0], Insecure: insecure}
|
request := api.PushRequest{Name: args[0], Insecure: insecure}
|
||||||
if err := client.Push(cmd.Context(), &request, fn); err != nil {
|
if err := client.Push(cmd.Context(), &request, fn); err != nil {
|
||||||
|
if spinner != nil {
|
||||||
|
spinner.Stop()
|
||||||
|
}
|
||||||
|
if strings.Contains(err.Error(), "access denied") {
|
||||||
|
return errors.New("you are not authorized to push to this namespace, create the model under a namespace you own")
|
||||||
|
}
|
||||||
|
host := model.ParseName(args[0]).Host
|
||||||
|
isOllamaHost := strings.HasSuffix(host, ".ollama.ai") || strings.HasSuffix(host, ".ollama.com")
|
||||||
|
if strings.Contains(err.Error(), errtypes.UnknownOllamaKeyErrMsg) && isOllamaHost {
|
||||||
|
// the user has not added their ollama key to ollama.com
|
||||||
|
// re-throw an error with a more user-friendly message
|
||||||
|
return errFromUnknownKey(err)
|
||||||
|
}
|
||||||
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -805,19 +884,17 @@ func generate(cmd *cobra.Command, opts runOptions) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func RunServer(cmd *cobra.Command, _ []string) error {
|
func RunServer(cmd *cobra.Command, _ []string) error {
|
||||||
host, port, err := net.SplitHostPort(strings.Trim(os.Getenv("OLLAMA_HOST"), "\"'"))
|
// retrieve the OLLAMA_HOST environment variable
|
||||||
|
ollamaHost, err := api.GetOllamaHost()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
host, port = "127.0.0.1", "11434"
|
return err
|
||||||
if ip := net.ParseIP(strings.Trim(os.Getenv("OLLAMA_HOST"), "[]")); ip != nil {
|
|
||||||
host = ip.String()
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := initializeKeypair(); err != nil {
|
if err := initializeKeypair(); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
ln, err := net.Listen("tcp", net.JoinHostPort(host, port))
|
ln, err := net.Listen("tcp", net.JoinHostPort(ollamaHost.Host, ollamaHost.Port))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -1043,7 +1120,7 @@ Environment Variables:
|
|||||||
RunE: ListHandler,
|
RunE: ListHandler,
|
||||||
}
|
}
|
||||||
copyCmd := &cobra.Command{
|
copyCmd := &cobra.Command{
|
||||||
Use: "cp SOURCE TARGET",
|
Use: "cp SOURCE DESTINATION",
|
||||||
Short: "Copy a model",
|
Short: "Copy a model",
|
||||||
Args: cobra.ExactArgs(2),
|
Args: cobra.ExactArgs(2),
|
||||||
PreRunE: checkServerHeartbeat,
|
PreRunE: checkServerHeartbeat,
|
||||||
|
|||||||
@@ -94,6 +94,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
|
|||||||
fmt.Fprintln(os.Stderr, " /show Show model information")
|
fmt.Fprintln(os.Stderr, " /show Show model information")
|
||||||
fmt.Fprintln(os.Stderr, " /load <model> Load a session or model")
|
fmt.Fprintln(os.Stderr, " /load <model> Load a session or model")
|
||||||
fmt.Fprintln(os.Stderr, " /save <model> Save your current session")
|
fmt.Fprintln(os.Stderr, " /save <model> Save your current session")
|
||||||
|
fmt.Fprintln(os.Stderr, " /clear Clear session context")
|
||||||
fmt.Fprintln(os.Stderr, " /bye Exit")
|
fmt.Fprintln(os.Stderr, " /bye Exit")
|
||||||
fmt.Fprintln(os.Stderr, " /?, /help Help for a command")
|
fmt.Fprintln(os.Stderr, " /?, /help Help for a command")
|
||||||
fmt.Fprintln(os.Stderr, " /? shortcuts Help for keyboard shortcuts")
|
fmt.Fprintln(os.Stderr, " /? shortcuts Help for keyboard shortcuts")
|
||||||
@@ -280,6 +281,10 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
|
|||||||
}
|
}
|
||||||
fmt.Printf("Created new model '%s'\n", args[1])
|
fmt.Printf("Created new model '%s'\n", args[1])
|
||||||
continue
|
continue
|
||||||
|
case strings.HasPrefix(line, "/clear"):
|
||||||
|
opts.Messages = []api.Message{}
|
||||||
|
fmt.Println("Cleared session context")
|
||||||
|
continue
|
||||||
case strings.HasPrefix(line, "/set"):
|
case strings.HasPrefix(line, "/set"):
|
||||||
args := strings.Fields(line)
|
args := strings.Fields(line)
|
||||||
if len(args) > 1 {
|
if len(args) > 1 {
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import (
|
|||||||
"encoding/binary"
|
"encoding/binary"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
@@ -47,7 +48,7 @@ type ByteOrder interface {
|
|||||||
type ModelArch interface {
|
type ModelArch interface {
|
||||||
GetTensors() error
|
GetTensors() error
|
||||||
LoadVocab() error
|
LoadVocab() error
|
||||||
WriteGGUF() (string, error)
|
WriteGGUF(io.WriteSeeker) error
|
||||||
}
|
}
|
||||||
|
|
||||||
type ModelFormat interface {
|
type ModelFormat interface {
|
||||||
|
|||||||
@@ -94,7 +94,7 @@ func (m *GemmaModel) LoadVocab() error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *GemmaModel) WriteGGUF() (string, error) {
|
func (m *GemmaModel) WriteGGUF(ws io.WriteSeeker) error {
|
||||||
kv := llm.KV{
|
kv := llm.KV{
|
||||||
"general.architecture": "gemma",
|
"general.architecture": "gemma",
|
||||||
"general.name": m.Name,
|
"general.name": m.Name,
|
||||||
@@ -122,16 +122,5 @@ func (m *GemmaModel) WriteGGUF() (string, error) {
|
|||||||
"tokenizer.ggml.add_eos_token": false,
|
"tokenizer.ggml.add_eos_token": false,
|
||||||
}
|
}
|
||||||
|
|
||||||
f, err := os.CreateTemp("", "ollama-gguf")
|
return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
|
||||||
if err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
defer f.Close()
|
|
||||||
|
|
||||||
mod := llm.NewGGUFV3(m.Params.ByteOrder)
|
|
||||||
if err := mod.Encode(f, kv, m.Tensors); err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
|
|
||||||
return f.Name(), nil
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -132,7 +132,7 @@ func (m *LlamaModel) LoadVocab() error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *LlamaModel) WriteGGUF() (string, error) {
|
func (m *LlamaModel) WriteGGUF(ws io.WriteSeeker) error {
|
||||||
kv := llm.KV{
|
kv := llm.KV{
|
||||||
"general.architecture": "llama",
|
"general.architecture": "llama",
|
||||||
"general.name": m.Name,
|
"general.name": m.Name,
|
||||||
@@ -161,16 +161,9 @@ func (m *LlamaModel) WriteGGUF() (string, error) {
|
|||||||
|
|
||||||
f, err := os.CreateTemp("", "ollama-gguf")
|
f, err := os.CreateTemp("", "ollama-gguf")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return err
|
||||||
}
|
}
|
||||||
defer f.Close()
|
defer f.Close()
|
||||||
|
|
||||||
mod := llm.NewGGUFV3(m.Params.ByteOrder)
|
return llm.NewGGUFV3(m.Params.ByteOrder).Encode(f, kv, m.Tensors)
|
||||||
if err := mod.Encode(f, kv, m.Tensors); err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
|
|
||||||
slog.Debug(fmt.Sprintf("gguf file = %s", f.Name()))
|
|
||||||
|
|
||||||
return f.Name(), nil
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -132,7 +132,7 @@ func (m *MistralModel) LoadVocab() error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *MistralModel) WriteGGUF() (string, error) {
|
func (m *MistralModel) WriteGGUF(ws io.WriteSeeker) error {
|
||||||
kv := llm.KV{
|
kv := llm.KV{
|
||||||
"general.architecture": "llama",
|
"general.architecture": "llama",
|
||||||
"general.name": m.Name,
|
"general.name": m.Name,
|
||||||
@@ -158,16 +158,5 @@ func (m *MistralModel) WriteGGUF() (string, error) {
|
|||||||
"tokenizer.ggml.unknown_token_id": uint32(0),
|
"tokenizer.ggml.unknown_token_id": uint32(0),
|
||||||
}
|
}
|
||||||
|
|
||||||
f, err := os.CreateTemp("", "ollama-gguf")
|
return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
|
||||||
if err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
defer f.Close()
|
|
||||||
|
|
||||||
mod := llm.NewGGUFV3(m.Params.ByteOrder)
|
|
||||||
if err := mod.Encode(f, kv, m.Tensors); err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
|
|
||||||
return f.Name(), nil
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
package convert
|
package convert
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"os"
|
"io"
|
||||||
"regexp"
|
"regexp"
|
||||||
|
|
||||||
"github.com/ollama/ollama/llm"
|
"github.com/ollama/ollama/llm"
|
||||||
@@ -47,7 +47,7 @@ func (m *MixtralModel) LoadVocab() error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *MixtralModel) WriteGGUF() (string, error) {
|
func (m *MixtralModel) WriteGGUF(ws io.WriteSeeker) error {
|
||||||
kv := llm.KV{
|
kv := llm.KV{
|
||||||
"general.architecture": "llama",
|
"general.architecture": "llama",
|
||||||
"general.name": m.Name,
|
"general.name": m.Name,
|
||||||
@@ -81,16 +81,5 @@ func (m *MixtralModel) WriteGGUF() (string, error) {
|
|||||||
"tokenizer.ggml.add_eos_token": false,
|
"tokenizer.ggml.add_eos_token": false,
|
||||||
}
|
}
|
||||||
|
|
||||||
f, err := os.CreateTemp("", "ollama-gguf")
|
return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
|
||||||
if err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
defer f.Close()
|
|
||||||
|
|
||||||
mod := llm.NewGGUFV3(m.Params.ByteOrder)
|
|
||||||
if err := mod.Encode(f, kv, m.Tensors); err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
|
|
||||||
return f.Name(), nil
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -51,7 +51,7 @@ Typically the build scripts will auto-detect CUDA, however, if your Linux distro
|
|||||||
or installation approach uses unusual paths, you can specify the location by
|
or installation approach uses unusual paths, you can specify the location by
|
||||||
specifying an environment variable `CUDA_LIB_DIR` to the location of the shared
|
specifying an environment variable `CUDA_LIB_DIR` to the location of the shared
|
||||||
libraries, and `CUDACXX` to the location of the nvcc compiler. You can customize
|
libraries, and `CUDACXX` to the location of the nvcc compiler. You can customize
|
||||||
set set of target CUDA architectues by setting `CMAKE_CUDA_ARCHITECTURES` (e.g. "50;60;70")
|
a set of target CUDA architectures by setting `CMAKE_CUDA_ARCHITECTURES` (e.g. "50;60;70")
|
||||||
|
|
||||||
Then generate dependencies:
|
Then generate dependencies:
|
||||||
|
|
||||||
@@ -142,4 +142,4 @@ In addition to the common Windows development tools described above, install AMD
|
|||||||
- [AMD HIP](https://www.amd.com/en/developer/resources/rocm-hub/hip-sdk.html)
|
- [AMD HIP](https://www.amd.com/en/developer/resources/rocm-hub/hip-sdk.html)
|
||||||
- [Strawberry Perl](https://strawberryperl.com/)
|
- [Strawberry Perl](https://strawberryperl.com/)
|
||||||
|
|
||||||
Lastly, add `ninja.exe` included with MSVC to the system path (e.g. `C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\Common7\IDE\CommonExtensions\Microsoft\CMake\Ninja`).
|
Lastly, add `ninja.exe` included with MSVC to the system path (e.g. `C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\Common7\IDE\CommonExtensions\Microsoft\CMake\Ninja`).
|
||||||
|
|||||||
@@ -17,10 +17,12 @@ Let's start by asking a simple question that we can get an answer to from the **
|
|||||||
Then we can create a model and ask the question:
|
Then we can create a model and ask the question:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from langchain.llms import Ollama
|
from langchain_community.llms import Ollama
|
||||||
ollama = Ollama(base_url='http://localhost:11434',
|
ollama = Ollama(
|
||||||
model="llama2")
|
base_url='http://localhost:11434',
|
||||||
print(ollama("why is the sky blue"))
|
model="llama3"
|
||||||
|
)
|
||||||
|
print(ollama.invoke("why is the sky blue"))
|
||||||
```
|
```
|
||||||
|
|
||||||
Notice that we are defining the model and the base URL for Ollama.
|
Notice that we are defining the model and the base URL for Ollama.
|
||||||
|
|||||||
@@ -1,47 +1,47 @@
|
|||||||
# Ollama Windows Preview
|
# Ollama Windows Preview
|
||||||
|
|
||||||
Welcome to the Ollama Windows preview.
|
Welcome to the Ollama Windows preview.
|
||||||
|
|
||||||
No more WSL required!
|
No more WSL required!
|
||||||
|
|
||||||
Ollama now runs as a native Windows application, including NVIDIA and AMD Radeon GPU support.
|
Ollama now runs as a native Windows application, including NVIDIA and AMD Radeon GPU support.
|
||||||
After installing Ollama Windows Preview, Ollama will run in the background and
|
After installing Ollama Windows Preview, Ollama will run in the background and
|
||||||
the `ollama` command line is available in `cmd`, `powershell` or your favorite
|
the `ollama` command line is available in `cmd`, `powershell` or your favorite
|
||||||
terminal application. As usual the Ollama [api](./api.md) will be served on
|
terminal application. As usual the Ollama [api](./api.md) will be served on
|
||||||
`http://localhost:11434`.
|
`http://localhost:11434`.
|
||||||
|
|
||||||
As this is a preview release, you should expect a few bugs here and there. If
|
As this is a preview release, you should expect a few bugs here and there. If
|
||||||
you run into a problem you can reach out on
|
you run into a problem you can reach out on
|
||||||
[Discord](https://discord.gg/ollama), or file an
|
[Discord](https://discord.gg/ollama), or file an
|
||||||
[issue](https://github.com/ollama/ollama/issues).
|
[issue](https://github.com/ollama/ollama/issues).
|
||||||
Logs will often be helpful in dianosing the problem (see
|
Logs will often be helpful in diagnosing the problem (see
|
||||||
[Troubleshooting](#troubleshooting) below)
|
[Troubleshooting](#troubleshooting) below)
|
||||||
|
|
||||||
## System Requirements
|
## System Requirements
|
||||||
|
|
||||||
* Windows 10 or newer, Home or Pro
|
* Windows 10 or newer, Home or Pro
|
||||||
* NVIDIA 452.39 or newer Drivers if you have an NVIDIA card
|
* NVIDIA 452.39 or newer Drivers if you have an NVIDIA card
|
||||||
* AMD Radeon Driver https://www.amd.com/en/support if you have a Radeon card
|
* AMD Radeon Driver https://www.amd.com/en/support if you have a Radeon card
|
||||||
|
|
||||||
## API Access
|
## API Access
|
||||||
|
|
||||||
Here's a quick example showing API access from `powershell`
|
Here's a quick example showing API access from `powershell`
|
||||||
```powershell
|
```powershell
|
||||||
(Invoke-WebRequest -method POST -Body '{"model":"llama2", "prompt":"Why is the sky blue?", "stream": false}' -uri http://localhost:11434/api/generate ).Content | ConvertFrom-json
|
(Invoke-WebRequest -method POST -Body '{"model":"llama2", "prompt":"Why is the sky blue?", "stream": false}' -uri http://localhost:11434/api/generate ).Content | ConvertFrom-json
|
||||||
```
|
```
|
||||||
|
|
||||||
## Troubleshooting
|
## Troubleshooting
|
||||||
|
|
||||||
While we're in preview, `OLLAMA_DEBUG` is always enabled, which adds
|
While we're in preview, `OLLAMA_DEBUG` is always enabled, which adds
|
||||||
a "view logs" menu item to the app, and increses logging for the GUI app and
|
a "view logs" menu item to the app, and increses logging for the GUI app and
|
||||||
server.
|
server.
|
||||||
|
|
||||||
Ollama on Windows stores files in a few different locations. You can view them in
|
Ollama on Windows stores files in a few different locations. You can view them in
|
||||||
the explorer window by hitting `<cmd>+R` and type in:
|
the explorer window by hitting `<cmd>+R` and type in:
|
||||||
- `explorer %LOCALAPPDATA%\Ollama` contains logs, and downloaded updates
|
- `explorer %LOCALAPPDATA%\Ollama` contains logs, and downloaded updates
|
||||||
- *app.log* contains logs from the GUI application
|
- *app.log* contains logs from the GUI application
|
||||||
- *server.log* contains the server logs
|
- *server.log* contains the server logs
|
||||||
- *upgrade.log* contains log output for upgrades
|
- *upgrade.log* contains log output for upgrades
|
||||||
- `explorer %LOCALAPPDATA%\Programs\Ollama` contains the binaries (The installer adds this to your user PATH)
|
- `explorer %LOCALAPPDATA%\Programs\Ollama` contains the binaries (The installer adds this to your user PATH)
|
||||||
- `explorer %HOMEPATH%\.ollama` contains models and configuration
|
- `explorer %HOMEPATH%\.ollama` contains models and configuration
|
||||||
- `explorer %TEMP%` contains temporary executable files in one or more `ollama*` directories
|
- `explorer %TEMP%` contains temporary executable files in one or more `ollama*` directories
|
||||||
|
|||||||
@@ -32,9 +32,25 @@ func PayloadsDir() (string, error) {
|
|||||||
slog.Error("failed to lookup executable path", "error", err)
|
slog.Error("failed to lookup executable path", "error", err)
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cwd, err := os.Getwd()
|
||||||
|
if err != nil {
|
||||||
|
slog.Error("failed to lookup working directory", "error", err)
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
var paths []string
|
||||||
|
for _, root := range []string{filepath.Dir(appExe), cwd} {
|
||||||
|
paths = append(paths,
|
||||||
|
filepath.Join(root),
|
||||||
|
filepath.Join(root, "windows-"+runtime.GOARCH),
|
||||||
|
filepath.Join(root, "dist", "windows-"+runtime.GOARCH),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
// Try a few variations to improve developer experience when building from source in the local tree
|
// Try a few variations to improve developer experience when building from source in the local tree
|
||||||
for _, d := range []string{".", "windows-" + runtime.GOARCH, "dist\\windows-" + runtime.GOARCH} {
|
for _, p := range paths {
|
||||||
candidate := filepath.Join(filepath.Dir(appExe), d, "ollama_runners")
|
candidate := filepath.Join(p, "ollama_runners")
|
||||||
_, err := os.Stat(candidate)
|
_, err := os.Stat(candidate)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
runnersDir = candidate
|
runnersDir = candidate
|
||||||
|
|||||||
@@ -10,6 +10,12 @@ package gpu
|
|||||||
import "C"
|
import "C"
|
||||||
import (
|
import (
|
||||||
"runtime"
|
"runtime"
|
||||||
|
|
||||||
|
"github.com/ollama/ollama/format"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
metalMinimumMemory = 512 * format.MebiByte
|
||||||
)
|
)
|
||||||
|
|
||||||
func GetGPUInfo() GpuInfoList {
|
func GetGPUInfo() GpuInfoList {
|
||||||
@@ -32,7 +38,7 @@ func GetGPUInfo() GpuInfoList {
|
|||||||
// TODO is there a way to gather actual allocated video memory? (currentAllocatedSize doesn't work)
|
// TODO is there a way to gather actual allocated video memory? (currentAllocatedSize doesn't work)
|
||||||
info.FreeMemory = info.TotalMemory
|
info.FreeMemory = info.TotalMemory
|
||||||
|
|
||||||
info.MinimumMemory = 0
|
info.MinimumMemory = metalMinimumMemory
|
||||||
return []GpuInfo{info}
|
return []GpuInfo{info}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -107,7 +107,7 @@ func startServer(ctx context.Context, ollamaHost string) error {
|
|||||||
|
|
||||||
if tmp := os.Getenv("OLLAMA_HOST"); tmp != ollamaHost {
|
if tmp := os.Getenv("OLLAMA_HOST"); tmp != ollamaHost {
|
||||||
slog.Info("setting env", "OLLAMA_HOST", ollamaHost)
|
slog.Info("setting env", "OLLAMA_HOST", ollamaHost)
|
||||||
os.Setenv("OLLAMA_HOST", ollamaHost)
|
t.Setenv("OLLAMA_HOST", ollamaHost)
|
||||||
}
|
}
|
||||||
|
|
||||||
slog.Info("starting server", "url", ollamaHost)
|
slog.Info("starting server", "url", ollamaHost)
|
||||||
|
|||||||
15
llm/ext_server/server.cpp
vendored
15
llm/ext_server/server.cpp
vendored
@@ -1032,7 +1032,7 @@ struct llama_server_context
|
|||||||
slot.has_next_token = false;
|
slot.has_next_token = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!slot.cache_tokens.empty() && result.tok == llama_token_eos(model))
|
if (!slot.cache_tokens.empty() && llama_token_is_eog(model, result.tok))
|
||||||
{
|
{
|
||||||
slot.stopped_eos = true;
|
slot.stopped_eos = true;
|
||||||
slot.has_next_token = false;
|
slot.has_next_token = false;
|
||||||
@@ -1144,12 +1144,15 @@ struct llama_server_context
|
|||||||
|
|
||||||
res.result_json = json
|
res.result_json = json
|
||||||
{
|
{
|
||||||
{"content", tkn.text_to_send},
|
|
||||||
{"stop", false},
|
{"stop", false},
|
||||||
{"slot_id", slot.id},
|
{"slot_id", slot.id},
|
||||||
{"multimodal", multimodal}
|
{"multimodal", multimodal}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
if (!llama_token_is_eog(model, tkn.tok)) {
|
||||||
|
res.result_json["content"] = tkn.text_to_send;
|
||||||
|
}
|
||||||
|
|
||||||
if (slot.sparams.n_probs > 0)
|
if (slot.sparams.n_probs > 0)
|
||||||
{
|
{
|
||||||
std::vector<completion_token_output> probs_output = {};
|
std::vector<completion_token_output> probs_output = {};
|
||||||
@@ -2644,18 +2647,18 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
|
|||||||
if (strncmp(sep, "int:", 4) == 0) {
|
if (strncmp(sep, "int:", 4) == 0) {
|
||||||
sep += 4;
|
sep += 4;
|
||||||
kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT;
|
kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT;
|
||||||
kvo.int_value = std::atol(sep);
|
kvo.val_i64 = std::atol(sep);
|
||||||
} else if (strncmp(sep, "float:", 6) == 0) {
|
} else if (strncmp(sep, "float:", 6) == 0) {
|
||||||
sep += 6;
|
sep += 6;
|
||||||
kvo.tag = LLAMA_KV_OVERRIDE_TYPE_FLOAT;
|
kvo.tag = LLAMA_KV_OVERRIDE_TYPE_FLOAT;
|
||||||
kvo.float_value = std::atof(sep);
|
kvo.val_f64 = std::atof(sep);
|
||||||
} else if (strncmp(sep, "bool:", 5) == 0) {
|
} else if (strncmp(sep, "bool:", 5) == 0) {
|
||||||
sep += 5;
|
sep += 5;
|
||||||
kvo.tag = LLAMA_KV_OVERRIDE_TYPE_BOOL;
|
kvo.tag = LLAMA_KV_OVERRIDE_TYPE_BOOL;
|
||||||
if (std::strcmp(sep, "true") == 0) {
|
if (std::strcmp(sep, "true") == 0) {
|
||||||
kvo.bool_value = true;
|
kvo.val_bool = true;
|
||||||
} else if (std::strcmp(sep, "false") == 0) {
|
} else if (std::strcmp(sep, "false") == 0) {
|
||||||
kvo.bool_value = false;
|
kvo.val_bool = false;
|
||||||
} else {
|
} else {
|
||||||
fprintf(stderr, "error: Invalid boolean value for KV override: %s\n", argv[i]);
|
fprintf(stderr, "error: Invalid boolean value for KV override: %s\n", argv[i]);
|
||||||
invalid_param = true;
|
invalid_param = true;
|
||||||
|
|||||||
140
llm/filetype.go
Normal file
140
llm/filetype.go
Normal file
@@ -0,0 +1,140 @@
|
|||||||
|
package llm
|
||||||
|
|
||||||
|
import "fmt"
|
||||||
|
|
||||||
|
type fileType uint32
|
||||||
|
|
||||||
|
const (
|
||||||
|
fileTypeF32 fileType = iota
|
||||||
|
fileTypeF16
|
||||||
|
fileTypeQ4_0
|
||||||
|
fileTypeQ4_1
|
||||||
|
fileTypeQ4_1_F16
|
||||||
|
fileTypeQ4_2 // unused
|
||||||
|
fileTypeQ4_3 // unused
|
||||||
|
fileTypeQ8_0
|
||||||
|
fileTypeQ5_0
|
||||||
|
fileTypeQ5_1
|
||||||
|
fileTypeQ2_K
|
||||||
|
fileTypeQ3_K_S
|
||||||
|
fileTypeQ3_K_M
|
||||||
|
fileTypeQ3_K_L
|
||||||
|
fileTypeQ4_K_S
|
||||||
|
fileTypeQ4_K_M
|
||||||
|
fileTypeQ5_K_S
|
||||||
|
fileTypeQ5_K_M
|
||||||
|
fileTypeQ6_K
|
||||||
|
fileTypeIQ2_XXS
|
||||||
|
fileTypeIQ2_XS
|
||||||
|
fileTypeQ2_K_S
|
||||||
|
fileTypeQ3_K_XS
|
||||||
|
fileTypeIQ3_XXS
|
||||||
|
|
||||||
|
fileTypeUnknown
|
||||||
|
)
|
||||||
|
|
||||||
|
func ParseFileType(s string) (fileType, error) {
|
||||||
|
switch s {
|
||||||
|
case "F32":
|
||||||
|
return fileTypeF32, nil
|
||||||
|
case "F16":
|
||||||
|
return fileTypeF16, nil
|
||||||
|
case "Q4_0":
|
||||||
|
return fileTypeQ4_0, nil
|
||||||
|
case "Q4_1":
|
||||||
|
return fileTypeQ4_1, nil
|
||||||
|
case "Q4_1_F16":
|
||||||
|
return fileTypeQ4_1_F16, nil
|
||||||
|
case "Q8_0":
|
||||||
|
return fileTypeQ8_0, nil
|
||||||
|
case "Q5_0":
|
||||||
|
return fileTypeQ5_0, nil
|
||||||
|
case "Q5_1":
|
||||||
|
return fileTypeQ5_1, nil
|
||||||
|
case "Q2_K":
|
||||||
|
return fileTypeQ2_K, nil
|
||||||
|
case "Q3_K_S":
|
||||||
|
return fileTypeQ3_K_S, nil
|
||||||
|
case "Q3_K_M":
|
||||||
|
return fileTypeQ3_K_M, nil
|
||||||
|
case "Q3_K_L":
|
||||||
|
return fileTypeQ3_K_L, nil
|
||||||
|
case "Q4_K_S":
|
||||||
|
return fileTypeQ4_K_S, nil
|
||||||
|
case "Q4_K_M":
|
||||||
|
return fileTypeQ4_K_M, nil
|
||||||
|
case "Q5_K_S":
|
||||||
|
return fileTypeQ5_K_S, nil
|
||||||
|
case "Q5_K_M":
|
||||||
|
return fileTypeQ5_K_M, nil
|
||||||
|
case "Q6_K":
|
||||||
|
return fileTypeQ6_K, nil
|
||||||
|
case "IQ2_XXS":
|
||||||
|
return fileTypeIQ2_XXS, nil
|
||||||
|
case "IQ2_XS":
|
||||||
|
return fileTypeIQ2_XS, nil
|
||||||
|
case "Q2_K_S":
|
||||||
|
return fileTypeQ2_K_S, nil
|
||||||
|
case "Q3_K_XS":
|
||||||
|
return fileTypeQ3_K_XS, nil
|
||||||
|
case "IQ3_XXS":
|
||||||
|
return fileTypeIQ3_XXS, nil
|
||||||
|
default:
|
||||||
|
return fileTypeUnknown, fmt.Errorf("unknown fileType: %s", s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t fileType) String() string {
|
||||||
|
switch t {
|
||||||
|
case fileTypeF32:
|
||||||
|
return "F32"
|
||||||
|
case fileTypeF16:
|
||||||
|
return "F16"
|
||||||
|
case fileTypeQ4_0:
|
||||||
|
return "Q4_0"
|
||||||
|
case fileTypeQ4_1:
|
||||||
|
return "Q4_1"
|
||||||
|
case fileTypeQ4_1_F16:
|
||||||
|
return "Q4_1_F16"
|
||||||
|
case fileTypeQ8_0:
|
||||||
|
return "Q8_0"
|
||||||
|
case fileTypeQ5_0:
|
||||||
|
return "Q5_0"
|
||||||
|
case fileTypeQ5_1:
|
||||||
|
return "Q5_1"
|
||||||
|
case fileTypeQ2_K:
|
||||||
|
return "Q2_K"
|
||||||
|
case fileTypeQ3_K_S:
|
||||||
|
return "Q3_K_S"
|
||||||
|
case fileTypeQ3_K_M:
|
||||||
|
return "Q3_K_M"
|
||||||
|
case fileTypeQ3_K_L:
|
||||||
|
return "Q3_K_L"
|
||||||
|
case fileTypeQ4_K_S:
|
||||||
|
return "Q4_K_S"
|
||||||
|
case fileTypeQ4_K_M:
|
||||||
|
return "Q4_K_M"
|
||||||
|
case fileTypeQ5_K_S:
|
||||||
|
return "Q5_K_S"
|
||||||
|
case fileTypeQ5_K_M:
|
||||||
|
return "Q5_K_M"
|
||||||
|
case fileTypeQ6_K:
|
||||||
|
return "Q6_K"
|
||||||
|
case fileTypeIQ2_XXS:
|
||||||
|
return "IQ2_XXS"
|
||||||
|
case fileTypeIQ2_XS:
|
||||||
|
return "IQ2_XS"
|
||||||
|
case fileTypeQ2_K_S:
|
||||||
|
return "Q2_K_S"
|
||||||
|
case fileTypeQ3_K_XS:
|
||||||
|
return "Q3_K_XS"
|
||||||
|
case fileTypeIQ3_XXS:
|
||||||
|
return "IQ3_XXS"
|
||||||
|
default:
|
||||||
|
return "unknown"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t fileType) Value() uint32 {
|
||||||
|
return uint32(t)
|
||||||
|
}
|
||||||
@@ -26,16 +26,25 @@ function amdGPUs {
|
|||||||
$GPU_LIST -join ';'
|
$GPU_LIST -join ';'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
function init_vars {
|
function init_vars {
|
||||||
$script:SRC_DIR = $(resolve-path "..\..\")
|
if (!$script:SRC_DIR) {
|
||||||
$script:llamacppDir = "../llama.cpp"
|
$script:SRC_DIR = $(resolve-path "..\..\")
|
||||||
|
}
|
||||||
|
if (!$script:llamacppDir) {
|
||||||
|
$script:llamacppDir = "../llama.cpp"
|
||||||
|
}
|
||||||
|
if (!$script:cmakeTargets) {
|
||||||
|
$script:cmakeTargets = @("ollama_llama_server")
|
||||||
|
}
|
||||||
$script:cmakeDefs = @(
|
$script:cmakeDefs = @(
|
||||||
"-DBUILD_SHARED_LIBS=on",
|
"-DBUILD_SHARED_LIBS=on",
|
||||||
"-DLLAMA_NATIVE=off"
|
"-DLLAMA_NATIVE=off"
|
||||||
)
|
)
|
||||||
$script:cmakeTargets = @("ollama_llama_server")
|
$script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on")
|
||||||
$script:ARCH = "amd64" # arm not yet supported.
|
$script:ARCH = $Env:PROCESSOR_ARCHITECTURE.ToLower()
|
||||||
$script:DIST_BASE = "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_runners"
|
$script:DIST_BASE = "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_runners"
|
||||||
|
md "$script:DIST_BASE" -ea 0 > $null
|
||||||
if ($env:CGO_CFLAGS -contains "-g") {
|
if ($env:CGO_CFLAGS -contains "-g") {
|
||||||
$script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on", "-DCMAKE_BUILD_TYPE=RelWithDebInfo")
|
$script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on", "-DCMAKE_BUILD_TYPE=RelWithDebInfo")
|
||||||
$script:config = "RelWithDebInfo"
|
$script:config = "RelWithDebInfo"
|
||||||
@@ -166,137 +175,195 @@ function cleanup {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
init_vars
|
|
||||||
git_module_setup
|
|
||||||
apply_patches
|
|
||||||
|
|
||||||
# -DLLAMA_AVX -- 2011 Intel Sandy Bridge & AMD Bulldozer
|
# -DLLAMA_AVX -- 2011 Intel Sandy Bridge & AMD Bulldozer
|
||||||
# -DLLAMA_AVX2 -- 2013 Intel Haswell & 2015 AMD Excavator / 2017 AMD Zen
|
# -DLLAMA_AVX2 -- 2013 Intel Haswell & 2015 AMD Excavator / 2017 AMD Zen
|
||||||
# -DLLAMA_FMA (FMA3) -- 2013 Intel Haswell & 2012 AMD Piledriver
|
# -DLLAMA_FMA (FMA3) -- 2013 Intel Haswell & 2012 AMD Piledriver
|
||||||
|
|
||||||
$script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on")
|
|
||||||
|
|
||||||
if ($null -eq ${env:OLLAMA_SKIP_CPU_GENERATE}) {
|
function build_static() {
|
||||||
|
if ((-not "${env:OLLAMA_SKIP_STATIC_GENERATE}") -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "static"))) {
|
||||||
|
# GCC build for direct linking into the Go binary
|
||||||
|
init_vars
|
||||||
|
# cmake will silently fallback to msvc compilers if mingw isn't in the path, so detect and fail fast
|
||||||
|
# as we need this to be compiled by gcc for golang to be able to link with itx
|
||||||
|
write-host "Checking for MinGW..."
|
||||||
|
# error action ensures we exit on failure
|
||||||
|
get-command gcc
|
||||||
|
get-command mingw32-make
|
||||||
|
$oldTargets = $script:cmakeTargets
|
||||||
|
$script:cmakeTargets = @("llama", "ggml")
|
||||||
|
$script:cmakeDefs = @(
|
||||||
|
"-G", "MinGW Makefiles"
|
||||||
|
"-DCMAKE_C_COMPILER=gcc.exe",
|
||||||
|
"-DCMAKE_CXX_COMPILER=g++.exe",
|
||||||
|
"-DBUILD_SHARED_LIBS=off",
|
||||||
|
"-DLLAMA_NATIVE=off",
|
||||||
|
"-DLLAMA_AVX=off",
|
||||||
|
"-DLLAMA_AVX2=off",
|
||||||
|
"-DLLAMA_AVX512=off",
|
||||||
|
"-DLLAMA_F16C=off",
|
||||||
|
"-DLLAMA_FMA=off")
|
||||||
|
$script:buildDir="../build/windows/${script:ARCH}_static"
|
||||||
|
write-host "Building static library"
|
||||||
|
build
|
||||||
|
$script:cmakeTargets = $oldTargets
|
||||||
|
} else {
|
||||||
|
write-host "Skipping CPU generation step as requested"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function build_cpu($gen_arch) {
|
||||||
|
if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu"))) {
|
||||||
|
# remaining llama.cpp builds use MSVC
|
||||||
|
init_vars
|
||||||
|
$script:cmakeDefs = $script:commonCpuDefs + @("-A", $gen_arch, "-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
|
||||||
|
$script:buildDir="../build/windows/${script:ARCH}/cpu"
|
||||||
|
$script:distDir="$script:DIST_BASE\cpu"
|
||||||
|
write-host "Building LCD CPU"
|
||||||
|
build
|
||||||
|
sign
|
||||||
|
install
|
||||||
|
} else {
|
||||||
|
write-host "Skipping CPU generation step as requested"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function build_cpu_avx() {
|
||||||
|
if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu_avx"))) {
|
||||||
|
init_vars
|
||||||
|
$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
|
||||||
|
$script:buildDir="../build/windows/${script:ARCH}/cpu_avx"
|
||||||
|
$script:distDir="$script:DIST_BASE\cpu_avx"
|
||||||
|
write-host "Building AVX CPU"
|
||||||
|
build
|
||||||
|
sign
|
||||||
|
install
|
||||||
|
} else {
|
||||||
|
write-host "Skipping CPU AVX generation step as requested"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function build_cpu_avx2() {
|
||||||
|
if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu_avx2"))) {
|
||||||
|
init_vars
|
||||||
|
$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs
|
||||||
|
$script:buildDir="../build/windows/${script:ARCH}/cpu_avx2"
|
||||||
|
$script:distDir="$script:DIST_BASE\cpu_avx2"
|
||||||
|
write-host "Building AVX2 CPU"
|
||||||
|
build
|
||||||
|
sign
|
||||||
|
install
|
||||||
|
} else {
|
||||||
|
write-host "Skipping CPU AVX2 generation step as requested"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function build_cuda() {
|
||||||
|
if ((-not "${env:OLLAMA_SKIP_CUDA_GENERATE}") -and ("${script:CUDA_LIB_DIR}")) {
|
||||||
|
# Then build cuda as a dynamically loaded library
|
||||||
|
$nvcc = "$script:CUDA_LIB_DIR\nvcc.exe"
|
||||||
|
$script:CUDA_VERSION=(get-item ($nvcc | split-path | split-path)).Basename
|
||||||
|
if ($null -ne $script:CUDA_VERSION) {
|
||||||
|
$script:CUDA_VARIANT="_"+$script:CUDA_VERSION
|
||||||
|
}
|
||||||
|
init_vars
|
||||||
|
$script:buildDir="../build/windows/${script:ARCH}/cuda$script:CUDA_VARIANT"
|
||||||
|
$script:distDir="$script:DIST_BASE\cuda$script:CUDA_VARIANT"
|
||||||
|
$script:cmakeDefs += @("-A", "x64", "-DLLAMA_CUDA=ON", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR", "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}")
|
||||||
|
if ($null -ne $env:OLLAMA_CUSTOM_CUDA_DEFS) {
|
||||||
|
write-host "OLLAMA_CUSTOM_CUDA_DEFS=`"${env:OLLAMA_CUSTOM_CUDA_DEFS}`""
|
||||||
|
$script:cmakeDefs +=@("${env:OLLAMA_CUSTOM_CUDA_DEFS}")
|
||||||
|
write-host "building custom CUDA GPU"
|
||||||
|
}
|
||||||
|
build
|
||||||
|
sign
|
||||||
|
install
|
||||||
|
|
||||||
|
write-host "copying CUDA dependencies to ${script:SRC_DIR}\dist\windows-${script:ARCH}\"
|
||||||
|
cp "${script:CUDA_LIB_DIR}\cudart64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\"
|
||||||
|
cp "${script:CUDA_LIB_DIR}\cublas64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\"
|
||||||
|
cp "${script:CUDA_LIB_DIR}\cublasLt64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\"
|
||||||
|
} else {
|
||||||
|
write-host "Skipping CUDA generation step"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function build_rocm() {
|
||||||
|
if ((-not "${env:OLLAMA_SKIP_ROCM_GENERATE}") -and ("${env:HIP_PATH}")) {
|
||||||
|
$script:ROCM_VERSION=(get-item $env:HIP_PATH).Basename
|
||||||
|
if ($null -ne $script:ROCM_VERSION) {
|
||||||
|
$script:ROCM_VARIANT="_v"+$script:ROCM_VERSION
|
||||||
|
}
|
||||||
|
|
||||||
|
init_vars
|
||||||
|
$script:buildDir="../build/windows/${script:ARCH}/rocm$script:ROCM_VARIANT"
|
||||||
|
$script:distDir="$script:DIST_BASE\rocm$script:ROCM_VARIANT"
|
||||||
|
$script:cmakeDefs += @(
|
||||||
|
"-G", "Ninja",
|
||||||
|
"-DCMAKE_C_COMPILER=clang.exe",
|
||||||
|
"-DCMAKE_CXX_COMPILER=clang++.exe",
|
||||||
|
"-DLLAMA_HIPBLAS=on",
|
||||||
|
"-DHIP_PLATFORM=amd",
|
||||||
|
"-DLLAMA_AVX=on",
|
||||||
|
"-DLLAMA_AVX2=off",
|
||||||
|
"-DCMAKE_POSITION_INDEPENDENT_CODE=on",
|
||||||
|
"-DAMDGPU_TARGETS=$(amdGPUs)",
|
||||||
|
"-DGPU_TARGETS=$(amdGPUs)"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Make sure the ROCm binary dir is first in the path
|
||||||
|
$env:PATH="$env:HIP_PATH\bin;$env:PATH"
|
||||||
|
|
||||||
|
# We have to clobber the LIB var from the developer shell for clang to work properly
|
||||||
|
$env:LIB=""
|
||||||
|
if ($null -ne $env:OLLAMA_CUSTOM_ROCM_DEFS) {
|
||||||
|
write-host "OLLAMA_CUSTOM_ROCM_DEFS=`"${env:OLLAMA_CUSTOM_ROCM_DEFS}`""
|
||||||
|
$script:cmakeDefs += @("${env:OLLAMA_CUSTOM_ROCM_DEFS}")
|
||||||
|
write-host "building custom ROCM GPU"
|
||||||
|
}
|
||||||
|
write-host "Building ROCm"
|
||||||
|
build
|
||||||
|
# Ninja doesn't prefix with config name
|
||||||
|
${script:config}=""
|
||||||
|
if ($null -ne $script:DUMPBIN) {
|
||||||
|
& "$script:DUMPBIN" /dependents "${script:buildDir}/bin/ollama_llama_server.exe" | select-string ".dll"
|
||||||
|
}
|
||||||
|
sign
|
||||||
|
install
|
||||||
|
|
||||||
|
# Assumes v5.7, may need adjustments for v6
|
||||||
|
rm -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
|
||||||
|
md "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\rocblas\library\" -ea 0 > $null
|
||||||
|
cp "${env:HIP_PATH}\bin\hipblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
|
||||||
|
cp "${env:HIP_PATH}\bin\rocblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
|
||||||
|
# amdhip64.dll dependency comes from the driver and must be installed on the host to use AMD GPUs
|
||||||
|
cp "${env:HIP_PATH}\bin\rocblas\library\*" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\rocblas\library\"
|
||||||
|
} else {
|
||||||
|
write-host "Skipping ROCm generation step"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
# GCC build for direct linking into the Go binary
|
|
||||||
init_vars
|
init_vars
|
||||||
# cmake will silently fallback to msvc compilers if mingw isn't in the path, so detect and fail fast
|
if ($($args.count) -eq 0) {
|
||||||
# as we need this to be compiled by gcc for golang to be able to link with itx
|
git_module_setup
|
||||||
write-host "Checking for MinGW..."
|
apply_patches
|
||||||
# error action ensures we exit on failure
|
build_static
|
||||||
get-command gcc
|
if ($script:ARCH -eq "arm64") {
|
||||||
get-command mingw32-make
|
build_cpu("ARM64")
|
||||||
$script:cmakeTargets = @("llama", "ggml")
|
} else { # amd64
|
||||||
$script:cmakeDefs = @(
|
build_cpu("x64")
|
||||||
"-G", "MinGW Makefiles"
|
build_cpu_avx
|
||||||
"-DCMAKE_C_COMPILER=gcc.exe",
|
build_cpu_avx2
|
||||||
"-DCMAKE_CXX_COMPILER=g++.exe",
|
build_cuda
|
||||||
"-DBUILD_SHARED_LIBS=off",
|
build_rocm
|
||||||
"-DLLAMA_NATIVE=off",
|
}
|
||||||
"-DLLAMA_AVX=off",
|
|
||||||
"-DLLAMA_AVX2=off",
|
|
||||||
"-DLLAMA_AVX512=off",
|
|
||||||
"-DLLAMA_F16C=off",
|
|
||||||
"-DLLAMA_FMA=off")
|
|
||||||
$script:buildDir="../build/windows/${script:ARCH}_static"
|
|
||||||
write-host "Building static library"
|
|
||||||
build
|
|
||||||
|
|
||||||
# remaining llama.cpp builds use MSVC
|
cleanup
|
||||||
init_vars
|
write-host "`ngo generate completed. LLM runners: $(get-childitem -path $script:DIST_BASE)"
|
||||||
$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
|
|
||||||
$script:buildDir="../build/windows/${script:ARCH}/cpu"
|
|
||||||
$script:distDir="$script:DIST_BASE\cpu"
|
|
||||||
write-host "Building LCD CPU"
|
|
||||||
build
|
|
||||||
sign
|
|
||||||
install
|
|
||||||
|
|
||||||
init_vars
|
|
||||||
$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
|
|
||||||
$script:buildDir="../build/windows/${script:ARCH}/cpu_avx"
|
|
||||||
$script:distDir="$script:DIST_BASE\cpu_avx"
|
|
||||||
write-host "Building AVX CPU"
|
|
||||||
build
|
|
||||||
sign
|
|
||||||
install
|
|
||||||
|
|
||||||
init_vars
|
|
||||||
$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs
|
|
||||||
$script:buildDir="../build/windows/${script:ARCH}/cpu_avx2"
|
|
||||||
$script:distDir="$script:DIST_BASE\cpu_avx2"
|
|
||||||
write-host "Building AVX2 CPU"
|
|
||||||
build
|
|
||||||
sign
|
|
||||||
install
|
|
||||||
} else {
|
} else {
|
||||||
write-host "Skipping CPU generation step as requested"
|
for ( $i = 0; $i -lt $args.count; $i++ ) {
|
||||||
}
|
write-host "performing $($args[$i])"
|
||||||
|
& $($args[$i])
|
||||||
if ($null -ne $script:CUDA_LIB_DIR) {
|
}
|
||||||
# Then build cuda as a dynamically loaded library
|
}
|
||||||
$nvcc = "$script:CUDA_LIB_DIR\nvcc.exe"
|
|
||||||
$script:CUDA_VERSION=(get-item ($nvcc | split-path | split-path)).Basename
|
|
||||||
if ($null -ne $script:CUDA_VERSION) {
|
|
||||||
$script:CUDA_VARIANT="_"+$script:CUDA_VERSION
|
|
||||||
}
|
|
||||||
init_vars
|
|
||||||
$script:buildDir="../build/windows/${script:ARCH}/cuda$script:CUDA_VARIANT"
|
|
||||||
$script:distDir="$script:DIST_BASE\cuda$script:CUDA_VARIANT"
|
|
||||||
$script:cmakeDefs += @("-A", "x64", "-DLLAMA_CUDA=ON", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR", "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}")
|
|
||||||
if ($null -ne $env:OLLAMA_CUSTOM_CUDA_DEFS) {
|
|
||||||
write-host "OLLAMA_CUSTOM_CUDA_DEFS=`"${env:OLLAMA_CUSTOM_CUDA_DEFS}`""
|
|
||||||
$script:cmakeDefs +=@("${env:OLLAMA_CUSTOM_CUDA_DEFS}")
|
|
||||||
write-host "building custom CUDA GPU"
|
|
||||||
}
|
|
||||||
build
|
|
||||||
sign
|
|
||||||
install
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($null -ne $env:HIP_PATH) {
|
|
||||||
$script:ROCM_VERSION=(get-item $env:HIP_PATH).Basename
|
|
||||||
if ($null -ne $script:ROCM_VERSION) {
|
|
||||||
$script:ROCM_VARIANT="_v"+$script:ROCM_VERSION
|
|
||||||
}
|
|
||||||
|
|
||||||
init_vars
|
|
||||||
$script:buildDir="../build/windows/${script:ARCH}/rocm$script:ROCM_VARIANT"
|
|
||||||
$script:distDir="$script:DIST_BASE\rocm$script:ROCM_VARIANT"
|
|
||||||
$script:cmakeDefs += @(
|
|
||||||
"-G", "Ninja",
|
|
||||||
"-DCMAKE_C_COMPILER=clang.exe",
|
|
||||||
"-DCMAKE_CXX_COMPILER=clang++.exe",
|
|
||||||
"-DLLAMA_HIPBLAS=on",
|
|
||||||
"-DHIP_PLATFORM=amd",
|
|
||||||
"-DLLAMA_AVX=on",
|
|
||||||
"-DLLAMA_AVX2=off",
|
|
||||||
"-DCMAKE_POSITION_INDEPENDENT_CODE=on",
|
|
||||||
"-DAMDGPU_TARGETS=$(amdGPUs)",
|
|
||||||
"-DGPU_TARGETS=$(amdGPUs)"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Make sure the ROCm binary dir is first in the path
|
|
||||||
$env:PATH="$env:HIP_PATH\bin;$env:PATH"
|
|
||||||
|
|
||||||
# We have to clobber the LIB var from the developer shell for clang to work properly
|
|
||||||
$env:LIB=""
|
|
||||||
if ($null -ne $env:OLLAMA_CUSTOM_ROCM_DEFS) {
|
|
||||||
write-host "OLLAMA_CUSTOM_ROCM_DEFS=`"${env:OLLAMA_CUSTOM_ROCM_DEFS}`""
|
|
||||||
$script:cmakeDefs += @("${env:OLLAMA_CUSTOM_ROCM_DEFS}")
|
|
||||||
write-host "building custom ROCM GPU"
|
|
||||||
}
|
|
||||||
write-host "Building ROCm"
|
|
||||||
build
|
|
||||||
# Ninja doesn't prefix with config name
|
|
||||||
${script:config}=""
|
|
||||||
if ($null -ne $script:DUMPBIN) {
|
|
||||||
& "$script:DUMPBIN" /dependents "${script:buildDir}/bin/ollama_llama_server.exe" | select-string ".dll"
|
|
||||||
}
|
|
||||||
sign
|
|
||||||
install
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
cleanup
|
|
||||||
write-host "`ngo generate completed. LLM runners: $(get-childitem -path $script:DIST_BASE)"
|
|
||||||
12
llm/ggla.go
12
llm/ggla.go
@@ -33,6 +33,7 @@ func (c *containerGGLA) Decode(rs io.ReadSeeker) (model, error) {
|
|||||||
|
|
||||||
type ggla struct {
|
type ggla struct {
|
||||||
*containerGGLA
|
*containerGGLA
|
||||||
|
offset int64
|
||||||
|
|
||||||
kv KV
|
kv KV
|
||||||
tensors []*Tensor
|
tensors []*Tensor
|
||||||
@@ -53,6 +54,10 @@ func (llm *ggla) Tensors() Tensors {
|
|||||||
return llm.tensors
|
return llm.tensors
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (llm *ggla) Offset() int64 {
|
||||||
|
return llm.offset
|
||||||
|
}
|
||||||
|
|
||||||
func (llm *ggla) decode(rs io.ReadSeeker) error {
|
func (llm *ggla) decode(rs io.ReadSeeker) error {
|
||||||
var r uint32
|
var r uint32
|
||||||
if err := binary.Read(rs, binary.LittleEndian, &r); err != nil {
|
if err := binary.Read(rs, binary.LittleEndian, &r); err != nil {
|
||||||
@@ -66,6 +71,13 @@ func (llm *ggla) decode(rs io.ReadSeeker) error {
|
|||||||
}
|
}
|
||||||
llm.kv["alpha"] = alpha
|
llm.kv["alpha"] = alpha
|
||||||
|
|
||||||
|
offset, err := rs.Seek(0, io.SeekCurrent)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
llm.offset = offset
|
||||||
|
|
||||||
for {
|
for {
|
||||||
var dims uint32
|
var dims uint32
|
||||||
if err := binary.Read(rs, binary.LittleEndian, &dims); err != nil {
|
if err := binary.Read(rs, binary.LittleEndian, &dims); err != nil {
|
||||||
|
|||||||
96
llm/ggml.go
96
llm/ggml.go
@@ -13,85 +13,10 @@ type GGML struct {
|
|||||||
model
|
model
|
||||||
}
|
}
|
||||||
|
|
||||||
const (
|
|
||||||
fileTypeF32 uint32 = iota
|
|
||||||
fileTypeF16
|
|
||||||
fileTypeQ4_0
|
|
||||||
fileTypeQ4_1
|
|
||||||
fileTypeQ4_1_F16
|
|
||||||
fileTypeQ8_0 uint32 = iota + 2
|
|
||||||
fileTypeQ5_0
|
|
||||||
fileTypeQ5_1
|
|
||||||
fileTypeQ2_K
|
|
||||||
fileTypeQ3_K_S
|
|
||||||
fileTypeQ3_K_M
|
|
||||||
fileTypeQ3_K_L
|
|
||||||
fileTypeQ4_K_S
|
|
||||||
fileTypeQ4_K_M
|
|
||||||
fileTypeQ5_K_S
|
|
||||||
fileTypeQ5_K_M
|
|
||||||
fileTypeQ6_K
|
|
||||||
fileTypeIQ2_XXS
|
|
||||||
fileTypeIQ2_XS
|
|
||||||
fileTypeQ2_K_S
|
|
||||||
fileTypeQ3_K_XS
|
|
||||||
fileTypeIQ3_XXS
|
|
||||||
)
|
|
||||||
|
|
||||||
func fileType(fileType uint32) string {
|
|
||||||
switch fileType {
|
|
||||||
case fileTypeF32:
|
|
||||||
return "F32"
|
|
||||||
case fileTypeF16:
|
|
||||||
return "F16"
|
|
||||||
case fileTypeQ4_0:
|
|
||||||
return "Q4_0"
|
|
||||||
case fileTypeQ4_1:
|
|
||||||
return "Q4_1"
|
|
||||||
case fileTypeQ4_1_F16:
|
|
||||||
return "Q4_1_F16"
|
|
||||||
case fileTypeQ8_0:
|
|
||||||
return "Q8_0"
|
|
||||||
case fileTypeQ5_0:
|
|
||||||
return "Q5_0"
|
|
||||||
case fileTypeQ5_1:
|
|
||||||
return "Q5_1"
|
|
||||||
case fileTypeQ2_K:
|
|
||||||
return "Q2_K"
|
|
||||||
case fileTypeQ3_K_S:
|
|
||||||
return "Q3_K_S"
|
|
||||||
case fileTypeQ3_K_M:
|
|
||||||
return "Q3_K_M"
|
|
||||||
case fileTypeQ3_K_L:
|
|
||||||
return "Q3_K_L"
|
|
||||||
case fileTypeQ4_K_S:
|
|
||||||
return "Q4_K_S"
|
|
||||||
case fileTypeQ4_K_M:
|
|
||||||
return "Q4_K_M"
|
|
||||||
case fileTypeQ5_K_S:
|
|
||||||
return "Q5_K_S"
|
|
||||||
case fileTypeQ5_K_M:
|
|
||||||
return "Q5_K_M"
|
|
||||||
case fileTypeQ6_K:
|
|
||||||
return "Q6_K"
|
|
||||||
case fileTypeIQ2_XXS:
|
|
||||||
return "IQ2_XXS"
|
|
||||||
case fileTypeIQ2_XS:
|
|
||||||
return "IQ2_XS"
|
|
||||||
case fileTypeQ2_K_S:
|
|
||||||
return "Q2_K_S"
|
|
||||||
case fileTypeQ3_K_XS:
|
|
||||||
return "Q3_K_XS"
|
|
||||||
case fileTypeIQ3_XXS:
|
|
||||||
return "IQ3_XXS"
|
|
||||||
default:
|
|
||||||
return "unknown"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
type model interface {
|
type model interface {
|
||||||
KV() KV
|
KV() KV
|
||||||
Tensors() Tensors
|
Tensors() Tensors
|
||||||
|
Offset() int64
|
||||||
}
|
}
|
||||||
|
|
||||||
type KV map[string]any
|
type KV map[string]any
|
||||||
@@ -123,7 +48,7 @@ func (kv KV) ParameterCount() uint64 {
|
|||||||
|
|
||||||
func (kv KV) FileType() string {
|
func (kv KV) FileType() string {
|
||||||
if u64 := kv.u64("general.file_type"); u64 > 0 {
|
if u64 := kv.u64("general.file_type"); u64 > 0 {
|
||||||
return fileType(uint32(u64))
|
return fileType(uint32(u64)).String()
|
||||||
}
|
}
|
||||||
|
|
||||||
return "unknown"
|
return "unknown"
|
||||||
@@ -286,6 +211,23 @@ const (
|
|||||||
|
|
||||||
var ErrUnsupportedFormat = errors.New("unsupported model format")
|
var ErrUnsupportedFormat = errors.New("unsupported model format")
|
||||||
|
|
||||||
|
func DetectGGMLType(b []byte) string {
|
||||||
|
switch binary.LittleEndian.Uint32(b[:4]) {
|
||||||
|
case FILE_MAGIC_GGML:
|
||||||
|
return "ggml"
|
||||||
|
case FILE_MAGIC_GGMF:
|
||||||
|
return "ggmf"
|
||||||
|
case FILE_MAGIC_GGJT:
|
||||||
|
return "ggjt"
|
||||||
|
case FILE_MAGIC_GGLA:
|
||||||
|
return "ggla"
|
||||||
|
case FILE_MAGIC_GGUF_LE, FILE_MAGIC_GGUF_BE:
|
||||||
|
return "gguf"
|
||||||
|
default:
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func DecodeGGML(rs io.ReadSeeker) (*GGML, int64, error) {
|
func DecodeGGML(rs io.ReadSeeker) (*GGML, int64, error) {
|
||||||
var magic uint32
|
var magic uint32
|
||||||
if err := binary.Read(rs, binary.LittleEndian, &magic); err != nil {
|
if err := binary.Read(rs, binary.LittleEndian, &magic); err != nil {
|
||||||
|
|||||||
11
llm/gguf.go
11
llm/gguf.go
@@ -55,7 +55,7 @@ func (c *containerGGUF) Decode(rs io.ReadSeeker) (model, error) {
|
|||||||
|
|
||||||
model := newGGUF(c)
|
model := newGGUF(c)
|
||||||
slog.Debug(fmt.Sprintf("model = %#v", model))
|
slog.Debug(fmt.Sprintf("model = %#v", model))
|
||||||
if err := model.Decode(rs); err != nil {
|
if err := model.decode(rs); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -90,6 +90,7 @@ const (
|
|||||||
|
|
||||||
type gguf struct {
|
type gguf struct {
|
||||||
*containerGGUF
|
*containerGGUF
|
||||||
|
offset int64
|
||||||
|
|
||||||
kv KV
|
kv KV
|
||||||
tensors []*Tensor
|
tensors []*Tensor
|
||||||
@@ -116,6 +117,10 @@ func (llm *gguf) Tensors() Tensors {
|
|||||||
return llm.tensors
|
return llm.tensors
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (llm *gguf) Offset() int64 {
|
||||||
|
return llm.offset
|
||||||
|
}
|
||||||
|
|
||||||
func (llm *gguf) numTensor() uint64 {
|
func (llm *gguf) numTensor() uint64 {
|
||||||
switch llm.Version {
|
switch llm.Version {
|
||||||
case 1:
|
case 1:
|
||||||
@@ -138,7 +143,7 @@ func (llm *gguf) numKV() uint64 {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (llm *gguf) Decode(rs io.ReadSeeker) error {
|
func (llm *gguf) decode(rs io.ReadSeeker) error {
|
||||||
// decode key-values
|
// decode key-values
|
||||||
for i := 0; uint64(i) < llm.numKV(); i++ {
|
for i := 0; uint64(i) < llm.numKV(); i++ {
|
||||||
k, err := readGGUFString(llm, rs)
|
k, err := readGGUFString(llm, rs)
|
||||||
@@ -250,6 +255,8 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
llm.offset = offset + padding
|
||||||
|
|
||||||
for _, tensor := range llm.tensors {
|
for _, tensor := range llm.tensors {
|
||||||
if _, err := rs.Seek(int64(tensor.size()), io.SeekCurrent); err != nil {
|
if _, err := rs.Seek(int64(tensor.size()), io.SeekCurrent); err != nil {
|
||||||
return err
|
return err
|
||||||
|
|||||||
Submodule llm/llama.cpp updated: 7593639ce3...952d03dbea
57
llm/llm.go
57
llm/llm.go
@@ -4,6 +4,7 @@ package llm
|
|||||||
// #cgo darwin,arm64 LDFLAGS: ${SRCDIR}/build/darwin/arm64_static/libllama.a -lstdc++
|
// #cgo darwin,arm64 LDFLAGS: ${SRCDIR}/build/darwin/arm64_static/libllama.a -lstdc++
|
||||||
// #cgo darwin,amd64 LDFLAGS: ${SRCDIR}/build/darwin/x86_64_static/libllama.a -lstdc++
|
// #cgo darwin,amd64 LDFLAGS: ${SRCDIR}/build/darwin/x86_64_static/libllama.a -lstdc++
|
||||||
// #cgo windows,amd64 LDFLAGS: ${SRCDIR}/build/windows/amd64_static/libllama.a -static -lstdc++
|
// #cgo windows,amd64 LDFLAGS: ${SRCDIR}/build/windows/amd64_static/libllama.a -static -lstdc++
|
||||||
|
// #cgo windows,arm64 LDFLAGS: ${SRCDIR}/build/windows/arm64_static/libllama.a -static -lstdc++
|
||||||
// #cgo linux,amd64 LDFLAGS: ${SRCDIR}/build/linux/x86_64_static/libllama.a -lstdc++
|
// #cgo linux,amd64 LDFLAGS: ${SRCDIR}/build/linux/x86_64_static/libllama.a -lstdc++
|
||||||
// #cgo linux,arm64 LDFLAGS: ${SRCDIR}/build/linux/arm64_static/libllama.a -lstdc++
|
// #cgo linux,arm64 LDFLAGS: ${SRCDIR}/build/linux/arm64_static/libllama.a -lstdc++
|
||||||
// #include <stdlib.h>
|
// #include <stdlib.h>
|
||||||
@@ -19,7 +20,7 @@ func SystemInfo() string {
|
|||||||
return C.GoString(C.llama_print_system_info())
|
return C.GoString(C.llama_print_system_info())
|
||||||
}
|
}
|
||||||
|
|
||||||
func Quantize(infile, outfile, filetype string) error {
|
func Quantize(infile, outfile string, ftype fileType) error {
|
||||||
cinfile := C.CString(infile)
|
cinfile := C.CString(infile)
|
||||||
defer C.free(unsafe.Pointer(cinfile))
|
defer C.free(unsafe.Pointer(cinfile))
|
||||||
|
|
||||||
@@ -28,58 +29,10 @@ func Quantize(infile, outfile, filetype string) error {
|
|||||||
|
|
||||||
params := C.llama_model_quantize_default_params()
|
params := C.llama_model_quantize_default_params()
|
||||||
params.nthread = -1
|
params.nthread = -1
|
||||||
|
params.ftype = ftype.Value()
|
||||||
|
|
||||||
switch filetype {
|
if rc := C.llama_model_quantize(cinfile, coutfile, ¶ms); rc != 0 {
|
||||||
case "F32":
|
return fmt.Errorf("llama_model_quantize: %d", rc)
|
||||||
params.ftype = fileTypeF32
|
|
||||||
case "F16":
|
|
||||||
params.ftype = fileTypeF16
|
|
||||||
case "Q4_0":
|
|
||||||
params.ftype = fileTypeQ4_0
|
|
||||||
case "Q4_1":
|
|
||||||
params.ftype = fileTypeQ4_1
|
|
||||||
case "Q4_1_F16":
|
|
||||||
params.ftype = fileTypeQ4_1_F16
|
|
||||||
case "Q8_0":
|
|
||||||
params.ftype = fileTypeQ8_0
|
|
||||||
case "Q5_0":
|
|
||||||
params.ftype = fileTypeQ5_0
|
|
||||||
case "Q5_1":
|
|
||||||
params.ftype = fileTypeQ5_1
|
|
||||||
case "Q2_K":
|
|
||||||
params.ftype = fileTypeQ2_K
|
|
||||||
case "Q3_K_S":
|
|
||||||
params.ftype = fileTypeQ3_K_S
|
|
||||||
case "Q3_K_M":
|
|
||||||
params.ftype = fileTypeQ3_K_M
|
|
||||||
case "Q3_K_L":
|
|
||||||
params.ftype = fileTypeQ3_K_L
|
|
||||||
case "Q4_K_S":
|
|
||||||
params.ftype = fileTypeQ4_K_S
|
|
||||||
case "Q4_K_M":
|
|
||||||
params.ftype = fileTypeQ4_K_M
|
|
||||||
case "Q5_K_S":
|
|
||||||
params.ftype = fileTypeQ5_K_S
|
|
||||||
case "Q5_K_M":
|
|
||||||
params.ftype = fileTypeQ5_K_M
|
|
||||||
case "Q6_K":
|
|
||||||
params.ftype = fileTypeQ6_K
|
|
||||||
case "IQ2_XXS":
|
|
||||||
params.ftype = fileTypeIQ2_XXS
|
|
||||||
case "IQ2_XS":
|
|
||||||
params.ftype = fileTypeIQ2_XS
|
|
||||||
case "Q2_K_S":
|
|
||||||
params.ftype = fileTypeQ2_K_S
|
|
||||||
case "Q3_K_XS":
|
|
||||||
params.ftype = fileTypeQ3_K_XS
|
|
||||||
case "IQ3_XXS":
|
|
||||||
params.ftype = fileTypeIQ3_XXS
|
|
||||||
default:
|
|
||||||
return fmt.Errorf("unknown filetype: %s", filetype)
|
|
||||||
}
|
|
||||||
|
|
||||||
if retval := C.llama_model_quantize(cinfile, coutfile, ¶ms); retval != 0 {
|
|
||||||
return fmt.Errorf("llama_model_quantize: %d", retval)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
|
|||||||
@@ -88,6 +88,11 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
|
|||||||
graphFullOffload *= uint64(len(gpus))
|
graphFullOffload *= uint64(len(gpus))
|
||||||
graphPartialOffload *= uint64(len(gpus))
|
graphPartialOffload *= uint64(len(gpus))
|
||||||
|
|
||||||
|
// on metal there's no partial offload overhead
|
||||||
|
if gpus[0].Library == "metal" {
|
||||||
|
graphPartialOffload = graphFullOffload
|
||||||
|
}
|
||||||
|
|
||||||
// memoryRequiredTotal represents the memory required for full GPU offloading (all layers)
|
// memoryRequiredTotal represents the memory required for full GPU offloading (all layers)
|
||||||
memoryRequiredTotal := memoryMinimum + graphFullOffload
|
memoryRequiredTotal := memoryMinimum + graphFullOffload
|
||||||
|
|
||||||
@@ -102,10 +107,14 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
|
|||||||
layers := ggml.Tensors().Layers()
|
layers := ggml.Tensors().Layers()
|
||||||
|
|
||||||
var memoryLayerOutput uint64
|
var memoryLayerOutput uint64
|
||||||
for k, v := range layers {
|
if layer, ok := layers["output_norm"]; ok {
|
||||||
if k == "output" || k == "output_norm" {
|
memoryLayerOutput += layer.size()
|
||||||
memoryLayerOutput += v.size()
|
}
|
||||||
}
|
|
||||||
|
if layer, ok := layers["output"]; ok {
|
||||||
|
memoryLayerOutput += layer.size()
|
||||||
|
} else if layer, ok := layers["token_embd"]; ok {
|
||||||
|
memoryLayerOutput += layer.size()
|
||||||
}
|
}
|
||||||
|
|
||||||
if gpus[0].Library == "metal" && opts.UseMMap {
|
if gpus[0].Library == "metal" && opts.UseMMap {
|
||||||
|
|||||||
12
llm/patches/02-clip-log.diff
Normal file
12
llm/patches/02-clip-log.diff
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
|
||||||
|
index e431c7f7..f077e688 100644
|
||||||
|
--- a/examples/llava/clip.cpp
|
||||||
|
+++ b/examples/llava/clip.cpp
|
||||||
|
@@ -3,6 +3,7 @@
|
||||||
|
// I'll gradually clean and extend it
|
||||||
|
// Note: Even when using identical normalized image inputs (see normalize_image_u8_to_f32()) we have a significant difference in resulting embeddings compared to pytorch
|
||||||
|
#include "clip.h"
|
||||||
|
+#include "common.h"
|
||||||
|
#include "log.h"
|
||||||
|
#include "ggml.h"
|
||||||
|
#include "ggml-alloc.h"
|
||||||
@@ -73,8 +73,7 @@ func LoadModel(model string) (*GGML, error) {
|
|||||||
func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, projectors []string, opts api.Options) (LlamaServer, error) {
|
func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, projectors []string, opts api.Options) (LlamaServer, error) {
|
||||||
var err error
|
var err error
|
||||||
if opts.NumCtx > int(ggml.KV().ContextLength()) {
|
if opts.NumCtx > int(ggml.KV().ContextLength()) {
|
||||||
slog.Warn("requested context length is greater than model max context length", "requested", opts.NumCtx, "model", ggml.KV().ContextLength())
|
slog.Warn("requested context length is greater than the model's training context window size", "requested", opts.NumCtx, "training size", ggml.KV().ContextLength())
|
||||||
opts.NumCtx = int(ggml.KV().ContextLength())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if opts.NumCtx < 4 {
|
if opts.NumCtx < 4 {
|
||||||
@@ -301,12 +300,6 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// reap subprocess when it exits
|
|
||||||
go func() {
|
|
||||||
// Exit status managed via getServerStatus
|
|
||||||
_ = s.cmd.Wait()
|
|
||||||
}()
|
|
||||||
|
|
||||||
// TODO - make sure this is all wired up correctly
|
// TODO - make sure this is all wired up correctly
|
||||||
// if err = s.WaitUntilRunning(); err != nil {
|
// if err = s.WaitUntilRunning(); err != nil {
|
||||||
// slog.Error("error starting llama server", "server", servers[i], "error", err)
|
// slog.Error("error starting llama server", "server", servers[i], "error", err)
|
||||||
@@ -442,7 +435,7 @@ func (s *llmServer) WaitUntilRunning(ctx context.Context) error {
|
|||||||
select {
|
select {
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
slog.Info("context expired before server started")
|
slog.Info("context expired before server started")
|
||||||
return fmt.Errorf("timed out waiting for llama runner to start")
|
return fmt.Errorf("timed out waiting for llama runner to start: %w", ctx.Err())
|
||||||
case err := <-s.done:
|
case err := <-s.done:
|
||||||
msg := ""
|
msg := ""
|
||||||
if s.status != nil && s.status.LastErrMsg != "" {
|
if s.status != nil && s.status.LastErrMsg != "" {
|
||||||
@@ -900,7 +893,13 @@ func (s *llmServer) Detokenize(ctx context.Context, tokens []int) (string, error
|
|||||||
func (s *llmServer) Close() error {
|
func (s *llmServer) Close() error {
|
||||||
if s.cmd != nil {
|
if s.cmd != nil {
|
||||||
slog.Debug("stopping llama server")
|
slog.Debug("stopping llama server")
|
||||||
return s.cmd.Process.Kill()
|
if err := s.cmd.Process.Kill(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
_ = s.cmd.Wait()
|
||||||
|
|
||||||
|
slog.Debug("llama server stopped")
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ export default function () {
|
|||||||
const [step, setStep] = useState<Step>(Step.WELCOME)
|
const [step, setStep] = useState<Step>(Step.WELCOME)
|
||||||
const [commandCopied, setCommandCopied] = useState<boolean>(false)
|
const [commandCopied, setCommandCopied] = useState<boolean>(false)
|
||||||
|
|
||||||
const command = 'ollama run llama2'
|
const command = 'ollama run llama3'
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className='drag'>
|
<div className='drag'>
|
||||||
|
|||||||
363
parser/parser.go
363
parser/parser.go
@@ -6,8 +6,8 @@ import (
|
|||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"log/slog"
|
"strconv"
|
||||||
"slices"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Command struct {
|
type Command struct {
|
||||||
@@ -15,118 +15,283 @@ type Command struct {
|
|||||||
Args string
|
Args string
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Command) Reset() {
|
type state int
|
||||||
c.Name = ""
|
|
||||||
c.Args = ""
|
|
||||||
}
|
|
||||||
|
|
||||||
func Parse(reader io.Reader) ([]Command, error) {
|
const (
|
||||||
var commands []Command
|
stateNil state = iota
|
||||||
var command, modelCommand Command
|
stateName
|
||||||
|
stateValue
|
||||||
|
stateParameter
|
||||||
|
stateMessage
|
||||||
|
stateComment
|
||||||
|
)
|
||||||
|
|
||||||
scanner := bufio.NewScanner(reader)
|
var (
|
||||||
scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), bufio.MaxScanTokenSize)
|
errMissingFrom = errors.New("no FROM line")
|
||||||
scanner.Split(scanModelfile)
|
errInvalidMessageRole = errors.New("message role must be one of \"system\", \"user\", or \"assistant\"")
|
||||||
for scanner.Scan() {
|
errInvalidCommand = errors.New("command must be one of \"from\", \"license\", \"template\", \"system\", \"adapter\", \"parameter\", or \"message\"")
|
||||||
line := scanner.Bytes()
|
)
|
||||||
|
|
||||||
fields := bytes.SplitN(line, []byte(" "), 2)
|
func Format(cmds []Command) string {
|
||||||
if len(fields) == 0 || len(fields[0]) == 0 {
|
var sb strings.Builder
|
||||||
continue
|
for _, cmd := range cmds {
|
||||||
}
|
name := cmd.Name
|
||||||
|
args := cmd.Args
|
||||||
|
|
||||||
switch string(bytes.ToUpper(fields[0])) {
|
switch cmd.Name {
|
||||||
case "FROM":
|
case "model":
|
||||||
command.Name = "model"
|
name = "from"
|
||||||
command.Args = string(bytes.TrimSpace(fields[1]))
|
args = cmd.Args
|
||||||
// copy command for validation
|
case "license", "template", "system", "adapter":
|
||||||
modelCommand = command
|
args = quote(args)
|
||||||
case "ADAPTER":
|
case "message":
|
||||||
command.Name = string(bytes.ToLower(fields[0]))
|
role, message, _ := strings.Cut(cmd.Args, ": ")
|
||||||
command.Args = string(bytes.TrimSpace(fields[1]))
|
args = role + " " + quote(message)
|
||||||
case "LICENSE", "TEMPLATE", "SYSTEM", "PROMPT":
|
|
||||||
command.Name = string(bytes.ToLower(fields[0]))
|
|
||||||
command.Args = string(fields[1])
|
|
||||||
case "PARAMETER":
|
|
||||||
fields = bytes.SplitN(fields[1], []byte(" "), 2)
|
|
||||||
if len(fields) < 2 {
|
|
||||||
return nil, fmt.Errorf("missing value for %s", fields)
|
|
||||||
}
|
|
||||||
|
|
||||||
command.Name = string(fields[0])
|
|
||||||
command.Args = string(bytes.TrimSpace(fields[1]))
|
|
||||||
case "EMBED":
|
|
||||||
return nil, fmt.Errorf("deprecated command: EMBED is no longer supported, use the /embed API endpoint instead")
|
|
||||||
case "MESSAGE":
|
|
||||||
command.Name = string(bytes.ToLower(fields[0]))
|
|
||||||
fields = bytes.SplitN(fields[1], []byte(" "), 2)
|
|
||||||
if len(fields) < 2 {
|
|
||||||
return nil, fmt.Errorf("should be in the format <role> <message>")
|
|
||||||
}
|
|
||||||
if !slices.Contains([]string{"system", "user", "assistant"}, string(bytes.ToLower(fields[0]))) {
|
|
||||||
return nil, fmt.Errorf("role must be one of \"system\", \"user\", or \"assistant\"")
|
|
||||||
}
|
|
||||||
command.Args = fmt.Sprintf("%s: %s", string(bytes.ToLower(fields[0])), string(fields[1]))
|
|
||||||
default:
|
default:
|
||||||
if !bytes.HasPrefix(fields[0], []byte("#")) {
|
name = "parameter"
|
||||||
// log a warning for unknown commands
|
args = cmd.Name + " " + quote(cmd.Args)
|
||||||
slog.Warn(fmt.Sprintf("Unknown command: %s", fields[0]))
|
|
||||||
}
|
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
|
|
||||||
commands = append(commands, command)
|
fmt.Fprintln(&sb, strings.ToUpper(name), args)
|
||||||
command.Reset()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if modelCommand.Args == "" {
|
return sb.String()
|
||||||
return nil, errors.New("no FROM line for the model was specified")
|
|
||||||
}
|
|
||||||
|
|
||||||
return commands, scanner.Err()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func scanModelfile(data []byte, atEOF bool) (advance int, token []byte, err error) {
|
func Parse(r io.Reader) (cmds []Command, err error) {
|
||||||
advance, token, err = scan([]byte(`"""`), []byte(`"""`), data, atEOF)
|
var cmd Command
|
||||||
if err != nil {
|
var curr state
|
||||||
return 0, nil, err
|
var b bytes.Buffer
|
||||||
}
|
var role string
|
||||||
|
|
||||||
if advance > 0 && token != nil {
|
br := bufio.NewReader(r)
|
||||||
return advance, token, nil
|
for {
|
||||||
}
|
r, _, err := br.ReadRune()
|
||||||
|
if errors.Is(err, io.EOF) {
|
||||||
advance, token, err = scan([]byte(`"`), []byte(`"`), data, atEOF)
|
break
|
||||||
if err != nil {
|
} else if err != nil {
|
||||||
return 0, nil, err
|
return nil, err
|
||||||
}
|
|
||||||
|
|
||||||
if advance > 0 && token != nil {
|
|
||||||
return advance, token, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
return bufio.ScanLines(data, atEOF)
|
|
||||||
}
|
|
||||||
|
|
||||||
func scan(openBytes, closeBytes, data []byte, atEOF bool) (advance int, token []byte, err error) {
|
|
||||||
newline := bytes.IndexByte(data, '\n')
|
|
||||||
|
|
||||||
if start := bytes.Index(data, openBytes); start >= 0 && start < newline {
|
|
||||||
end := bytes.Index(data[start+len(openBytes):], closeBytes)
|
|
||||||
if end < 0 {
|
|
||||||
if atEOF {
|
|
||||||
return 0, nil, fmt.Errorf("unterminated %s: expecting %s", openBytes, closeBytes)
|
|
||||||
} else {
|
|
||||||
return 0, nil, nil
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
n := start + len(openBytes) + end + len(closeBytes)
|
next, r, err := parseRuneForState(r, curr)
|
||||||
|
if errors.Is(err, io.ErrUnexpectedEOF) {
|
||||||
|
return nil, fmt.Errorf("%w: %s", err, b.String())
|
||||||
|
} else if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
newData := data[:start]
|
// process the state transition, some transitions need to be intercepted and redirected
|
||||||
newData = append(newData, data[start+len(openBytes):n-len(closeBytes)]...)
|
if next != curr {
|
||||||
return n, newData, nil
|
switch curr {
|
||||||
|
case stateName:
|
||||||
|
if !isValidCommand(b.String()) {
|
||||||
|
return nil, errInvalidCommand
|
||||||
|
}
|
||||||
|
|
||||||
|
// next state sometimes depends on the current buffer value
|
||||||
|
switch s := strings.ToLower(b.String()); s {
|
||||||
|
case "from":
|
||||||
|
cmd.Name = "model"
|
||||||
|
case "parameter":
|
||||||
|
// transition to stateParameter which sets command name
|
||||||
|
next = stateParameter
|
||||||
|
case "message":
|
||||||
|
// transition to stateMessage which validates the message role
|
||||||
|
next = stateMessage
|
||||||
|
fallthrough
|
||||||
|
default:
|
||||||
|
cmd.Name = s
|
||||||
|
}
|
||||||
|
case stateParameter:
|
||||||
|
cmd.Name = b.String()
|
||||||
|
case stateMessage:
|
||||||
|
if !isValidMessageRole(b.String()) {
|
||||||
|
return nil, errInvalidMessageRole
|
||||||
|
}
|
||||||
|
|
||||||
|
role = b.String()
|
||||||
|
case stateComment, stateNil:
|
||||||
|
// pass
|
||||||
|
case stateValue:
|
||||||
|
s, ok := unquote(b.String())
|
||||||
|
if !ok || isSpace(r) {
|
||||||
|
if _, err := b.WriteRune(r); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if role != "" {
|
||||||
|
s = role + ": " + s
|
||||||
|
role = ""
|
||||||
|
}
|
||||||
|
|
||||||
|
cmd.Args = s
|
||||||
|
cmds = append(cmds, cmd)
|
||||||
|
}
|
||||||
|
|
||||||
|
b.Reset()
|
||||||
|
curr = next
|
||||||
|
}
|
||||||
|
|
||||||
|
if strconv.IsPrint(r) {
|
||||||
|
if _, err := b.WriteRune(r); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0, nil, nil
|
// flush the buffer
|
||||||
|
switch curr {
|
||||||
|
case stateComment, stateNil:
|
||||||
|
// pass; nothing to flush
|
||||||
|
case stateValue:
|
||||||
|
s, ok := unquote(b.String())
|
||||||
|
if !ok {
|
||||||
|
return nil, io.ErrUnexpectedEOF
|
||||||
|
}
|
||||||
|
|
||||||
|
if role != "" {
|
||||||
|
s = role + ": " + s
|
||||||
|
}
|
||||||
|
|
||||||
|
cmd.Args = s
|
||||||
|
cmds = append(cmds, cmd)
|
||||||
|
default:
|
||||||
|
return nil, io.ErrUnexpectedEOF
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, cmd := range cmds {
|
||||||
|
if cmd.Name == "model" {
|
||||||
|
return cmds, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil, errMissingFrom
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseRuneForState(r rune, cs state) (state, rune, error) {
|
||||||
|
switch cs {
|
||||||
|
case stateNil:
|
||||||
|
switch {
|
||||||
|
case r == '#':
|
||||||
|
return stateComment, 0, nil
|
||||||
|
case isSpace(r), isNewline(r):
|
||||||
|
return stateNil, 0, nil
|
||||||
|
default:
|
||||||
|
return stateName, r, nil
|
||||||
|
}
|
||||||
|
case stateName:
|
||||||
|
switch {
|
||||||
|
case isAlpha(r):
|
||||||
|
return stateName, r, nil
|
||||||
|
case isSpace(r):
|
||||||
|
return stateValue, 0, nil
|
||||||
|
default:
|
||||||
|
return stateNil, 0, errInvalidCommand
|
||||||
|
}
|
||||||
|
case stateValue:
|
||||||
|
switch {
|
||||||
|
case isNewline(r):
|
||||||
|
return stateNil, r, nil
|
||||||
|
case isSpace(r):
|
||||||
|
return stateNil, r, nil
|
||||||
|
default:
|
||||||
|
return stateValue, r, nil
|
||||||
|
}
|
||||||
|
case stateParameter:
|
||||||
|
switch {
|
||||||
|
case isAlpha(r), isNumber(r), r == '_':
|
||||||
|
return stateParameter, r, nil
|
||||||
|
case isSpace(r):
|
||||||
|
return stateValue, 0, nil
|
||||||
|
default:
|
||||||
|
return stateNil, 0, io.ErrUnexpectedEOF
|
||||||
|
}
|
||||||
|
case stateMessage:
|
||||||
|
switch {
|
||||||
|
case isAlpha(r):
|
||||||
|
return stateMessage, r, nil
|
||||||
|
case isSpace(r):
|
||||||
|
return stateValue, 0, nil
|
||||||
|
default:
|
||||||
|
return stateNil, 0, io.ErrUnexpectedEOF
|
||||||
|
}
|
||||||
|
case stateComment:
|
||||||
|
switch {
|
||||||
|
case isNewline(r):
|
||||||
|
return stateNil, 0, nil
|
||||||
|
default:
|
||||||
|
return stateComment, 0, nil
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
return stateNil, 0, errors.New("")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func quote(s string) string {
|
||||||
|
if strings.Contains(s, "\n") || strings.HasPrefix(s, " ") || strings.HasSuffix(s, " ") {
|
||||||
|
if strings.Contains(s, "\"") {
|
||||||
|
return `"""` + s + `"""`
|
||||||
|
}
|
||||||
|
|
||||||
|
return `"` + s + `"`
|
||||||
|
}
|
||||||
|
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
func unquote(s string) (string, bool) {
|
||||||
|
if len(s) == 0 {
|
||||||
|
return "", false
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: single quotes
|
||||||
|
if len(s) >= 3 && s[:3] == `"""` {
|
||||||
|
if len(s) >= 6 && s[len(s)-3:] == `"""` {
|
||||||
|
return s[3 : len(s)-3], true
|
||||||
|
}
|
||||||
|
|
||||||
|
return "", false
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(s) >= 1 && s[0] == '"' {
|
||||||
|
if len(s) >= 2 && s[len(s)-1] == '"' {
|
||||||
|
return s[1 : len(s)-1], true
|
||||||
|
}
|
||||||
|
|
||||||
|
return "", false
|
||||||
|
}
|
||||||
|
|
||||||
|
return s, true
|
||||||
|
}
|
||||||
|
|
||||||
|
func isAlpha(r rune) bool {
|
||||||
|
return r >= 'a' && r <= 'z' || r >= 'A' && r <= 'Z'
|
||||||
|
}
|
||||||
|
|
||||||
|
func isNumber(r rune) bool {
|
||||||
|
return r >= '0' && r <= '9'
|
||||||
|
}
|
||||||
|
|
||||||
|
func isSpace(r rune) bool {
|
||||||
|
return r == ' ' || r == '\t'
|
||||||
|
}
|
||||||
|
|
||||||
|
func isNewline(r rune) bool {
|
||||||
|
return r == '\r' || r == '\n'
|
||||||
|
}
|
||||||
|
|
||||||
|
func isValidMessageRole(role string) bool {
|
||||||
|
return role == "system" || role == "user" || role == "assistant"
|
||||||
|
}
|
||||||
|
|
||||||
|
func isValidCommand(cmd string) bool {
|
||||||
|
switch strings.ToLower(cmd) {
|
||||||
|
case "from", "license", "template", "system", "adapter", "parameter", "message":
|
||||||
|
return true
|
||||||
|
default:
|
||||||
|
return false
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,14 +1,16 @@
|
|||||||
package parser
|
package parser
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
)
|
)
|
||||||
|
|
||||||
func Test_Parser(t *testing.T) {
|
func TestParser(t *testing.T) {
|
||||||
|
|
||||||
input := `
|
input := `
|
||||||
FROM model1
|
FROM model1
|
||||||
ADAPTER adapter1
|
ADAPTER adapter1
|
||||||
@@ -35,21 +37,62 @@ TEMPLATE template1
|
|||||||
assert.Equal(t, expectedCommands, commands)
|
assert.Equal(t, expectedCommands, commands)
|
||||||
}
|
}
|
||||||
|
|
||||||
func Test_Parser_NoFromLine(t *testing.T) {
|
func TestParserFrom(t *testing.T) {
|
||||||
|
var cases = []struct {
|
||||||
|
input string
|
||||||
|
expected []Command
|
||||||
|
err error
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
"FROM foo",
|
||||||
|
[]Command{{Name: "model", Args: "foo"}},
|
||||||
|
nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"FROM /path/to/model",
|
||||||
|
[]Command{{Name: "model", Args: "/path/to/model"}},
|
||||||
|
nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"FROM /path/to/model/fp16.bin",
|
||||||
|
[]Command{{Name: "model", Args: "/path/to/model/fp16.bin"}},
|
||||||
|
nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"FROM llama3:latest",
|
||||||
|
[]Command{{Name: "model", Args: "llama3:latest"}},
|
||||||
|
nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"FROM llama3:7b-instruct-q4_K_M",
|
||||||
|
[]Command{{Name: "model", Args: "llama3:7b-instruct-q4_K_M"}},
|
||||||
|
nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"", nil, errMissingFrom,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"PARAMETER param1 value1",
|
||||||
|
nil,
|
||||||
|
errMissingFrom,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"PARAMETER param1 value1\nFROM foo",
|
||||||
|
[]Command{{Name: "param1", Args: "value1"}, {Name: "model", Args: "foo"}},
|
||||||
|
nil,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
input := `
|
for _, c := range cases {
|
||||||
PARAMETER param1 value1
|
t.Run("", func(t *testing.T) {
|
||||||
PARAMETER param2 value2
|
commands, err := Parse(strings.NewReader(c.input))
|
||||||
`
|
assert.ErrorIs(t, err, c.err)
|
||||||
|
assert.Equal(t, c.expected, commands)
|
||||||
reader := strings.NewReader(input)
|
})
|
||||||
|
}
|
||||||
_, err := Parse(reader)
|
|
||||||
assert.ErrorContains(t, err, "no FROM line")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func Test_Parser_MissingValue(t *testing.T) {
|
func TestParserParametersMissingValue(t *testing.T) {
|
||||||
|
|
||||||
input := `
|
input := `
|
||||||
FROM foo
|
FROM foo
|
||||||
PARAMETER param1
|
PARAMETER param1
|
||||||
@@ -58,41 +101,401 @@ PARAMETER param1
|
|||||||
reader := strings.NewReader(input)
|
reader := strings.NewReader(input)
|
||||||
|
|
||||||
_, err := Parse(reader)
|
_, err := Parse(reader)
|
||||||
assert.ErrorContains(t, err, "missing value for [param1]")
|
assert.ErrorIs(t, err, io.ErrUnexpectedEOF)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParserBadCommand(t *testing.T) {
|
||||||
|
input := `
|
||||||
|
FROM foo
|
||||||
|
BADCOMMAND param1 value1
|
||||||
|
`
|
||||||
|
_, err := Parse(strings.NewReader(input))
|
||||||
|
assert.ErrorIs(t, err, errInvalidCommand)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func Test_Parser_Messages(t *testing.T) {
|
func TestParserMessages(t *testing.T) {
|
||||||
|
var cases = []struct {
|
||||||
input := `
|
input string
|
||||||
|
expected []Command
|
||||||
|
err error
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
`
|
||||||
|
FROM foo
|
||||||
|
MESSAGE system You are a Parser. Always Parse things.
|
||||||
|
`,
|
||||||
|
[]Command{
|
||||||
|
{Name: "model", Args: "foo"},
|
||||||
|
{Name: "message", Args: "system: You are a Parser. Always Parse things."},
|
||||||
|
},
|
||||||
|
nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
`
|
||||||
|
FROM foo
|
||||||
|
MESSAGE system You are a Parser. Always Parse things.`,
|
||||||
|
[]Command{
|
||||||
|
{Name: "model", Args: "foo"},
|
||||||
|
{Name: "message", Args: "system: You are a Parser. Always Parse things."},
|
||||||
|
},
|
||||||
|
nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
`
|
||||||
FROM foo
|
FROM foo
|
||||||
MESSAGE system You are a Parser. Always Parse things.
|
MESSAGE system You are a Parser. Always Parse things.
|
||||||
MESSAGE user Hey there!
|
MESSAGE user Hey there!
|
||||||
MESSAGE assistant Hello, I want to parse all the things!
|
MESSAGE assistant Hello, I want to parse all the things!
|
||||||
`
|
`,
|
||||||
|
[]Command{
|
||||||
reader := strings.NewReader(input)
|
{Name: "model", Args: "foo"},
|
||||||
commands, err := Parse(reader)
|
{Name: "message", Args: "system: You are a Parser. Always Parse things."},
|
||||||
assert.Nil(t, err)
|
{Name: "message", Args: "user: Hey there!"},
|
||||||
|
{Name: "message", Args: "assistant: Hello, I want to parse all the things!"},
|
||||||
expectedCommands := []Command{
|
},
|
||||||
{Name: "model", Args: "foo"},
|
nil,
|
||||||
{Name: "message", Args: "system: You are a Parser. Always Parse things."},
|
},
|
||||||
{Name: "message", Args: "user: Hey there!"},
|
{
|
||||||
{Name: "message", Args: "assistant: Hello, I want to parse all the things!"},
|
`
|
||||||
}
|
FROM foo
|
||||||
|
MESSAGE system """
|
||||||
assert.Equal(t, expectedCommands, commands)
|
You are a multiline Parser. Always Parse things.
|
||||||
}
|
"""
|
||||||
|
`,
|
||||||
func Test_Parser_Messages_BadRole(t *testing.T) {
|
[]Command{
|
||||||
|
{Name: "model", Args: "foo"},
|
||||||
input := `
|
{Name: "message", Args: "system: \nYou are a multiline Parser. Always Parse things.\n"},
|
||||||
|
},
|
||||||
|
nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
`
|
||||||
FROM foo
|
FROM foo
|
||||||
MESSAGE badguy I'm a bad guy!
|
MESSAGE badguy I'm a bad guy!
|
||||||
`
|
`,
|
||||||
|
nil,
|
||||||
|
errInvalidMessageRole,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
`
|
||||||
|
FROM foo
|
||||||
|
MESSAGE system
|
||||||
|
`,
|
||||||
|
nil,
|
||||||
|
io.ErrUnexpectedEOF,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
`
|
||||||
|
FROM foo
|
||||||
|
MESSAGE system`,
|
||||||
|
nil,
|
||||||
|
io.ErrUnexpectedEOF,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, c := range cases {
|
||||||
|
t.Run("", func(t *testing.T) {
|
||||||
|
commands, err := Parse(strings.NewReader(c.input))
|
||||||
|
assert.ErrorIs(t, err, c.err)
|
||||||
|
assert.Equal(t, c.expected, commands)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParserQuoted(t *testing.T) {
|
||||||
|
var cases = []struct {
|
||||||
|
multiline string
|
||||||
|
expected []Command
|
||||||
|
err error
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
`
|
||||||
|
FROM foo
|
||||||
|
SYSTEM """
|
||||||
|
This is a
|
||||||
|
multiline system.
|
||||||
|
"""
|
||||||
|
`,
|
||||||
|
[]Command{
|
||||||
|
{Name: "model", Args: "foo"},
|
||||||
|
{Name: "system", Args: "\nThis is a\nmultiline system.\n"},
|
||||||
|
},
|
||||||
|
nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
`
|
||||||
|
FROM foo
|
||||||
|
SYSTEM """
|
||||||
|
This is a
|
||||||
|
multiline system."""
|
||||||
|
`,
|
||||||
|
[]Command{
|
||||||
|
{Name: "model", Args: "foo"},
|
||||||
|
{Name: "system", Args: "\nThis is a\nmultiline system."},
|
||||||
|
},
|
||||||
|
nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
`
|
||||||
|
FROM foo
|
||||||
|
SYSTEM """This is a
|
||||||
|
multiline system."""
|
||||||
|
`,
|
||||||
|
[]Command{
|
||||||
|
{Name: "model", Args: "foo"},
|
||||||
|
{Name: "system", Args: "This is a\nmultiline system."},
|
||||||
|
},
|
||||||
|
nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
`
|
||||||
|
FROM foo
|
||||||
|
SYSTEM """This is a multiline system."""
|
||||||
|
`,
|
||||||
|
[]Command{
|
||||||
|
{Name: "model", Args: "foo"},
|
||||||
|
{Name: "system", Args: "This is a multiline system."},
|
||||||
|
},
|
||||||
|
nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
`
|
||||||
|
FROM foo
|
||||||
|
SYSTEM """This is a multiline system.""
|
||||||
|
`,
|
||||||
|
nil,
|
||||||
|
io.ErrUnexpectedEOF,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
`
|
||||||
|
FROM foo
|
||||||
|
SYSTEM "
|
||||||
|
`,
|
||||||
|
nil,
|
||||||
|
io.ErrUnexpectedEOF,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
`
|
||||||
|
FROM foo
|
||||||
|
SYSTEM """
|
||||||
|
This is a multiline system with "quotes".
|
||||||
|
"""
|
||||||
|
`,
|
||||||
|
[]Command{
|
||||||
|
{Name: "model", Args: "foo"},
|
||||||
|
{Name: "system", Args: "\nThis is a multiline system with \"quotes\".\n"},
|
||||||
|
},
|
||||||
|
nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
`
|
||||||
|
FROM foo
|
||||||
|
SYSTEM """"""
|
||||||
|
`,
|
||||||
|
[]Command{
|
||||||
|
{Name: "model", Args: "foo"},
|
||||||
|
{Name: "system", Args: ""},
|
||||||
|
},
|
||||||
|
nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
`
|
||||||
|
FROM foo
|
||||||
|
SYSTEM ""
|
||||||
|
`,
|
||||||
|
[]Command{
|
||||||
|
{Name: "model", Args: "foo"},
|
||||||
|
{Name: "system", Args: ""},
|
||||||
|
},
|
||||||
|
nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
`
|
||||||
|
FROM foo
|
||||||
|
SYSTEM "'"
|
||||||
|
`,
|
||||||
|
[]Command{
|
||||||
|
{Name: "model", Args: "foo"},
|
||||||
|
{Name: "system", Args: "'"},
|
||||||
|
},
|
||||||
|
nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
`
|
||||||
|
FROM foo
|
||||||
|
SYSTEM """''"'""'""'"'''''""'""'"""
|
||||||
|
`,
|
||||||
|
[]Command{
|
||||||
|
{Name: "model", Args: "foo"},
|
||||||
|
{Name: "system", Args: `''"'""'""'"'''''""'""'`},
|
||||||
|
},
|
||||||
|
nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
`
|
||||||
|
FROM foo
|
||||||
|
TEMPLATE """
|
||||||
|
{{ .Prompt }}
|
||||||
|
"""`,
|
||||||
|
[]Command{
|
||||||
|
{Name: "model", Args: "foo"},
|
||||||
|
{Name: "template", Args: "\n{{ .Prompt }}\n"},
|
||||||
|
},
|
||||||
|
nil,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, c := range cases {
|
||||||
|
t.Run("", func(t *testing.T) {
|
||||||
|
commands, err := Parse(strings.NewReader(c.multiline))
|
||||||
|
assert.ErrorIs(t, err, c.err)
|
||||||
|
assert.Equal(t, c.expected, commands)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParserParameters(t *testing.T) {
|
||||||
|
var cases = map[string]struct {
|
||||||
|
name, value string
|
||||||
|
}{
|
||||||
|
"numa true": {"numa", "true"},
|
||||||
|
"num_ctx 1": {"num_ctx", "1"},
|
||||||
|
"num_batch 1": {"num_batch", "1"},
|
||||||
|
"num_gqa 1": {"num_gqa", "1"},
|
||||||
|
"num_gpu 1": {"num_gpu", "1"},
|
||||||
|
"main_gpu 1": {"main_gpu", "1"},
|
||||||
|
"low_vram true": {"low_vram", "true"},
|
||||||
|
"f16_kv true": {"f16_kv", "true"},
|
||||||
|
"logits_all true": {"logits_all", "true"},
|
||||||
|
"vocab_only true": {"vocab_only", "true"},
|
||||||
|
"use_mmap true": {"use_mmap", "true"},
|
||||||
|
"use_mlock true": {"use_mlock", "true"},
|
||||||
|
"num_thread 1": {"num_thread", "1"},
|
||||||
|
"num_keep 1": {"num_keep", "1"},
|
||||||
|
"seed 1": {"seed", "1"},
|
||||||
|
"num_predict 1": {"num_predict", "1"},
|
||||||
|
"top_k 1": {"top_k", "1"},
|
||||||
|
"top_p 1.0": {"top_p", "1.0"},
|
||||||
|
"tfs_z 1.0": {"tfs_z", "1.0"},
|
||||||
|
"typical_p 1.0": {"typical_p", "1.0"},
|
||||||
|
"repeat_last_n 1": {"repeat_last_n", "1"},
|
||||||
|
"temperature 1.0": {"temperature", "1.0"},
|
||||||
|
"repeat_penalty 1.0": {"repeat_penalty", "1.0"},
|
||||||
|
"presence_penalty 1.0": {"presence_penalty", "1.0"},
|
||||||
|
"frequency_penalty 1.0": {"frequency_penalty", "1.0"},
|
||||||
|
"mirostat 1": {"mirostat", "1"},
|
||||||
|
"mirostat_tau 1.0": {"mirostat_tau", "1.0"},
|
||||||
|
"mirostat_eta 1.0": {"mirostat_eta", "1.0"},
|
||||||
|
"penalize_newline true": {"penalize_newline", "true"},
|
||||||
|
"stop ### User:": {"stop", "### User:"},
|
||||||
|
"stop ### User: ": {"stop", "### User: "},
|
||||||
|
"stop \"### User:\"": {"stop", "### User:"},
|
||||||
|
"stop \"### User: \"": {"stop", "### User: "},
|
||||||
|
"stop \"\"\"### User:\"\"\"": {"stop", "### User:"},
|
||||||
|
"stop \"\"\"### User:\n\"\"\"": {"stop", "### User:\n"},
|
||||||
|
"stop <|endoftext|>": {"stop", "<|endoftext|>"},
|
||||||
|
"stop <|eot_id|>": {"stop", "<|eot_id|>"},
|
||||||
|
"stop </s>": {"stop", "</s>"},
|
||||||
|
}
|
||||||
|
|
||||||
|
for k, v := range cases {
|
||||||
|
t.Run(k, func(t *testing.T) {
|
||||||
|
var b bytes.Buffer
|
||||||
|
fmt.Fprintln(&b, "FROM foo")
|
||||||
|
fmt.Fprintln(&b, "PARAMETER", k)
|
||||||
|
commands, err := Parse(&b)
|
||||||
|
assert.Nil(t, err)
|
||||||
|
|
||||||
|
assert.Equal(t, []Command{
|
||||||
|
{Name: "model", Args: "foo"},
|
||||||
|
{Name: v.name, Args: v.value},
|
||||||
|
}, commands)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParserComments(t *testing.T) {
|
||||||
|
var cases = []struct {
|
||||||
|
input string
|
||||||
|
expected []Command
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
`
|
||||||
|
# comment
|
||||||
|
FROM foo
|
||||||
|
`,
|
||||||
|
[]Command{
|
||||||
|
{Name: "model", Args: "foo"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, c := range cases {
|
||||||
|
t.Run("", func(t *testing.T) {
|
||||||
|
commands, err := Parse(strings.NewReader(c.input))
|
||||||
|
assert.Nil(t, err)
|
||||||
|
assert.Equal(t, c.expected, commands)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseFormatParse(t *testing.T) {
|
||||||
|
var cases = []string{
|
||||||
|
`
|
||||||
|
FROM foo
|
||||||
|
ADAPTER adapter1
|
||||||
|
LICENSE MIT
|
||||||
|
PARAMETER param1 value1
|
||||||
|
PARAMETER param2 value2
|
||||||
|
TEMPLATE template1
|
||||||
|
MESSAGE system You are a Parser. Always Parse things.
|
||||||
|
MESSAGE user Hey there!
|
||||||
|
MESSAGE assistant Hello, I want to parse all the things!
|
||||||
|
`,
|
||||||
|
`
|
||||||
|
FROM foo
|
||||||
|
ADAPTER adapter1
|
||||||
|
LICENSE MIT
|
||||||
|
PARAMETER param1 value1
|
||||||
|
PARAMETER param2 value2
|
||||||
|
TEMPLATE template1
|
||||||
|
MESSAGE system """
|
||||||
|
You are a store greeter. Always responsed with "Hello!".
|
||||||
|
"""
|
||||||
|
MESSAGE user Hey there!
|
||||||
|
MESSAGE assistant Hello, I want to parse all the things!
|
||||||
|
`,
|
||||||
|
`
|
||||||
|
FROM foo
|
||||||
|
ADAPTER adapter1
|
||||||
|
LICENSE """
|
||||||
|
Very long and boring legal text.
|
||||||
|
Blah blah blah.
|
||||||
|
"Oh look, a quote!"
|
||||||
|
"""
|
||||||
|
|
||||||
|
PARAMETER param1 value1
|
||||||
|
PARAMETER param2 value2
|
||||||
|
TEMPLATE template1
|
||||||
|
MESSAGE system """
|
||||||
|
You are a store greeter. Always responsed with "Hello!".
|
||||||
|
"""
|
||||||
|
MESSAGE user Hey there!
|
||||||
|
MESSAGE assistant Hello, I want to parse all the things!
|
||||||
|
`,
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, c := range cases {
|
||||||
|
t.Run("", func(t *testing.T) {
|
||||||
|
commands, err := Parse(strings.NewReader(c))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
commands2, err := Parse(strings.NewReader(Format(commands)))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
assert.Equal(t, commands, commands2)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
reader := strings.NewReader(input)
|
|
||||||
_, err := Parse(reader)
|
|
||||||
assert.ErrorContains(t, err, "role must be one of \"system\", \"user\", or \"assistant\"")
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -7,6 +7,8 @@
|
|||||||
$ErrorActionPreference = "Stop"
|
$ErrorActionPreference = "Stop"
|
||||||
|
|
||||||
function checkEnv() {
|
function checkEnv() {
|
||||||
|
$script:TARGET_ARCH=$Env:PROCESSOR_ARCHITECTURE.ToLower()
|
||||||
|
Write-host "Building for ${script:TARGET_ARCH}"
|
||||||
write-host "Locating required tools and paths"
|
write-host "Locating required tools and paths"
|
||||||
$script:SRC_DIR=$PWD
|
$script:SRC_DIR=$PWD
|
||||||
if (!$env:VCToolsRedistDir) {
|
if (!$env:VCToolsRedistDir) {
|
||||||
@@ -30,7 +32,7 @@ function checkEnv() {
|
|||||||
|
|
||||||
$script:INNO_SETUP_DIR=(get-item "C:\Program Files*\Inno Setup*\")[0]
|
$script:INNO_SETUP_DIR=(get-item "C:\Program Files*\Inno Setup*\")[0]
|
||||||
|
|
||||||
$script:DEPS_DIR="${script:SRC_DIR}\dist\windows-amd64"
|
$script:DEPS_DIR="${script:SRC_DIR}\dist\windows-${script:TARGET_ARCH}"
|
||||||
$env:CGO_ENABLED="1"
|
$env:CGO_ENABLED="1"
|
||||||
echo "Checking version"
|
echo "Checking version"
|
||||||
if (!$env:VERSION) {
|
if (!$env:VERSION) {
|
||||||
@@ -81,8 +83,8 @@ function buildOllama() {
|
|||||||
/csp "Google Cloud KMS Provider" /kc ${env:KEY_CONTAINER} ollama.exe
|
/csp "Google Cloud KMS Provider" /kc ${env:KEY_CONTAINER} ollama.exe
|
||||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||||
}
|
}
|
||||||
New-Item -ItemType Directory -Path .\dist\windows-amd64\ -Force
|
New-Item -ItemType Directory -Path .\dist\windows-${script:TARGET_ARCH}\ -Force
|
||||||
cp .\ollama.exe .\dist\windows-amd64\ollama-windows-amd64.exe
|
cp .\ollama.exe .\dist\windows-${script:TARGET_ARCH}\
|
||||||
}
|
}
|
||||||
|
|
||||||
function buildApp() {
|
function buildApp() {
|
||||||
@@ -109,9 +111,6 @@ function gatherDependencies() {
|
|||||||
cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140.dll" "${script:DEPS_DIR}\"
|
cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140.dll" "${script:DEPS_DIR}\"
|
||||||
cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140_1.dll" "${script:DEPS_DIR}\"
|
cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140_1.dll" "${script:DEPS_DIR}\"
|
||||||
|
|
||||||
cp "${script:NVIDIA_DIR}\cudart64_*.dll" "${script:DEPS_DIR}\"
|
|
||||||
cp "${script:NVIDIA_DIR}\cublas64_*.dll" "${script:DEPS_DIR}\"
|
|
||||||
cp "${script:NVIDIA_DIR}\cublasLt64_*.dll" "${script:DEPS_DIR}\"
|
|
||||||
|
|
||||||
cp "${script:SRC_DIR}\app\ollama_welcome.ps1" "${script:SRC_DIR}\dist\"
|
cp "${script:SRC_DIR}\app\ollama_welcome.ps1" "${script:SRC_DIR}\dist\"
|
||||||
if ("${env:KEY_CONTAINER}") {
|
if ("${env:KEY_CONTAINER}") {
|
||||||
@@ -123,15 +122,6 @@ function gatherDependencies() {
|
|||||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if ($null -ne $env:HIP_PATH) {
|
|
||||||
# Assumes v5.7, may need adjustments for v6
|
|
||||||
rm -ea 0 -recurse -force -path "${script:DEPS_DIR}\rocm\"
|
|
||||||
md "${script:DEPS_DIR}\rocm\rocblas\library\" -ea 0 > $null
|
|
||||||
cp "${env:HIP_PATH}\bin\hipblas.dll" "${script:DEPS_DIR}\rocm\"
|
|
||||||
cp "${env:HIP_PATH}\bin\rocblas.dll" "${script:DEPS_DIR}\rocm\"
|
|
||||||
# amdhip64.dll dependency comes from the driver and must be installed on the host to use AMD GPUs
|
|
||||||
cp "${env:HIP_PATH}\bin\rocblas\library\*" "${script:DEPS_DIR}\rocm\rocblas\library\"
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function buildInstaller() {
|
function buildInstaller() {
|
||||||
@@ -139,16 +129,16 @@ function buildInstaller() {
|
|||||||
cd "${script:SRC_DIR}\app"
|
cd "${script:SRC_DIR}\app"
|
||||||
$env:PKG_VERSION=$script:PKG_VERSION
|
$env:PKG_VERSION=$script:PKG_VERSION
|
||||||
if ("${env:KEY_CONTAINER}") {
|
if ("${env:KEY_CONTAINER}") {
|
||||||
& "${script:INNO_SETUP_DIR}\ISCC.exe" /SMySignTool="${script:SignTool} sign /fd sha256 /t http://timestamp.digicert.com /f ${script:OLLAMA_CERT} /csp `$qGoogle Cloud KMS Provider`$q /kc ${env:KEY_CONTAINER} `$f" .\ollama.iss
|
& "${script:INNO_SETUP_DIR}\ISCC.exe" /DARCH=$script:TARGET_ARCH /SMySignTool="${script:SignTool} sign /fd sha256 /t http://timestamp.digicert.com /f ${script:OLLAMA_CERT} /csp `$qGoogle Cloud KMS Provider`$q /kc ${env:KEY_CONTAINER} `$f" .\ollama.iss
|
||||||
} else {
|
} else {
|
||||||
& "${script:INNO_SETUP_DIR}\ISCC.exe" .\ollama.iss
|
& "${script:INNO_SETUP_DIR}\ISCC.exe" /DARCH=$script:TARGET_ARCH .\ollama.iss
|
||||||
}
|
}
|
||||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||||
}
|
}
|
||||||
|
|
||||||
function distZip() {
|
function distZip() {
|
||||||
write-host "Generating stand-alone distribution zip file ${script:SRC_DIR}\dist\ollama-windows-amd64.zip"
|
write-host "Generating stand-alone distribution zip file ${script:SRC_DIR}\dist\ollama-windows-${script:TARGET_ARCH}.zip"
|
||||||
Compress-Archive -Path "${script:SRC_DIR}\dist\windows-amd64\*" -DestinationPath "${script:SRC_DIR}\dist\ollama-windows-amd64.zip" -Force
|
Compress-Archive -Path "${script:SRC_DIR}\dist\windows-${script:TARGET_ARCH}\*" -DestinationPath "${script:SRC_DIR}\dist\ollama-windows-${script:TARGET_ARCH}.zip" -Force
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
|||||||
850
server/images.go
850
server/images.go
File diff suppressed because it is too large
Load Diff
@@ -5,39 +5,18 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"strings"
|
|
||||||
|
|
||||||
"golang.org/x/exp/slices"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type Layers struct {
|
|
||||||
items []*Layer
|
|
||||||
}
|
|
||||||
|
|
||||||
func (ls *Layers) Add(layer *Layer) {
|
|
||||||
if layer.Size > 0 {
|
|
||||||
ls.items = append(ls.items, layer)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (ls *Layers) Replace(layer *Layer) {
|
|
||||||
if layer.Size > 0 {
|
|
||||||
mediatype := layer.MediaType
|
|
||||||
layers := slices.DeleteFunc(ls.items, func(l *Layer) bool {
|
|
||||||
return l.MediaType == mediatype
|
|
||||||
})
|
|
||||||
|
|
||||||
ls.items = append(layers, layer)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
type Layer struct {
|
type Layer struct {
|
||||||
MediaType string `json:"mediaType"`
|
MediaType string `json:"mediaType"`
|
||||||
Digest string `json:"digest"`
|
Digest string `json:"digest"`
|
||||||
Size int64 `json:"size"`
|
Size int64 `json:"size"`
|
||||||
From string `json:"from,omitempty"`
|
From string `json:"from,omitempty"`
|
||||||
|
|
||||||
tempFileName string
|
Intermediate bool `json:"intermediate,omitempty"`
|
||||||
|
MergeBase string `json:"merge_base,omitempty"`
|
||||||
|
|
||||||
|
message string
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewLayer(r io.Reader, mediatype string) (*Layer, error) {
|
func NewLayer(r io.Reader, mediatype string) (*Layer, error) {
|
||||||
@@ -46,14 +25,12 @@ func NewLayer(r io.Reader, mediatype string) (*Layer, error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
const delimiter = "-"
|
temp, err := os.CreateTemp(blobs, "sha256-")
|
||||||
|
|
||||||
pattern := strings.Join([]string{"sha256", "*-partial"}, delimiter)
|
|
||||||
temp, err := os.CreateTemp(blobs, pattern)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
defer temp.Close()
|
defer temp.Close()
|
||||||
|
defer os.Remove(temp.Name())
|
||||||
|
|
||||||
sha256sum := sha256.New()
|
sha256sum := sha256.New()
|
||||||
n, err := io.Copy(io.MultiWriter(temp, sha256sum), r)
|
n, err := io.Copy(io.MultiWriter(temp, sha256sum), r)
|
||||||
@@ -61,11 +38,29 @@ func NewLayer(r io.Reader, mediatype string) (*Layer, error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if err := temp.Close(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
digest := fmt.Sprintf("sha256:%x", sha256sum.Sum(nil))
|
||||||
|
blob, err := GetBlobsPath(digest)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
status := "using existing layer"
|
||||||
|
if _, err := os.Stat(blob); err != nil {
|
||||||
|
status = "creating new layer"
|
||||||
|
if err := os.Rename(temp.Name(), blob); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return &Layer{
|
return &Layer{
|
||||||
MediaType: mediatype,
|
MediaType: mediatype,
|
||||||
Digest: fmt.Sprintf("sha256:%x", sha256sum.Sum(nil)),
|
Digest: digest,
|
||||||
Size: n,
|
Size: n,
|
||||||
tempFileName: temp.Name(),
|
message: fmt.Sprintf("%s %s", status, digest),
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -85,21 +80,15 @@ func NewLayerFromLayer(digest, mediatype, from string) (*Layer, error) {
|
|||||||
Digest: digest,
|
Digest: digest,
|
||||||
Size: fi.Size(),
|
Size: fi.Size(),
|
||||||
From: from,
|
From: from,
|
||||||
|
message: fmt.Sprintf("using existing layer %s", digest),
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (l *Layer) Commit() (bool, error) {
|
func (l *Layer) Open() (*os.File, error) {
|
||||||
// always remove temp
|
|
||||||
defer os.Remove(l.tempFileName)
|
|
||||||
|
|
||||||
blob, err := GetBlobsPath(l.Digest)
|
blob, err := GetBlobsPath(l.Digest)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
if _, err := os.Stat(blob); err != nil {
|
return os.Open(blob)
|
||||||
return true, os.Rename(l.tempFileName, blob)
|
|
||||||
}
|
|
||||||
|
|
||||||
return false, nil
|
|
||||||
}
|
}
|
||||||
259
server/model.go
Normal file
259
server/model.go
Normal file
@@ -0,0 +1,259 @@
|
|||||||
|
package server
|
||||||
|
|
||||||
|
import (
|
||||||
|
"archive/zip"
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
|
||||||
|
"github.com/ollama/ollama/api"
|
||||||
|
"github.com/ollama/ollama/convert"
|
||||||
|
"github.com/ollama/ollama/llm"
|
||||||
|
"github.com/ollama/ollama/types/model"
|
||||||
|
)
|
||||||
|
|
||||||
|
type layerWithGGML struct {
|
||||||
|
*Layer
|
||||||
|
*llm.GGML
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressResponse)) (layers []*layerWithGGML, err error) {
|
||||||
|
modelpath := ParseModelPath(name.String())
|
||||||
|
manifest, _, err := GetManifest(modelpath)
|
||||||
|
switch {
|
||||||
|
case errors.Is(err, os.ErrNotExist):
|
||||||
|
if err := PullModel(ctx, name.String(), &registryOptions{}, fn); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
modelpath = ParseModelPath(name.String())
|
||||||
|
manifest, _, err = GetManifest(modelpath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
case err != nil:
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, layer := range manifest.Layers {
|
||||||
|
layer, err := NewLayerFromLayer(layer.Digest, layer.MediaType, modelpath.GetShortTagname())
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
switch layer.MediaType {
|
||||||
|
case "application/vnd.ollama.image.model",
|
||||||
|
"application/vnd.ollama.image.projector",
|
||||||
|
"application/vnd.ollama.image.adapter":
|
||||||
|
blobpath, err := GetBlobsPath(layer.Digest)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
blob, err := os.Open(blobpath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer blob.Close()
|
||||||
|
|
||||||
|
ggml, _, err := llm.DecodeGGML(blob)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
layers = append(layers, &layerWithGGML{layer, ggml})
|
||||||
|
default:
|
||||||
|
layers = append(layers, &layerWithGGML{layer, nil})
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return layers, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseFromZipFile(_ context.Context, file *os.File, fn func(api.ProgressResponse)) (layers []*layerWithGGML, err error) {
|
||||||
|
stat, err := file.Stat()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
r, err := zip.NewReader(file, stat.Size())
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
tempdir, err := os.MkdirTemp(filepath.Dir(file.Name()), "")
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer os.RemoveAll(tempdir)
|
||||||
|
|
||||||
|
fn(api.ProgressResponse{Status: "unpacking model metadata"})
|
||||||
|
for _, f := range r.File {
|
||||||
|
// TODO(mxyng): this should not write out all files to disk
|
||||||
|
outfile, err := os.Create(filepath.Join(tempdir, f.Name))
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
infile, err := f.Open()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, err = io.Copy(outfile, infile); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := outfile.Close(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := infile.Close(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
mf, err := convert.GetModelFormat(tempdir)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
params, err := mf.GetParams(tempdir)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
mArch, err := mf.GetModelArch("", tempdir, params)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
fn(api.ProgressResponse{Status: "processing tensors"})
|
||||||
|
if err := mArch.GetTensors(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := mArch.LoadVocab(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
fn(api.ProgressResponse{Status: "converting model"})
|
||||||
|
|
||||||
|
// TODO(mxyng): this should write directly into a layer
|
||||||
|
// e.g. NewLayer(arch.Reader(), "application/vnd.ollama.image.model")
|
||||||
|
temp, err := os.CreateTemp(tempdir, "fp16")
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer temp.Close()
|
||||||
|
defer os.Remove(temp.Name())
|
||||||
|
|
||||||
|
if err = mArch.WriteGGUF(temp); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, err := temp.Seek(0, io.SeekStart); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
layer, err := NewLayer(temp, "application/vnd.ollama.image.model")
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("aaa: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
blobpath, err := GetBlobsPath(layer.Digest)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
bin, err := os.Open(blobpath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer bin.Close()
|
||||||
|
|
||||||
|
ggml, _, err := llm.DecodeGGML(bin)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
layer, err = NewLayerFromLayer(layer.Digest, layer.MediaType, "")
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
layers = append(layers, &layerWithGGML{layer, ggml})
|
||||||
|
return layers, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseFromFile(ctx context.Context, file *os.File, fn func(api.ProgressResponse)) (layers []*layerWithGGML, err error) {
|
||||||
|
sr := io.NewSectionReader(file, 0, 512)
|
||||||
|
contentType, err := detectContentType(sr)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
switch contentType {
|
||||||
|
case "gguf", "ggla":
|
||||||
|
// noop
|
||||||
|
case "application/zip":
|
||||||
|
return parseFromZipFile(ctx, file, fn)
|
||||||
|
default:
|
||||||
|
return nil, fmt.Errorf("unsupported content type: %s", contentType)
|
||||||
|
}
|
||||||
|
|
||||||
|
stat, err := file.Stat()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var offset int64
|
||||||
|
for offset < stat.Size() {
|
||||||
|
ggml, n, err := llm.DecodeGGML(file)
|
||||||
|
if errors.Is(err, io.EOF) {
|
||||||
|
break
|
||||||
|
} else if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
mediatype := "application/vnd.ollama.image.model"
|
||||||
|
if ggml.Name() == "ggla" {
|
||||||
|
mediatype = "application/vnd.ollama.image.adapter"
|
||||||
|
} else if ggml.KV().Architecture() == "clip" {
|
||||||
|
mediatype = "application/vnd.ollama.image.projector"
|
||||||
|
}
|
||||||
|
|
||||||
|
layer, err := NewLayer(io.NewSectionReader(file, offset, n), mediatype)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
layers = append(layers, &layerWithGGML{layer, ggml})
|
||||||
|
offset = n
|
||||||
|
}
|
||||||
|
|
||||||
|
return layers, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func detectContentType(r io.Reader) (string, error) {
|
||||||
|
var b bytes.Buffer
|
||||||
|
if _, err := io.Copy(&b, r); err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
if contentType := llm.DetectGGMLType(b.Bytes()); contentType != "" {
|
||||||
|
return contentType, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if contentType := http.DetectContentType(b.Bytes()); contentType != "application/octet-stream" {
|
||||||
|
return contentType, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return "unknown", nil
|
||||||
|
}
|
||||||
@@ -146,6 +146,11 @@ func (s *Server) GenerateHandler(c *gin.Context) {
|
|||||||
select {
|
select {
|
||||||
case runner = <-rCh:
|
case runner = <-rCh:
|
||||||
case err = <-eCh:
|
case err = <-eCh:
|
||||||
|
if errors.Is(err, context.Canceled) {
|
||||||
|
c.JSON(499, gin.H{"error": "request canceled"})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@@ -389,6 +394,11 @@ func (s *Server) EmbeddingsHandler(c *gin.Context) {
|
|||||||
select {
|
select {
|
||||||
case runner = <-rCh:
|
case runner = <-rCh:
|
||||||
case err = <-eCh:
|
case err = <-eCh:
|
||||||
|
if errors.Is(err, context.Canceled) {
|
||||||
|
c.JSON(499, gin.H{"error": "request canceled"})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@@ -570,7 +580,7 @@ func (s *Server) CreateModelHandler(c *gin.Context) {
|
|||||||
ctx, cancel := context.WithCancel(c.Request.Context())
|
ctx, cancel := context.WithCancel(c.Request.Context())
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
if err := CreateModel(ctx, model, filepath.Dir(req.Path), req.Quantization, commands, fn); err != nil {
|
if err := CreateModel(ctx, model, filepath.Dir(req.Path), strings.ToUpper(req.Quantization), commands, fn); err != nil {
|
||||||
ch <- gin.H{"error": err.Error()}
|
ch <- gin.H{"error": err.Error()}
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
@@ -718,12 +728,12 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
mf, err := ShowModelfile(model)
|
var sb strings.Builder
|
||||||
if err != nil {
|
fmt.Fprintln(&sb, "# Modelfile generate by \"ollama show\"")
|
||||||
return nil, err
|
fmt.Fprintln(&sb, "# To build a new Modelfile based on this, replace FROM with:")
|
||||||
}
|
fmt.Fprintf(&sb, "# FROM %s\n\n", model.ShortName)
|
||||||
|
fmt.Fprint(&sb, parser.Format(model.Commands()))
|
||||||
resp.Modelfile = mf
|
resp.Modelfile = sb.String()
|
||||||
|
|
||||||
return resp, nil
|
return resp, nil
|
||||||
}
|
}
|
||||||
@@ -800,16 +810,13 @@ func (s *Server) CopyModelHandler(c *gin.Context) {
|
|||||||
|
|
||||||
src := model.ParseName(r.Source)
|
src := model.ParseName(r.Source)
|
||||||
if !src.IsValid() {
|
if !src.IsValid() {
|
||||||
_ = c.Error(fmt.Errorf("source %q is invalid", r.Source))
|
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("source %q is invalid", r.Source)})
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
dst := model.ParseName(r.Destination)
|
dst := model.ParseName(r.Destination)
|
||||||
if !dst.IsValid() {
|
if !dst.IsValid() {
|
||||||
_ = c.Error(fmt.Errorf("destination %q is invalid", r.Destination))
|
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("destination %q is invalid", r.Source)})
|
||||||
}
|
|
||||||
|
|
||||||
if len(c.Errors) > 0 {
|
|
||||||
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": c.Errors.Errors()})
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -865,11 +872,6 @@ func (s *Server) CreateBlobHandler(c *gin.Context) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if _, err := layer.Commit(); err != nil {
|
|
||||||
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
c.Status(http.StatusCreated)
|
c.Status(http.StatusCreated)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1216,6 +1218,11 @@ func (s *Server) ChatHandler(c *gin.Context) {
|
|||||||
select {
|
select {
|
||||||
case runner = <-rCh:
|
case runner = <-rCh:
|
||||||
case err = <-eCh:
|
case err = <-eCh:
|
||||||
|
if errors.Is(err, context.Canceled) {
|
||||||
|
c.JSON(499, gin.H{"error": "request canceled"})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -124,14 +124,12 @@ func Test_Routes(t *testing.T) {
|
|||||||
Method: http.MethodPost,
|
Method: http.MethodPost,
|
||||||
Path: "/api/create",
|
Path: "/api/create",
|
||||||
Setup: func(t *testing.T, req *http.Request) {
|
Setup: func(t *testing.T, req *http.Request) {
|
||||||
f, err := os.CreateTemp(t.TempDir(), "ollama-model")
|
fname := createTestFile(t, "ollama-model")
|
||||||
assert.Nil(t, err)
|
|
||||||
defer f.Close()
|
|
||||||
|
|
||||||
stream := false
|
stream := false
|
||||||
createReq := api.CreateRequest{
|
createReq := api.CreateRequest{
|
||||||
Name: "t-bone",
|
Name: "t-bone",
|
||||||
Modelfile: fmt.Sprintf("FROM %s", f.Name()),
|
Modelfile: fmt.Sprintf("FROM %s", fname),
|
||||||
Stream: &stream,
|
Stream: &stream,
|
||||||
}
|
}
|
||||||
jsonData, err := json.Marshal(createReq)
|
jsonData, err := json.Marshal(createReq)
|
||||||
@@ -216,28 +214,25 @@ func Test_Routes(t *testing.T) {
|
|||||||
httpSrv := httptest.NewServer(router)
|
httpSrv := httptest.NewServer(router)
|
||||||
t.Cleanup(httpSrv.Close)
|
t.Cleanup(httpSrv.Close)
|
||||||
|
|
||||||
workDir, err := os.MkdirTemp("", "ollama-test")
|
t.Setenv("OLLAMA_MODELS", t.TempDir())
|
||||||
assert.Nil(t, err)
|
|
||||||
defer os.RemoveAll(workDir)
|
|
||||||
os.Setenv("OLLAMA_MODELS", workDir)
|
|
||||||
|
|
||||||
for _, tc := range testCases {
|
for _, tc := range testCases {
|
||||||
t.Logf("Running Test: [%s]", tc.Name)
|
t.Run(tc.Name, func(t *testing.T) {
|
||||||
u := httpSrv.URL + tc.Path
|
u := httpSrv.URL + tc.Path
|
||||||
req, err := http.NewRequestWithContext(context.TODO(), tc.Method, u, nil)
|
req, err := http.NewRequestWithContext(context.TODO(), tc.Method, u, nil)
|
||||||
assert.Nil(t, err)
|
assert.Nil(t, err)
|
||||||
|
|
||||||
if tc.Setup != nil {
|
if tc.Setup != nil {
|
||||||
tc.Setup(t, req)
|
tc.Setup(t, req)
|
||||||
}
|
}
|
||||||
|
|
||||||
resp, err := httpSrv.Client().Do(req)
|
resp, err := httpSrv.Client().Do(req)
|
||||||
assert.Nil(t, err)
|
assert.Nil(t, err)
|
||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
|
|
||||||
if tc.Expected != nil {
|
|
||||||
tc.Expected(t, resp)
|
|
||||||
}
|
|
||||||
|
|
||||||
|
if tc.Expected != nil {
|
||||||
|
tc.Expected(t, resp)
|
||||||
|
}
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -149,6 +149,14 @@ func (s *Scheduler) processPending(ctx context.Context) {
|
|||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If we're CPU only mode, just limit by loadedMax above
|
||||||
|
// TODO handle system memory exhaustion
|
||||||
|
if (len(gpus) == 1 && gpus[0].Library == "cpu") || pending.opts.NumGPU == 0 {
|
||||||
|
slog.Debug("cpu mode with existing models, loading")
|
||||||
|
s.loadFn(pending, ggml, gpus)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
// No models loaded. Load the model but prefer the best fit.
|
// No models loaded. Load the model but prefer the best fit.
|
||||||
if loadedCount == 0 {
|
if loadedCount == 0 {
|
||||||
slog.Debug("loading first model", "model", pending.model.ModelPath)
|
slog.Debug("loading first model", "model", pending.model.ModelPath)
|
||||||
@@ -242,6 +250,7 @@ func (s *Scheduler) processCompleted(ctx context.Context) {
|
|||||||
defer runner.refMu.Unlock()
|
defer runner.refMu.Unlock()
|
||||||
if runner.expireTimer != nil {
|
if runner.expireTimer != nil {
|
||||||
runner.expireTimer.Stop()
|
runner.expireTimer.Stop()
|
||||||
|
runner.expireTimer = nil
|
||||||
}
|
}
|
||||||
s.expiredCh <- runner
|
s.expiredCh <- runner
|
||||||
})
|
})
|
||||||
@@ -288,6 +297,10 @@ func (pending *LlmRequest) useLoadedRunner(runner *runnerRef, finished chan *Llm
|
|||||||
runner.refMu.Lock()
|
runner.refMu.Lock()
|
||||||
defer runner.refMu.Unlock()
|
defer runner.refMu.Unlock()
|
||||||
runner.refCount++
|
runner.refCount++
|
||||||
|
if runner.expireTimer != nil {
|
||||||
|
runner.expireTimer.Stop()
|
||||||
|
runner.expireTimer = nil
|
||||||
|
}
|
||||||
runner.sessionDuration = pending.sessionDuration
|
runner.sessionDuration = pending.sessionDuration
|
||||||
pending.successCh <- runner
|
pending.successCh <- runner
|
||||||
go func() {
|
go func() {
|
||||||
@@ -418,6 +431,10 @@ type runnerRef struct {
|
|||||||
|
|
||||||
// The refMu must already be held when calling unload
|
// The refMu must already be held when calling unload
|
||||||
func (runner *runnerRef) unload() {
|
func (runner *runnerRef) unload() {
|
||||||
|
if runner.expireTimer != nil {
|
||||||
|
runner.expireTimer.Stop()
|
||||||
|
runner.expireTimer = nil
|
||||||
|
}
|
||||||
if runner.llama != nil {
|
if runner.llama != nil {
|
||||||
runner.llama.Close()
|
runner.llama.Close()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -28,19 +28,33 @@ func TestInitScheduler(t *testing.T) {
|
|||||||
ctx, done := context.WithCancel(context.Background())
|
ctx, done := context.WithCancel(context.Background())
|
||||||
defer done()
|
defer done()
|
||||||
initialMax := loadedMax
|
initialMax := loadedMax
|
||||||
|
initialParallel := numParallel
|
||||||
s := InitScheduler(ctx)
|
s := InitScheduler(ctx)
|
||||||
require.Equal(t, initialMax, loadedMax)
|
require.Equal(t, initialMax, loadedMax)
|
||||||
|
s.loadedMu.Lock()
|
||||||
require.NotNil(t, s.loaded)
|
require.NotNil(t, s.loaded)
|
||||||
|
s.loadedMu.Unlock()
|
||||||
|
|
||||||
os.Setenv("OLLAMA_MAX_LOADED_MODELS", "blue")
|
os.Setenv("OLLAMA_MAX_LOADED_MODELS", "blue")
|
||||||
s = InitScheduler(ctx)
|
s = InitScheduler(ctx)
|
||||||
require.Equal(t, initialMax, loadedMax)
|
require.Equal(t, initialMax, loadedMax)
|
||||||
|
s.loadedMu.Lock()
|
||||||
require.NotNil(t, s.loaded)
|
require.NotNil(t, s.loaded)
|
||||||
|
s.loadedMu.Unlock()
|
||||||
|
|
||||||
os.Setenv("OLLAMA_MAX_LOADED_MODELS", "0")
|
os.Setenv("OLLAMA_MAX_LOADED_MODELS", "0")
|
||||||
s = InitScheduler(ctx)
|
s = InitScheduler(ctx)
|
||||||
require.Equal(t, 0, loadedMax)
|
require.Equal(t, 0, loadedMax)
|
||||||
|
s.loadedMu.Lock()
|
||||||
require.NotNil(t, s.loaded)
|
require.NotNil(t, s.loaded)
|
||||||
|
s.loadedMu.Unlock()
|
||||||
|
|
||||||
|
os.Setenv("OLLAMA_NUM_PARALLEL", "blue")
|
||||||
|
_ = InitScheduler(ctx)
|
||||||
|
require.Equal(t, initialParallel, numParallel)
|
||||||
|
os.Setenv("OLLAMA_NUM_PARALLEL", "10")
|
||||||
|
_ = InitScheduler(ctx)
|
||||||
|
require.Equal(t, 10, numParallel)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestLoad(t *testing.T) {
|
func TestLoad(t *testing.T) {
|
||||||
@@ -51,6 +65,7 @@ func TestLoad(t *testing.T) {
|
|||||||
req := &LlmRequest{
|
req := &LlmRequest{
|
||||||
ctx: ctx,
|
ctx: ctx,
|
||||||
model: &Model{ModelPath: "foo"},
|
model: &Model{ModelPath: "foo"},
|
||||||
|
opts: api.DefaultOptions(),
|
||||||
successCh: make(chan *runnerRef, 1),
|
successCh: make(chan *runnerRef, 1),
|
||||||
errCh: make(chan error, 1),
|
errCh: make(chan error, 1),
|
||||||
sessionDuration: 2,
|
sessionDuration: 2,
|
||||||
@@ -63,7 +78,9 @@ func TestLoad(t *testing.T) {
|
|||||||
s.load(req, ggml, gpus)
|
s.load(req, ggml, gpus)
|
||||||
require.Len(t, req.successCh, 0)
|
require.Len(t, req.successCh, 0)
|
||||||
require.Len(t, req.errCh, 1)
|
require.Len(t, req.errCh, 1)
|
||||||
|
s.loadedMu.Lock()
|
||||||
require.Len(t, s.loaded, 0)
|
require.Len(t, s.loaded, 0)
|
||||||
|
s.loadedMu.Unlock()
|
||||||
err := <-req.errCh
|
err := <-req.errCh
|
||||||
require.Contains(t, err.Error(), "this model may be incompatible")
|
require.Contains(t, err.Error(), "this model may be incompatible")
|
||||||
|
|
||||||
@@ -78,7 +95,9 @@ func TestLoad(t *testing.T) {
|
|||||||
case resp := <-req.successCh:
|
case resp := <-req.successCh:
|
||||||
require.Equal(t, uint64(10), resp.estimatedVRAM)
|
require.Equal(t, uint64(10), resp.estimatedVRAM)
|
||||||
require.Equal(t, uint(1), resp.refCount)
|
require.Equal(t, uint(1), resp.refCount)
|
||||||
|
s.loadedMu.Lock()
|
||||||
require.Len(t, s.loaded, 1)
|
require.Len(t, s.loaded, 1)
|
||||||
|
s.loadedMu.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
req.model.ModelPath = "dummy_model_path"
|
req.model.ModelPath = "dummy_model_path"
|
||||||
@@ -90,7 +109,9 @@ func TestLoad(t *testing.T) {
|
|||||||
case resp := <-req.successCh:
|
case resp := <-req.successCh:
|
||||||
t.Errorf("unexpected success %v", resp)
|
t.Errorf("unexpected success %v", resp)
|
||||||
}
|
}
|
||||||
|
s.loadedMu.Lock()
|
||||||
runner := s.loaded["dummy_model_path"]
|
runner := s.loaded["dummy_model_path"]
|
||||||
|
s.loadedMu.Unlock()
|
||||||
require.NotNil(t, runner)
|
require.NotNil(t, runner)
|
||||||
require.Equal(t, uint(0), runner.refCount)
|
require.Equal(t, uint(0), runner.refCount)
|
||||||
time.Sleep(1 * time.Millisecond)
|
time.Sleep(1 * time.Millisecond)
|
||||||
@@ -143,6 +164,7 @@ func newScenario(t *testing.T, ctx context.Context, modelName string, estimatedV
|
|||||||
scenario.req = &LlmRequest{
|
scenario.req = &LlmRequest{
|
||||||
ctx: scenario.ctx,
|
ctx: scenario.ctx,
|
||||||
model: model,
|
model: model,
|
||||||
|
opts: api.DefaultOptions(),
|
||||||
sessionDuration: 5 * time.Millisecond,
|
sessionDuration: 5 * time.Millisecond,
|
||||||
successCh: make(chan *runnerRef, 1),
|
successCh: make(chan *runnerRef, 1),
|
||||||
errCh: make(chan error, 1),
|
errCh: make(chan error, 1),
|
||||||
@@ -171,7 +193,9 @@ func TestRequests(t *testing.T) {
|
|||||||
// Multiple loaded models
|
// Multiple loaded models
|
||||||
scenario3a := newScenario(t, ctx, "ollama-model-3a", 1*format.GigaByte)
|
scenario3a := newScenario(t, ctx, "ollama-model-3a", 1*format.GigaByte)
|
||||||
scenario3b := newScenario(t, ctx, "ollama-model-3b", 24*format.GigaByte)
|
scenario3b := newScenario(t, ctx, "ollama-model-3b", 24*format.GigaByte)
|
||||||
scenario3c := newScenario(t, ctx, "ollama-model-3c", 30) // Needs prior unloaded
|
scenario3c := newScenario(t, ctx, "ollama-model-4a", 30)
|
||||||
|
scenario3c.req.opts.NumGPU = 0 // CPU load, will be allowed
|
||||||
|
scenario3d := newScenario(t, ctx, "ollama-model-3c", 30) // Needs prior unloaded
|
||||||
|
|
||||||
s := InitScheduler(ctx)
|
s := InitScheduler(ctx)
|
||||||
s.getGpuFn = func() gpu.GpuInfoList {
|
s.getGpuFn = func() gpu.GpuInfoList {
|
||||||
@@ -240,7 +264,9 @@ func TestRequests(t *testing.T) {
|
|||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
t.Errorf("timeout")
|
t.Errorf("timeout")
|
||||||
}
|
}
|
||||||
|
s.loadedMu.Lock()
|
||||||
require.Len(t, s.loaded, 1)
|
require.Len(t, s.loaded, 1)
|
||||||
|
s.loadedMu.Unlock()
|
||||||
|
|
||||||
loadedMax = 0
|
loadedMax = 0
|
||||||
s.newServerFn = scenario3b.newServer
|
s.newServerFn = scenario3b.newServer
|
||||||
@@ -254,19 +280,14 @@ func TestRequests(t *testing.T) {
|
|||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
t.Errorf("timeout")
|
t.Errorf("timeout")
|
||||||
}
|
}
|
||||||
|
s.loadedMu.Lock()
|
||||||
require.Len(t, s.loaded, 2)
|
require.Len(t, s.loaded, 2)
|
||||||
|
s.loadedMu.Unlock()
|
||||||
|
|
||||||
// Try to load a model that wont fit
|
// This is a CPU load with NumGPU = 0 so it should load
|
||||||
s.newServerFn = scenario3c.newServer
|
s.newServerFn = scenario3c.newServer
|
||||||
slog.Info("scenario3c")
|
slog.Info("scenario3c")
|
||||||
require.Len(t, s.loaded, 2)
|
|
||||||
scenario3a.ctxDone() // Won't help since this one isn't big enough to make room
|
|
||||||
time.Sleep(2 * time.Millisecond)
|
|
||||||
s.pendingReqCh <- scenario3c.req
|
s.pendingReqCh <- scenario3c.req
|
||||||
// finish prior request, so new model can load
|
|
||||||
time.Sleep(6 * time.Millisecond)
|
|
||||||
require.Len(t, s.loaded, 1)
|
|
||||||
scenario3b.ctxDone()
|
|
||||||
select {
|
select {
|
||||||
case resp := <-scenario3c.req.successCh:
|
case resp := <-scenario3c.req.successCh:
|
||||||
require.Equal(t, resp.llama, scenario3c.srv)
|
require.Equal(t, resp.llama, scenario3c.srv)
|
||||||
@@ -275,7 +296,36 @@ func TestRequests(t *testing.T) {
|
|||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
t.Errorf("timeout")
|
t.Errorf("timeout")
|
||||||
}
|
}
|
||||||
require.Len(t, s.loaded, 1)
|
s.loadedMu.Lock()
|
||||||
|
require.Len(t, s.loaded, 3)
|
||||||
|
s.loadedMu.Unlock()
|
||||||
|
|
||||||
|
// Try to load a model that wont fit
|
||||||
|
s.newServerFn = scenario3d.newServer
|
||||||
|
slog.Info("scenario3d")
|
||||||
|
s.loadedMu.Lock()
|
||||||
|
require.Len(t, s.loaded, 3)
|
||||||
|
s.loadedMu.Unlock()
|
||||||
|
scenario3a.ctxDone() // Won't help since this one isn't big enough to make room
|
||||||
|
time.Sleep(2 * time.Millisecond)
|
||||||
|
s.pendingReqCh <- scenario3d.req
|
||||||
|
// finish prior request, so new model can load
|
||||||
|
time.Sleep(6 * time.Millisecond)
|
||||||
|
s.loadedMu.Lock()
|
||||||
|
require.Len(t, s.loaded, 2)
|
||||||
|
s.loadedMu.Unlock()
|
||||||
|
scenario3b.ctxDone()
|
||||||
|
select {
|
||||||
|
case resp := <-scenario3d.req.successCh:
|
||||||
|
require.Equal(t, resp.llama, scenario3d.srv)
|
||||||
|
require.Len(t, s.pendingReqCh, 0)
|
||||||
|
require.Len(t, scenario3d.req.errCh, 0)
|
||||||
|
case <-ctx.Done():
|
||||||
|
t.Errorf("timeout")
|
||||||
|
}
|
||||||
|
s.loadedMu.Lock()
|
||||||
|
require.Len(t, s.loaded, 2)
|
||||||
|
s.loadedMu.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGetRunner(t *testing.T) {
|
func TestGetRunner(t *testing.T) {
|
||||||
@@ -318,7 +368,9 @@ func TestGetRunner(t *testing.T) {
|
|||||||
t.Errorf("timeout")
|
t.Errorf("timeout")
|
||||||
}
|
}
|
||||||
scenario1a.ctxDone()
|
scenario1a.ctxDone()
|
||||||
|
s.loadedMu.Lock()
|
||||||
require.Len(t, s.loaded, 1)
|
require.Len(t, s.loaded, 1)
|
||||||
|
s.loadedMu.Unlock()
|
||||||
|
|
||||||
scenario1c.req.model.ModelPath = "bad path"
|
scenario1c.req.model.ModelPath = "bad path"
|
||||||
slog.Info("scenario1c")
|
slog.Info("scenario1c")
|
||||||
@@ -328,7 +380,9 @@ func TestGetRunner(t *testing.T) {
|
|||||||
require.Len(t, errCh1c, 0)
|
require.Len(t, errCh1c, 0)
|
||||||
|
|
||||||
time.Sleep(5 * time.Millisecond)
|
time.Sleep(5 * time.Millisecond)
|
||||||
|
s.loadedMu.Lock()
|
||||||
require.Len(t, s.loaded, 0)
|
require.Len(t, s.loaded, 0)
|
||||||
|
s.loadedMu.Unlock()
|
||||||
require.Len(t, errCh1c, 1)
|
require.Len(t, errCh1c, 1)
|
||||||
err = <-errCh1c
|
err = <-errCh1c
|
||||||
require.Contains(t, err.Error(), "bad path")
|
require.Contains(t, err.Error(), "bad path")
|
||||||
@@ -358,7 +412,9 @@ func TestPrematureExpired(t *testing.T) {
|
|||||||
require.Equal(t, resp.llama, scenario1a.srv)
|
require.Equal(t, resp.llama, scenario1a.srv)
|
||||||
require.Len(t, s.pendingReqCh, 0)
|
require.Len(t, s.pendingReqCh, 0)
|
||||||
require.Len(t, errCh1a, 0)
|
require.Len(t, errCh1a, 0)
|
||||||
|
s.loadedMu.Lock()
|
||||||
require.Len(t, s.loaded, 1)
|
require.Len(t, s.loaded, 1)
|
||||||
|
s.loadedMu.Unlock()
|
||||||
slog.Info("sending premature expired event now")
|
slog.Info("sending premature expired event now")
|
||||||
s.expiredCh <- resp // Shouldn't happen in real life, but make sure its safe
|
s.expiredCh <- resp // Shouldn't happen in real life, but make sure its safe
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
@@ -383,6 +439,7 @@ func TestUseLoadedRunner(t *testing.T) {
|
|||||||
ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond)
|
ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond)
|
||||||
req := &LlmRequest{
|
req := &LlmRequest{
|
||||||
ctx: ctx,
|
ctx: ctx,
|
||||||
|
opts: api.DefaultOptions(),
|
||||||
successCh: make(chan *runnerRef, 1),
|
successCh: make(chan *runnerRef, 1),
|
||||||
sessionDuration: 2,
|
sessionDuration: 2,
|
||||||
}
|
}
|
||||||
@@ -426,8 +483,10 @@ func TestUpdateFreeSpace(t *testing.T) {
|
|||||||
r2 := &runnerRef{llama: llm2, gpus: gpus}
|
r2 := &runnerRef{llama: llm2, gpus: gpus}
|
||||||
|
|
||||||
s := InitScheduler(ctx)
|
s := InitScheduler(ctx)
|
||||||
|
s.loadedMu.Lock()
|
||||||
s.loaded["a"] = r1
|
s.loaded["a"] = r1
|
||||||
s.loaded["b"] = r2
|
s.loaded["b"] = r2
|
||||||
|
s.loadedMu.Unlock()
|
||||||
|
|
||||||
s.updateFreeSpace(gpus)
|
s.updateFreeSpace(gpus)
|
||||||
require.Equal(t, uint64(850), gpus[0].FreeMemory)
|
require.Equal(t, uint64(850), gpus[0].FreeMemory)
|
||||||
@@ -437,13 +496,18 @@ func TestUpdateFreeSpace(t *testing.T) {
|
|||||||
func TestFindRunnerToUnload(t *testing.T) {
|
func TestFindRunnerToUnload(t *testing.T) {
|
||||||
ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond)
|
ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond)
|
||||||
defer done()
|
defer done()
|
||||||
req := &LlmRequest{ctx: ctx}
|
req := &LlmRequest{
|
||||||
|
ctx: ctx,
|
||||||
|
opts: api.DefaultOptions(),
|
||||||
|
}
|
||||||
r1 := &runnerRef{refCount: 1, sessionDuration: 1}
|
r1 := &runnerRef{refCount: 1, sessionDuration: 1}
|
||||||
r2 := &runnerRef{sessionDuration: 2}
|
r2 := &runnerRef{sessionDuration: 2}
|
||||||
|
|
||||||
s := InitScheduler(ctx)
|
s := InitScheduler(ctx)
|
||||||
|
s.loadedMu.Lock()
|
||||||
s.loaded["a"] = r1
|
s.loaded["a"] = r1
|
||||||
s.loaded["b"] = r2
|
s.loaded["b"] = r2
|
||||||
|
s.loadedMu.Unlock()
|
||||||
|
|
||||||
resp := s.findRunnerToUnload(req)
|
resp := s.findRunnerToUnload(req)
|
||||||
require.Equal(t, r2, resp)
|
require.Equal(t, r2, resp)
|
||||||
@@ -458,10 +522,11 @@ func TestNeedsReload(t *testing.T) {
|
|||||||
defer done()
|
defer done()
|
||||||
|
|
||||||
llm := &mockLlm{}
|
llm := &mockLlm{}
|
||||||
|
do := api.DefaultOptions()
|
||||||
runner := &runnerRef{
|
runner := &runnerRef{
|
||||||
adapters: []string{"adapter1"},
|
adapters: []string{"adapter1"},
|
||||||
projectors: []string{"projector1"},
|
projectors: []string{"projector1"},
|
||||||
Options: &api.Options{},
|
Options: &do,
|
||||||
llama: llm,
|
llama: llm,
|
||||||
}
|
}
|
||||||
req := &LlmRequest{
|
req := &LlmRequest{
|
||||||
@@ -469,7 +534,7 @@ func TestNeedsReload(t *testing.T) {
|
|||||||
AdapterPaths: []string{"adapter2"},
|
AdapterPaths: []string{"adapter2"},
|
||||||
ProjectorPaths: []string{"projector2"},
|
ProjectorPaths: []string{"projector2"},
|
||||||
},
|
},
|
||||||
opts: api.Options{},
|
opts: api.DefaultOptions(),
|
||||||
}
|
}
|
||||||
resp := runner.needsReload(ctx, req)
|
resp := runner.needsReload(ctx, req)
|
||||||
require.True(t, resp)
|
require.True(t, resp)
|
||||||
@@ -508,8 +573,10 @@ func TestUnloadAllRunners(t *testing.T) {
|
|||||||
r1 := &runnerRef{llama: llm1}
|
r1 := &runnerRef{llama: llm1}
|
||||||
r2 := &runnerRef{llama: llm2}
|
r2 := &runnerRef{llama: llm2}
|
||||||
|
|
||||||
|
s.loadedMu.Lock()
|
||||||
s.loaded["a"] = r1
|
s.loaded["a"] = r1
|
||||||
s.loaded["b"] = r2
|
s.loaded["b"] = r2
|
||||||
|
s.loadedMu.Unlock()
|
||||||
s.unloadAllRunners()
|
s.unloadAllRunners()
|
||||||
|
|
||||||
require.True(t, llm1.closeCalled)
|
require.True(t, llm1.closeCalled)
|
||||||
|
|||||||
18
types/errtypes/errtypes.go
Normal file
18
types/errtypes/errtypes.go
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
// Package errtypes contains custom error types
|
||||||
|
package errtypes
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
const UnknownOllamaKeyErrMsg = "unknown ollama key"
|
||||||
|
|
||||||
|
// TODO: This should have a structured response from the API
|
||||||
|
type UnknownOllamaKey struct {
|
||||||
|
Key string
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *UnknownOllamaKey) Error() string {
|
||||||
|
return fmt.Sprintf("unauthorized: %s %q", UnknownOllamaKeyErrMsg, strings.TrimSpace(e.Key))
|
||||||
|
}
|
||||||
@@ -1,87 +0,0 @@
|
|||||||
package model
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"log/slog"
|
|
||||||
"strings"
|
|
||||||
"unicode"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Digest represents a digest of a model Manifest. It is a comparable value
|
|
||||||
// type and is immutable.
|
|
||||||
//
|
|
||||||
// The zero Digest is not a valid digest.
|
|
||||||
type Digest struct {
|
|
||||||
s string
|
|
||||||
}
|
|
||||||
|
|
||||||
// Split returns the digest type and the digest value.
|
|
||||||
func (d Digest) Split() (typ, digest string) {
|
|
||||||
typ, digest, _ = strings.Cut(d.s, "-")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// String returns the digest in the form of "<digest-type>-<digest>", or the
|
|
||||||
// empty string if the digest is invalid.
|
|
||||||
func (d Digest) String() string { return d.s }
|
|
||||||
|
|
||||||
// IsValid returns true if the digest is valid (not zero).
|
|
||||||
//
|
|
||||||
// A valid digest may be created only by ParseDigest, or
|
|
||||||
// ParseName(name).Digest().
|
|
||||||
func (d Digest) IsValid() bool { return d.s != "" }
|
|
||||||
|
|
||||||
// LogValue implements slog.Value.
|
|
||||||
func (d Digest) LogValue() slog.Value {
|
|
||||||
return slog.StringValue(d.String())
|
|
||||||
}
|
|
||||||
|
|
||||||
var (
|
|
||||||
_ slog.LogValuer = Digest{}
|
|
||||||
)
|
|
||||||
|
|
||||||
// ParseDigest parses a string in the form of "<digest-type>-<digest>" into a
|
|
||||||
// Digest.
|
|
||||||
func ParseDigest(s string) Digest {
|
|
||||||
typ, digest, ok := strings.Cut(s, "-")
|
|
||||||
if !ok {
|
|
||||||
typ, digest, ok = strings.Cut(s, ":")
|
|
||||||
}
|
|
||||||
if ok && isValidDigestType(typ) && isValidHex(digest) && len(digest) >= 2 {
|
|
||||||
return Digest{s: fmt.Sprintf("%s-%s", typ, digest)}
|
|
||||||
}
|
|
||||||
return Digest{}
|
|
||||||
}
|
|
||||||
|
|
||||||
func MustParseDigest(s string) Digest {
|
|
||||||
d := ParseDigest(s)
|
|
||||||
if !d.IsValid() {
|
|
||||||
panic(fmt.Sprintf("invalid digest: %q", s))
|
|
||||||
}
|
|
||||||
return d
|
|
||||||
}
|
|
||||||
|
|
||||||
func isValidDigestType(s string) bool {
|
|
||||||
if len(s) == 0 {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
for _, r := range s {
|
|
||||||
if !unicode.IsLower(r) && !unicode.IsDigit(r) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
func isValidHex(s string) bool {
|
|
||||||
if len(s) == 0 {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
for i := range s {
|
|
||||||
c := s[i]
|
|
||||||
if c < '0' || c > '9' && c < 'a' || c > 'f' {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
@@ -1,46 +0,0 @@
|
|||||||
package model
|
|
||||||
|
|
||||||
import "testing"
|
|
||||||
|
|
||||||
var testDigests = map[string]Digest{
|
|
||||||
"": {},
|
|
||||||
"sha256-1234": {s: "sha256-1234"},
|
|
||||||
"sha256-5678": {s: "sha256-5678"},
|
|
||||||
"blake2-9abc": {s: "blake2-9abc"},
|
|
||||||
"-1234": {},
|
|
||||||
"sha256-": {},
|
|
||||||
"sha256-1234-5678": {},
|
|
||||||
"sha256-P": {}, // invalid hex
|
|
||||||
"sha256-1234P": {},
|
|
||||||
"---": {},
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestDigestParse(t *testing.T) {
|
|
||||||
// Test cases.
|
|
||||||
for s, want := range testDigests {
|
|
||||||
got := ParseDigest(s)
|
|
||||||
t.Logf("ParseDigest(%q) = %#v", s, got)
|
|
||||||
if got != want {
|
|
||||||
t.Errorf("ParseDigest(%q) = %q; want %q", s, got, want)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestDigestString(t *testing.T) {
|
|
||||||
// Test cases.
|
|
||||||
for s, d := range testDigests {
|
|
||||||
want := s
|
|
||||||
if !d.IsValid() {
|
|
||||||
want = ""
|
|
||||||
}
|
|
||||||
got := d.String()
|
|
||||||
if got != want {
|
|
||||||
t.Errorf("ParseDigest(%q).String() = %q; want %q", s, got, want)
|
|
||||||
}
|
|
||||||
|
|
||||||
got = ParseDigest(s).String()
|
|
||||||
if got != want {
|
|
||||||
t.Errorf("roundtrip ParseDigest(%q).String() = %q; want %q", s, got, want)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,2 +1,2 @@
|
|||||||
go test fuzz v1
|
go test fuzz v1
|
||||||
string(":")
|
string("00@")
|
||||||
@@ -1,2 +0,0 @@
|
|||||||
go test fuzz v1
|
|
||||||
string("/0")
|
|
||||||
@@ -1,2 +0,0 @@
|
|||||||
go test fuzz v1
|
|
||||||
string("0//0")
|
|
||||||
@@ -1,2 +0,0 @@
|
|||||||
go test fuzz v1
|
|
||||||
string("0 /0")
|
|
||||||
@@ -1,2 +0,0 @@
|
|||||||
go test fuzz v1
|
|
||||||
string("+0/00000")
|
|
||||||
@@ -1,2 +0,0 @@
|
|||||||
go test fuzz v1
|
|
||||||
string("0+.\xf2\x80\xf6\x9d00000\xe5\x99\xe6\xd900\xd90\xa60\x91\xdc0\xff\xbf\x99\xe800\xb9\xdc\xd6\xc300\x970\xfb\xfd0\xe0\x8a\xe1\xad\xd40\x9700\xa80\x980\xdd0000\xb00\x91000\xfe0\x89\x9b\x90\x93\x9f0\xe60\xf7\x84\xb0\x87\xa5\xff0\xa000\x9a\x85\xf6\x85\xfe\xa9\xf9\xe9\xde00\xf4\xe0\x8f\x81\xad\xde00\xd700\xaa\xe000000\xb1\xee0\x91")
|
|
||||||
@@ -1,15 +0,0 @@
|
|||||||
// Copyright (c) Tailscale Inc & AUTHORS
|
|
||||||
// SPDX-License-Identifier: BSD-3-Clause
|
|
||||||
|
|
||||||
// Package structs contains the Incomparable type.
|
|
||||||
package structs
|
|
||||||
|
|
||||||
// Incomparable is a zero-width incomparable type. If added as the
|
|
||||||
// first field in a struct, it marks that struct as not comparable
|
|
||||||
// (can't do == or be a map key) and usually doesn't add any width to
|
|
||||||
// the struct (unless the struct has only small fields).
|
|
||||||
//
|
|
||||||
// By making a struct incomparable, you can prevent misuse (prevent
|
|
||||||
// people from using ==), but also you can shrink generated binaries,
|
|
||||||
// as the compiler can omit equality funcs from the binary.
|
|
||||||
type Incomparable [0]func()
|
|
||||||
Reference in New Issue
Block a user