Mirror of https://github.com/ollama/ollama.git (synced 2026-04-18 07:54:17 +02:00)

Compare commits: mattw/pyth...v0.1.17 (121 commits)
| SHA1 |
|---|
| 6b5bdfa6c9 |
| c063ee4af0 |
| d99fa6ce0a |
| 3948c6ea06 |
| b85982eb91 |
| 86b0dd4b16 |
| f728738427 |
| 115048a0d8 |
| 1b417a7836 |
| 0174665d0e |
| 630518f0d9 |
| 6e16098a60 |
| 6ee8c80199 |
| 31f0551dab |
| 4a1abfe4fa |
| bbd41494bf |
| fedba24a63 |
| e3b090dbc5 |
| d9e60f634b |
| 4251b342de |
| 0a9d348023 |
| 3144e2a439 |
| c0960e29b5 |
| 5314fc9b63 |
| a36b5fef3b |
| 910e9401d0 |
| 56ffc3023a |
| 7a1b37ac64 |
| 5d4d2e2c60 |
| 7db5bcf73b |
| fa2f095bd9 |
| 045b855db9 |
| 32064a0646 |
| d9a250e9b5 |
| 944519ed16 |
| 2dd040d04c |
| bbe41ce41a |
| 9e1406e4ed |
| b74580c913 |
| 7e9405fd07 |
| 3b0b8930d4 |
| e3f925fc1b |
| 2a2289fb6b |
| dd427f499a |
| 2ae573c7ed |
| 02fe26c44b |
| 16c7548460 |
| fa75998c0d |
| 5344f886c8 |
| 6cc823c9b5 |
| b84d34e632 |
| 30229a913c |
| 1ade380bd7 |
| ba264e9da8 |
| a2405ec831 |
| ce809bb529 |
| 76bc4d0458 |
| 4a02945a15 |
| aec742b6d2 |
| f337642e94 |
| 51131cc6e2 |
| 43027789dc |
| f9b7d65e2b |
| 1f05d77110 |
| c3ff36088b |
| 13524b5e72 |
| f1b049fed8 |
| 97c5696945 |
| 47d4e22673 |
| 32f62fbb8e |
| 5d75505ebd |
| b9495ea162 |
| 409bb9674e |
| d3479c07a1 |
| b12f1b984f |
| 195e3d9dbd |
| 38fe1a368b |
| 4b77fcb2b9 |
| cde13bcdea |
| 0f0cd265a7 |
| 0db4706ec2 |
| 1ebdbd9694 |
| 5c59455b59 |
| 00d06619a1 |
| f1ef3f9947 |
| 5a5dca13b2 |
| 7232f1fa41 |
| 72e7a49aa9 |
| a3737cbd33 |
| 998f1785b6 |
| 70a93057cd |
| 2cb0fa7d40 |
| b2816bca67 |
| bf704423c5 |
| 7a0899d62d |
| 0cca1486dd |
| 2113c9d31a |
| 6deebf2489 |
| 95cb38ae47 |
| 1f126afb2d |
| f6201a7a6c |
| b3f6c6598f |
| 88620e983a |
| cedae0d17a |
| bb80a597db |
| 6681d37861 |
| 0409c1fa59 |
| b56e92470a |
| 5687f1a0cf |
| 7eda3d0c55 |
| 7194a07d4d |
| 13efd5f218 |
| c4bdfffd96 |
| 26c63418e0 |
| 2799784ac8 |
| 91897a606f |
| 96122b7271 |
| 39be7fdb98 |
| c2e3b89176 |
| cde31cb220 |
| 63097607b2 |
Dockerfile

```diff
@@ -19,5 +19,11 @@ RUN apt-get update && apt-get install -y ca-certificates
 COPY --from=0 /go/src/github.com/jmorganca/ollama/ollama /bin/ollama
 EXPOSE 11434
 ENV OLLAMA_HOST 0.0.0.0
+
+# set some environment variable for better NVIDIA compatibility
+ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
+ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
+ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
+
 ENTRYPOINT ["/bin/ollama"]
 CMD ["serve"]
```
README.md (39 lines changed)

````diff
@@ -57,6 +57,7 @@ Here are some example open-source models that can be downloaded:
 | Llama 2 70B | 70B | 39GB | `ollama run llama2:70b` |
 | Orca Mini | 3B | 1.9GB | `ollama run orca-mini` |
 | Vicuna | 7B | 3.8GB | `ollama run vicuna` |
+| LLaVA | 7B | 4.5GB | `ollama run llava` |
 
 > Note: You should have at least 8 GB of RAM to run the 3B models, 16 GB to run the 7B models, and 32 GB to run the 13B models.
 
@@ -104,7 +105,7 @@ FROM llama2
 # set the temperature to 1 [higher is more creative, lower is more coherent]
 PARAMETER temperature 1
 
-# set the system prompt
+# set the system message
 SYSTEM """
 You are Mario from Super Mario Bros. Answer as Mario, the assistant, only.
 """
@@ -158,6 +159,13 @@ For multiline input, you can wrap text with `"""`:
 I'm a basic program that prints the famous "Hello, world!" message to the console.
 ```
 
+### Multimodal models
+
+```
+>>> What's in this image? /Users/jmorgan/Desktop/smile.png
+The image features a yellow smiley face, which is likely the central focus of the picture.
+```
+
 ### Pass in prompt as arguments
 
 ```
@@ -205,7 +213,8 @@ Finally, in a separate shell, run a model:
 ## REST API
 
 Ollama has a REST API for running and managing models.
-For example, to generate text from a model:
+
+### Generate a response
 
 ```
 curl http://localhost:11434/api/generate -d '{
@@ -214,16 +223,23 @@ curl http://localhost:11434/api/generate -d '{
 }'
 ```
 
+### Chat with a model
+
+```
+curl http://localhost:11434/api/chat -d '{
+  "model": "mistral",
+  "messages": [
+    { "role": "user", "content": "why is the sky blue?" }
+  ]
+}'
+```
+
 See the [API documentation](./docs/api.md) for all endpoints.
 
 ## Community Integrations
 
-### Mobile
-
-- [Mobile Artificial Intelligence Distribution](https://github.com/MaidFoundation/Maid) (Maid)
-
 ### Web & Desktop
 
+- [Bionic GPT](https://github.com/bionic-gpt/bionic-gpt)
 - [HTML UI](https://github.com/rtcfirefly/ollama-ui)
 - [Chatbot UI](https://github.com/ivanfioravanti/chatbot-ollama)
 - [Typescript UI](https://github.com/ollama-interface/Ollama-Gui?tab=readme-ov-file)
@@ -233,6 +249,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [big-AGI](https://github.com/enricoros/big-agi/blob/main/docs/config-ollama.md)
 - [Cheshire Cat assistant framework](https://github.com/cheshire-cat-ai/core)
 - [Amica](https://github.com/semperai/amica)
+- [chatd](https://github.com/BruceMacD/chatd)
 
 ### Terminal
 
@@ -245,6 +262,10 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [gptel Emacs client](https://github.com/karthink/gptel)
 - [Oatmeal](https://github.com/dustinblackman/oatmeal)
 
+### Database
+
+- [MindsDB](https://github.com/mindsdb/mindsdb/blob/staging/mindsdb/integrations/handlers/ollama_handler/README.md)
+
 ### Package managers
 
 - [Pacman](https://archlinux.org/packages/extra/x86_64/ollama/)
@@ -265,7 +286,8 @@ See the [API documentation](./docs/api.md) for all endpoints.
 
 ### Mobile
 
-- [Maid](https://github.com/danemadsen/Maid) (Mobile Artificial Intelligence Distribution)
+- [Enchanted](https://github.com/AugustDev/enchanted)
+- [Maid](https://github.com/danemadsen/Maid)
 
 ### Extensions & Plugins
 
@@ -276,6 +298,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Logseq Ollama plugin](https://github.com/omagdy7/ollama-logseq)
 - [Dagger Chatbot](https://github.com/samalba/dagger-chatbot)
 - [Discord AI Bot](https://github.com/mekb-turtle/discord-ai-bot)
+- [Ollama Telegram Bot](https://github.com/ruecat/ollama-telegram)
 - [Hass Ollama Conversation](https://github.com/ej52/hass-ollama-conversation)
 - [Rivet plugin](https://github.com/abrenneke/rivet-plugin-ollama)
 - [Llama Coder](https://github.com/ex3ndr/llama-coder) (Copilot alternative using Ollama)
````
api/client.go

```diff
@@ -221,6 +221,19 @@ func (c *Client) Generate(ctx context.Context, req *GenerateRequest, fn Generate
 	})
 }
 
+type ChatResponseFunc func(ChatResponse) error
+
+func (c *Client) Chat(ctx context.Context, req *ChatRequest, fn ChatResponseFunc) error {
+	return c.stream(ctx, http.MethodPost, "/api/chat", req, func(bts []byte) error {
+		var resp ChatResponse
+		if err := json.Unmarshal(bts, &resp); err != nil {
+			return err
+		}
+
+		return fn(resp)
+	})
+}
+
 type PullProgressFunc func(ProgressResponse) error
 
 func (c *Client) Pull(ctx context.Context, req *PullRequest, fn PullProgressFunc) error {
@@ -311,3 +324,15 @@ func (c *Client) CreateBlob(ctx context.Context, digest string, r io.Reader) err
 
 	return nil
 }
+
+func (c *Client) Version(ctx context.Context) (string, error) {
+	var version struct {
+		Version string `json:"version"`
+	}
+
+	if err := c.do(ctx, http.MethodGet, "/api/version", nil, &version); err != nil {
+		return "", err
+	}
+
+	return version.Version, nil
+}
```
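Not part of the diff: a minimal sketch of driving the two new client methods, assuming the `api` package at this tag and a server on the default port; the model name is borrowed from the README chat example above.

```go
package main

import (
	"context"
	"fmt"
	"log"

	"github.com/jmorganca/ollama/api"
)

func main() {
	// ClientFromEnvironment reads OLLAMA_HOST, as elsewhere in this diff.
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}

	// Version is the other new method in this file.
	if v, err := client.Version(context.Background()); err == nil {
		fmt.Println("server version:", v)
	}

	req := &api.ChatRequest{
		Model: "mistral", // assumed to be pulled already
		Messages: []api.Message{
			{Role: "user", Content: "why is the sky blue?"},
		},
	}

	// The callback runs once per streamed JSON object; the final one has
	// Done == true and carries the embedded Metrics.
	err = client.Chat(context.Background(), req, func(resp api.ChatResponse) error {
		fmt.Print(resp.Message.Content)
		if resp.Done {
			fmt.Println()
			resp.Summary() // promoted from the embedded Metrics; prints to stderr
		}
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}
}
```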
api/types.go (145 lines changed)

```diff
@@ -6,6 +6,7 @@ import (
 	"math"
 	"os"
 	"reflect"
+	"strconv"
 	"strings"
 	"time"
 )
@@ -30,6 +31,8 @@ func (e StatusError) Error() string {
 	}
 }
 
+type ImageData []byte
+
 type GenerateRequest struct {
 	Model  string `json:"model"`
 	Prompt string `json:"prompt"`
@@ -39,10 +42,45 @@ type GenerateRequest struct {
 	Stream *bool  `json:"stream,omitempty"`
 	Raw    bool   `json:"raw,omitempty"`
 	Format string `json:"format"`
+	Images []ImageData `json:"images,omitempty"`
 
 	Options map[string]interface{} `json:"options"`
 }
 
+type ChatRequest struct {
+	Model    string    `json:"model"`
+	Messages []Message `json:"messages"`
+	Stream   *bool     `json:"stream,omitempty"`
+	Format   string    `json:"format"`
+
+	Options map[string]interface{} `json:"options"`
+}
+
+type Message struct {
+	Role    string      `json:"role"` // one of ["system", "user", "assistant"]
+	Content string      `json:"content"`
+	Images  []ImageData `json:"images,omitempty"`
+}
+
+type ChatResponse struct {
+	Model     string    `json:"model"`
+	CreatedAt time.Time `json:"created_at"`
+	Message   Message   `json:"message"`
+
+	Done bool `json:"done"`
+
+	Metrics
+}
+
+type Metrics struct {
+	TotalDuration      time.Duration `json:"total_duration,omitempty"`
+	LoadDuration       time.Duration `json:"load_duration,omitempty"`
+	PromptEvalCount    int           `json:"prompt_eval_count,omitempty"`
+	PromptEvalDuration time.Duration `json:"prompt_eval_duration,omitempty"`
+	EvalCount          int           `json:"eval_count,omitempty"`
+	EvalDuration       time.Duration `json:"eval_duration,omitempty"`
+}
+
 // Options specfied in GenerateRequest, if you add a new option here add it to the API docs also
 type Options struct {
 	Runner
@@ -119,6 +157,7 @@ type ShowResponse struct {
 	Parameters string `json:"parameters,omitempty"`
 	Template   string `json:"template,omitempty"`
 	System     string `json:"system,omitempty"`
+	Details    ModelDetails `json:"details,omitempty"`
 }
 
 type CopyRequest struct {
@@ -158,6 +197,7 @@ type ModelResponse struct {
 	ModifiedAt time.Time `json:"modified_at"`
 	Size       int64     `json:"size"`
 	Digest     string    `json:"digest"`
+	Details    ModelDetails `json:"details,omitempty"`
 }
 
 type TokenResponse struct {
@@ -172,39 +212,42 @@ type GenerateResponse struct {
 	Done    bool  `json:"done"`
 	Context []int `json:"context,omitempty"`
 
-	TotalDuration      time.Duration `json:"total_duration,omitempty"`
-	LoadDuration       time.Duration `json:"load_duration,omitempty"`
-	PromptEvalCount    int           `json:"prompt_eval_count,omitempty"`
-	PromptEvalDuration time.Duration `json:"prompt_eval_duration,omitempty"`
-	EvalCount          int           `json:"eval_count,omitempty"`
-	EvalDuration       time.Duration `json:"eval_duration,omitempty"`
+	Metrics
 }
 
-func (r *GenerateResponse) Summary() {
-	if r.TotalDuration > 0 {
-		fmt.Fprintf(os.Stderr, "total duration: %v\n", r.TotalDuration)
+type ModelDetails struct {
+	Format            string   `json:"format"`
+	Family            string   `json:"family"`
+	Families          []string `json:"families"`
+	ParameterSize     string   `json:"parameter_size"`
+	QuantizationLevel string   `json:"quantization_level"`
+}
+
+func (m *Metrics) Summary() {
+	if m.TotalDuration > 0 {
+		fmt.Fprintf(os.Stderr, "total duration: %v\n", m.TotalDuration)
 	}
 
-	if r.LoadDuration > 0 {
-		fmt.Fprintf(os.Stderr, "load duration: %v\n", r.LoadDuration)
+	if m.LoadDuration > 0 {
+		fmt.Fprintf(os.Stderr, "load duration: %v\n", m.LoadDuration)
 	}
 
-	if r.PromptEvalCount > 0 {
-		fmt.Fprintf(os.Stderr, "prompt eval count: %d token(s)\n", r.PromptEvalCount)
+	if m.PromptEvalCount > 0 {
+		fmt.Fprintf(os.Stderr, "prompt eval count: %d token(s)\n", m.PromptEvalCount)
 	}
 
-	if r.PromptEvalDuration > 0 {
-		fmt.Fprintf(os.Stderr, "prompt eval duration: %s\n", r.PromptEvalDuration)
-		fmt.Fprintf(os.Stderr, "prompt eval rate: %.2f tokens/s\n", float64(r.PromptEvalCount)/r.PromptEvalDuration.Seconds())
+	if m.PromptEvalDuration > 0 {
+		fmt.Fprintf(os.Stderr, "prompt eval duration: %s\n", m.PromptEvalDuration)
+		fmt.Fprintf(os.Stderr, "prompt eval rate: %.2f tokens/s\n", float64(m.PromptEvalCount)/m.PromptEvalDuration.Seconds())
 	}
 
-	if r.EvalCount > 0 {
-		fmt.Fprintf(os.Stderr, "eval count: %d token(s)\n", r.EvalCount)
+	if m.EvalCount > 0 {
+		fmt.Fprintf(os.Stderr, "eval count: %d token(s)\n", m.EvalCount)
 	}
 
-	if r.EvalDuration > 0 {
-		fmt.Fprintf(os.Stderr, "eval duration: %s\n", r.EvalDuration)
-		fmt.Fprintf(os.Stderr, "eval rate: %.2f tokens/s\n", float64(r.EvalCount)/r.EvalDuration.Seconds())
+	if m.EvalDuration > 0 {
+		fmt.Fprintf(os.Stderr, "eval duration: %s\n", m.EvalDuration)
+		fmt.Fprintf(os.Stderr, "eval rate: %.2f tokens/s\n", float64(m.EvalCount)/m.EvalDuration.Seconds())
 	}
 }
 
@@ -360,3 +403,63 @@ func (d *Duration) UnmarshalJSON(b []byte) (err error) {
 
 	return nil
 }
+
+// FormatParams converts specified parameter options to their correct types
+func FormatParams(params map[string][]string) (map[string]interface{}, error) {
+	opts := Options{}
+	valueOpts := reflect.ValueOf(&opts).Elem() // names of the fields in the options struct
+	typeOpts := reflect.TypeOf(opts)           // types of the fields in the options struct
+
+	// build map of json struct tags to their types
+	jsonOpts := make(map[string]reflect.StructField)
+	for _, field := range reflect.VisibleFields(typeOpts) {
+		jsonTag := strings.Split(field.Tag.Get("json"), ",")[0]
+		if jsonTag != "" {
+			jsonOpts[jsonTag] = field
+		}
+	}
+
+	out := make(map[string]interface{})
+	// iterate params and set values based on json struct tags
+	for key, vals := range params {
+		if opt, ok := jsonOpts[key]; !ok {
+			return nil, fmt.Errorf("unknown parameter '%s'", key)
+		} else {
+			field := valueOpts.FieldByName(opt.Name)
+			if field.IsValid() && field.CanSet() {
+				switch field.Kind() {
+				case reflect.Float32:
+					floatVal, err := strconv.ParseFloat(vals[0], 32)
+					if err != nil {
+						return nil, fmt.Errorf("invalid float value %s", vals)
+					}
+
+					out[key] = float32(floatVal)
+				case reflect.Int:
+					intVal, err := strconv.ParseInt(vals[0], 10, 64)
+					if err != nil {
+						return nil, fmt.Errorf("invalid int value %s", vals)
+					}
+
+					out[key] = intVal
+				case reflect.Bool:
+					boolVal, err := strconv.ParseBool(vals[0])
+					if err != nil {
+						return nil, fmt.Errorf("invalid bool value %s", vals)
+					}
+
+					out[key] = boolVal
+				case reflect.String:
+					out[key] = vals[0]
+				case reflect.Slice:
+					// TODO: only string slices are supported right now
+					out[key] = vals
+				default:
+					return nil, fmt.Errorf("unknown type %s for %s", field.Kind(), key)
+				}
+			}
+		}
+	}
+
+	return out, nil
+}
```
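For orientation, a hedged sketch of how the new `FormatParams` helper behaves; the parameter names are real `Options` fields, but the surrounding program is illustrative, not from the diff.

```go
package main

import (
	"fmt"
	"log"

	"github.com/jmorganca/ollama/api"
)

func main() {
	// temperature is a float32 field, num_ctx an int, and stop a string slice
	// in api.Options, so FormatParams coerces each CLI string accordingly.
	params, err := api.FormatParams(map[string][]string{
		"temperature": {"0.7"},          // -> float32(0.7)
		"num_ctx":     {"4096"},         // -> int64(4096)
		"stop":        {"user:", "###"}, // string slices pass through as-is
	})
	if err != nil {
		log.Fatal(err) // e.g. "unknown parameter 'foo'"
	}

	// The result plugs straight into the Options map of a request.
	req := api.GenerateRequest{Model: "llama2", Prompt: "hi", Options: params}
	fmt.Printf("%+v\n", req.Options)
}
```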
cmd/cmd.go (446 lines changed)

```diff
@@ -17,6 +17,7 @@ import (
 	"os/exec"
 	"os/signal"
 	"path/filepath"
+	"regexp"
 	"runtime"
 	"strings"
 	"syscall"
@@ -25,6 +26,7 @@ import (
 	"github.com/olekukonko/tablewriter"
 	"github.com/spf13/cobra"
 	"golang.org/x/crypto/ssh"
+	"golang.org/x/exp/slices"
 	"golang.org/x/term"
 
 	"github.com/jmorganca/ollama/api"
@@ -36,6 +38,8 @@ import (
 	"github.com/jmorganca/ollama/version"
 )
 
+type ImageData []byte
+
 func CreateHandler(cmd *cobra.Command, args []string) error {
 	filename, _ := cmd.Flags().GetString("file")
 	filename, err := filepath.Abs(filename)
@@ -133,7 +137,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 	}
 
 	request := api.CreateRequest{Name: args[0], Modelfile: string(modelfile)}
-	if err := client.Create(context.Background(), &request, fn); err != nil {
+	if err := client.Create(cmd.Context(), &request, fn); err != nil {
 		return err
 	}
 
@@ -148,7 +152,7 @@ func RunHandler(cmd *cobra.Command, args []string) error {
 
 	name := args[0]
 	// check if the model exists on the server
-	_, err = client.Show(context.Background(), &api.ShowRequest{Name: name})
+	_, err = client.Show(cmd.Context(), &api.ShowRequest{Name: name})
 	var statusError api.StatusError
 	switch {
 	case errors.As(err, &statusError) && statusError.StatusCode == http.StatusNotFound:
@@ -208,7 +212,7 @@ func PushHandler(cmd *cobra.Command, args []string) error {
 	}
 
 	request := api.PushRequest{Name: args[0], Insecure: insecure}
-	if err := client.Push(context.Background(), &request, fn); err != nil {
+	if err := client.Push(cmd.Context(), &request, fn); err != nil {
 		return err
 	}
 
@@ -222,7 +226,7 @@ func ListHandler(cmd *cobra.Command, args []string) error {
 		return err
 	}
 
-	models, err := client.List(context.Background())
+	models, err := client.List(cmd.Context())
 	if err != nil {
 		return err
 	}
@@ -257,7 +261,7 @@ func DeleteHandler(cmd *cobra.Command, args []string) error {
 
 	for _, name := range args {
 		req := api.DeleteRequest{Name: name}
-		if err := client.Delete(context.Background(), &req); err != nil {
+		if err := client.Delete(cmd.Context(), &req); err != nil {
 			return err
 		}
 		fmt.Printf("deleted '%s'\n", name)
@@ -322,7 +326,7 @@ func ShowHandler(cmd *cobra.Command, args []string) error {
 	}
 
 	req := api.ShowRequest{Name: args[0]}
-	resp, err := client.Show(context.Background(), &req)
+	resp, err := client.Show(cmd.Context(), &req)
 	if err != nil {
 		return err
 	}
@@ -350,7 +354,7 @@ func CopyHandler(cmd *cobra.Command, args []string) error {
 	}
 
 	req := api.CopyRequest{Source: args[0], Destination: args[1]}
-	if err := client.Copy(context.Background(), &req); err != nil {
+	if err := client.Copy(cmd.Context(), &req); err != nil {
 		return err
 	}
 	fmt.Printf("copied '%s' to '%s'\n", args[0], args[1])
@@ -404,7 +408,7 @@ func PullHandler(cmd *cobra.Command, args []string) error {
 	}
 
 	request := api.PullRequest{Name: args[0], Insecure: insecure}
-	if err := client.Pull(context.Background(), &request, fn); err != nil {
+	if err := client.Pull(cmd.Context(), &request, fn); err != nil {
 		return err
 	}
 
@@ -412,13 +416,22 @@
 }
 
 func RunGenerate(cmd *cobra.Command, args []string) error {
+	interactive := true
+
+	opts := generateOptions{
+		Model:    args[0],
+		WordWrap: os.Getenv("TERM") == "xterm-256color",
+		Options:  map[string]interface{}{},
+		Images:   []ImageData{},
+	}
+
 	format, err := cmd.Flags().GetString("format")
 	if err != nil {
 		return err
 	}
+	opts.Format = format
 
 	prompts := args[1:]
-
 	// prepend stdin to the prompt if provided
 	if !term.IsTerminal(int(os.Stdin.Fd())) {
 		in, err := io.ReadAll(os.Stdin)
@@ -427,34 +440,41 @@ func RunGenerate(cmd *cobra.Command, args []string) error {
 		}
 
 		prompts = append([]string{string(in)}, prompts...)
+		opts.WordWrap = false
+		interactive = false
 	}
+	opts.Prompt = strings.Join(prompts, " ")
 
-	// output is being piped
-	if !term.IsTerminal(int(os.Stdout.Fd())) {
-		return generate(cmd, args[0], strings.Join(prompts, " "), false, format)
+	if len(prompts) > 0 {
+		interactive = false
 	}
 
-	wordWrap := os.Getenv("TERM") == "xterm-256color"
-
 	nowrap, err := cmd.Flags().GetBool("nowordwrap")
 	if err != nil {
 		return err
 	}
-	if nowrap {
-		wordWrap = false
+	opts.WordWrap = !nowrap
+
+	if !interactive {
+		return generate(cmd, opts)
 	}
 
-	// prompts are provided via stdin or args so don't enter interactive mode
-	if len(prompts) > 0 {
-		return generate(cmd, args[0], strings.Join(prompts, " "), wordWrap, format)
-	}
-
-	return generateInteractive(cmd, args[0], wordWrap, format)
+	return generateInteractive(cmd, opts)
 }
 
 type generateContextKey string
 
-func generate(cmd *cobra.Command, model, prompt string, wordWrap bool, format string) error {
+type generateOptions struct {
+	Model    string
+	Prompt   string
+	WordWrap bool
+	Format   string
+	System   string
+	Template string
+	Images   []ImageData
+	Options  map[string]interface{}
+}
+
+func generate(cmd *cobra.Command, opts generateOptions) error {
 	client, err := api.ClientFromEnvironment()
 	if err != nil {
 		return err
@@ -475,34 +495,39 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool, format st
 
 	termWidth, _, err := term.GetSize(int(os.Stdout.Fd()))
 	if err != nil {
-		wordWrap = false
+		opts.WordWrap = false
 	}
 
-	cancelCtx, cancel := context.WithCancel(context.Background())
+	ctx, cancel := context.WithCancel(cmd.Context())
 	defer cancel()
 
 	sigChan := make(chan os.Signal, 1)
 	signal.Notify(sigChan, syscall.SIGINT)
-	var abort bool
 
 	go func() {
 		<-sigChan
 		cancel()
-		abort = true
 	}()
 
 	var currentLineLength int
 	var wordBuffer string
 
-	request := api.GenerateRequest{Model: model, Prompt: prompt, Context: generateContext, Format: format}
 	fn := func(response api.GenerateResponse) error {
 		p.StopAndClear()
 
 		latest = response
 
-		if wordWrap {
+		termWidth, _, _ = term.GetSize(int(os.Stdout.Fd()))
+		if opts.WordWrap && termWidth >= 10 {
 			for _, ch := range response.Response {
 				if currentLineLength+1 > termWidth-5 {
+					if len(wordBuffer) > termWidth-10 {
+						fmt.Printf("%s%c", wordBuffer, ch)
+						wordBuffer = ""
+						currentLineLength = 0
+						continue
+					}
+
 					// backtrack the length of the last word and clear to the end of the line
 					fmt.Printf("\x1b[%dD\x1b[K\n", len(wordBuffer))
 					fmt.Printf("%s%c", wordBuffer, ch)
@@ -522,29 +547,44 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool, format st
 				}
 			}
 		} else {
-			fmt.Print(response.Response)
+			fmt.Printf("%s%s", wordBuffer, response.Response)
+			if len(wordBuffer) > 0 {
+				wordBuffer = ""
+			}
 		}
 
 		return nil
 	}
 
-	if err := client.Generate(cancelCtx, &request, fn); err != nil {
-		if strings.Contains(err.Error(), "context canceled") && abort {
+	images := make([]api.ImageData, 0)
+	for _, i := range opts.Images {
+		images = append(images, api.ImageData(i))
+	}
+	request := api.GenerateRequest{
+		Model:    opts.Model,
+		Prompt:   opts.Prompt,
+		Context:  generateContext,
+		Format:   opts.Format,
+		System:   opts.System,
+		Template: opts.Template,
+		Options:  opts.Options,
+		Images:   images,
+	}
+
+	if err := client.Generate(ctx, &request, fn); err != nil {
+		if errors.Is(err, context.Canceled) {
 			return nil
 		}
 		return err
 	}
-	if prompt != "" {
+	if opts.Prompt != "" {
 		fmt.Println()
 		fmt.Println()
 	}
 
 	if !latest.Done {
-		if abort {
-			return nil
-		}
-		return errors.New("unexpected end of response")
+		return nil
 	}
 
 	verbose, err := cmd.Flags().GetBool("verbose")
 	if err != nil {
@@ -555,16 +595,48 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool, format st
 		latest.Summary()
 	}
 
-	ctx := cmd.Context()
-	ctx = context.WithValue(ctx, generateContextKey("context"), latest.Context)
+	ctx = context.WithValue(cmd.Context(), generateContextKey("context"), latest.Context)
 	cmd.SetContext(ctx)
 
 	return nil
 }
 
-func generateInteractive(cmd *cobra.Command, model string, wordWrap bool, format string) error {
+type MultilineState int
+
+const (
+	MultilineNone MultilineState = iota
+	MultilinePrompt
+	MultilineSystem
+	MultilineTemplate
+)
+
+func modelIsMultiModal(cmd *cobra.Command, name string) bool {
+	// get model details
+	client, err := api.ClientFromEnvironment()
+	if err != nil {
+		fmt.Println("error: couldn't connect to ollama server")
+		return false
+	}
+
+	req := api.ShowRequest{Name: name}
+	resp, err := client.Show(cmd.Context(), &req)
+	if err != nil {
+		return false
+	}
+
+	return slices.Contains(resp.Details.Families, "clip")
+}
+
+func generateInteractive(cmd *cobra.Command, opts generateOptions) error {
+	multiModal := modelIsMultiModal(cmd, opts.Model)
+
 	// load the model
-	if err := generate(cmd, model, "", false, ""); err != nil {
+	loadOpts := generateOptions{
+		Model:  opts.Model,
+		Prompt: "",
+		Images: []ImageData{},
+	}
+	if err := generate(cmd, loadOpts); err != nil {
 		return err
 	}
 
@@ -581,6 +653,9 @@ func generateInteractive(cmd *cobra.Command, model string, wordWrap bool, format
 
 	usageSet := func() {
 		fmt.Fprintln(os.Stderr, "Available Commands:")
+		fmt.Fprintln(os.Stderr, "  /set parameter ...   Set a parameter")
+		fmt.Fprintln(os.Stderr, "  /set system <string> Set system message")
+		fmt.Fprintln(os.Stderr, "  /set template <string> Set prompt template")
 		fmt.Fprintln(os.Stderr, "  /set history         Enable history")
 		fmt.Fprintln(os.Stderr, "  /set nohistory       Disable history")
 		fmt.Fprintln(os.Stderr, "  /set wordwrap        Enable wordwrap")
@@ -597,11 +672,27 @@ func generateInteractive(cmd *cobra.Command, model string, wordWrap bool, format
 		fmt.Fprintln(os.Stderr, "  /show license      Show model license")
 		fmt.Fprintln(os.Stderr, "  /show modelfile    Show Modelfile for this model")
 		fmt.Fprintln(os.Stderr, "  /show parameters   Show parameters for this model")
-		fmt.Fprintln(os.Stderr, "  /show system       Show system prompt")
+		fmt.Fprintln(os.Stderr, "  /show system       Show system message")
 		fmt.Fprintln(os.Stderr, "  /show template     Show prompt template")
 		fmt.Fprintln(os.Stderr, "")
 	}
 
+	// only list out the most common parameters
+	usageParameters := func() {
+		fmt.Fprintln(os.Stderr, "Available Parameters:")
+		fmt.Fprintln(os.Stderr, "  /set parameter seed <int>             Random number seed")
+		fmt.Fprintln(os.Stderr, "  /set parameter num_predict <int>      Max number of tokens to predict")
+		fmt.Fprintln(os.Stderr, "  /set parameter top_k <int>            Pick from top k num of tokens")
+		fmt.Fprintln(os.Stderr, "  /set parameter top_p <float>          Pick token based on sum of probabilities")
+		fmt.Fprintln(os.Stderr, "  /set parameter num_ctx <int>          Set the context size")
+		fmt.Fprintln(os.Stderr, "  /set parameter temperature <float>    Set creativity level")
+		fmt.Fprintln(os.Stderr, "  /set parameter repeat_penalty <float> How strongly to penalize repetitions")
+		fmt.Fprintln(os.Stderr, "  /set parameter repeat_last_n <int>    Set how far back to look for repetitions")
+		fmt.Fprintln(os.Stderr, "  /set parameter num_gpu <int>          The number of layers to send to the GPU")
+		fmt.Fprintln(os.Stderr, "  /set parameter stop \"<string>\", ...   Set the stop parameters")
+		fmt.Fprintln(os.Stderr, "")
+	}
+
 	scanner, err := readline.New(readline.Prompt{
 		Prompt:    ">>> ",
 		AltPrompt: "... ",
@@ -615,6 +706,7 @@ func generateInteractive(cmd *cobra.Command, model string, wordWrap bool, format
 	fmt.Print(readline.StartBracketedPaste)
 	defer fmt.Printf(readline.EndBracketedPaste)
 
+	var multiline MultilineState
 	var prompt string
 
 	for {
@@ -641,16 +733,30 @@ func generateInteractive(cmd *cobra.Command, model string, wordWrap bool, format
 			// if the prompt so far starts with """ then we're in multiline mode
 			// and we need to keep reading until we find a line that ends with """
 			cut, found := strings.CutSuffix(line, `"""`)
-			prompt += cut + "\n"
+			prompt += cut
 
 			if !found {
+				prompt += "\n"
 				continue
 			}
 
 			prompt = strings.TrimPrefix(prompt, `"""`)
 			scanner.Prompt.UseAlt = false
+
+			switch multiline {
+			case MultilineSystem:
+				opts.System = prompt
+				prompt = ""
+				fmt.Println("Set system message.")
+			case MultilineTemplate:
+				opts.Template = prompt
+				prompt = ""
+				fmt.Println("Set prompt template.")
+			}
+			multiline = MultilineNone
 		case strings.HasPrefix(line, `"""`) && len(prompt) == 0:
 			scanner.Prompt.UseAlt = true
+			multiline = MultilinePrompt
 			prompt += line + "\n"
 			continue
 		case scanner.Pasting:
@@ -670,10 +776,10 @@ func generateInteractive(cmd *cobra.Command, model string, wordWrap bool, format
 			case "nohistory":
 				scanner.HistoryDisable()
 			case "wordwrap":
-				wordWrap = true
+				opts.WordWrap = true
 				fmt.Println("Set 'wordwrap' mode.")
 			case "nowordwrap":
-				wordWrap = false
+				opts.WordWrap = false
 				fmt.Println("Set 'nowordwrap' mode.")
 			case "verbose":
 				cmd.Flags().Set("verbose", "true")
@@ -685,12 +791,60 @@ func generateInteractive(cmd *cobra.Command, model string, wordWrap bool, format
 				if len(args) < 3 || args[2] != "json" {
 					fmt.Println("Invalid or missing format. For 'json' mode use '/set format json'")
 				} else {
-					format = args[2]
+					opts.Format = args[2]
 					fmt.Printf("Set format to '%s' mode.\n", args[2])
 				}
 			case "noformat":
-				format = ""
+				opts.Format = ""
 				fmt.Println("Disabled format.")
+			case "parameter":
+				if len(args) < 4 {
+					usageParameters()
+					continue
+				}
+				var params []string
+				for _, p := range args[3:] {
+					params = append(params, p)
+				}
+				fp, err := api.FormatParams(map[string][]string{args[2]: params})
+				if err != nil {
+					fmt.Printf("Couldn't set parameter: %q\n\n", err)
+					continue
+				}
+				fmt.Printf("Set parameter '%s' to '%s'\n\n", args[2], strings.Join(params, ", "))
+				opts.Options[args[2]] = fp[args[2]]
+			case "system", "template":
+				if len(args) < 3 {
+					usageSet()
+					continue
+				}
+				line := strings.Join(args[2:], " ")
+				line = strings.TrimPrefix(line, `"""`)
+				if strings.HasPrefix(args[2], `"""`) {
+					cut, found := strings.CutSuffix(line, `"""`)
+					prompt += cut
+					if found {
+						if args[1] == "system" {
+							opts.System = prompt
+							fmt.Println("Set system message.")
+						} else {
+							opts.Template = prompt
+							fmt.Println("Set prompt template.")
+						}
+						prompt = ""
+					} else {
+						prompt = `"""` + prompt + "\n"
+						if args[1] == "system" {
+							multiline = MultilineSystem
+						} else {
+							multiline = MultilineTemplate
+						}
+						scanner.Prompt.UseAlt = true
+					}
+				} else {
+					opts.System = line
+					fmt.Println("Set system message.")
+				}
 			default:
 				fmt.Printf("Unknown command '/set %s'. Type /? for help\n", args[1])
 			}
@@ -705,7 +859,7 @@ func generateInteractive(cmd *cobra.Command, model string, wordWrap bool, format
 				fmt.Println("error: couldn't connect to ollama server")
 				return err
 			}
-			resp, err := client.Show(cmd.Context(), &api.ShowRequest{Name: model})
+			resp, err := client.Show(cmd.Context(), &api.ShowRequest{Name: opts.Model})
 			if err != nil {
 				fmt.Println("error: couldn't get model")
 				return err
@@ -724,19 +878,33 @@ func generateInteractive(cmd *cobra.Command, model string, wordWrap bool, format
 			if resp.Parameters == "" {
 				fmt.Print("No parameters were specified for this model.\n\n")
 			} else {
+				if len(opts.Options) > 0 {
+					fmt.Println("User defined parameters:")
+					for k, v := range opts.Options {
+						fmt.Printf("%-*s %v\n", 30, k, v)
+					}
+					fmt.Println()
+				}
+				fmt.Println("Model defined parameters:")
 				fmt.Println(resp.Parameters)
 			}
 		case "system":
-			if resp.System == "" {
-				fmt.Print("No system prompt was specified for this model.\n\n")
-			} else {
-				fmt.Println(resp.System)
+			switch {
+			case opts.System != "":
+				fmt.Println(opts.System + "\n")
+			case resp.System != "":
+				fmt.Println(resp.System + "\n")
+			default:
+				fmt.Print("No system message was specified for this model.\n\n")
 			}
 		case "template":
-			if resp.Template == "" {
-				fmt.Print("No prompt template was specified for this model.\n\n")
-			} else {
+			switch {
+			case opts.Template != "":
+				fmt.Println(opts.Template + "\n")
+			case resp.Template != "":
 				fmt.Println(resp.Template)
+			default:
+				fmt.Print("No prompt template was specified for this model.\n\n")
 			}
 		default:
 			fmt.Printf("Unknown command '/show %s'. Type /? for help\n", args[1])
@@ -766,8 +934,30 @@ func generateInteractive(cmd *cobra.Command, model string, wordWrap bool, format
 			prompt += line
 		}
 
-		if len(prompt) > 0 && prompt[0] != '/' {
-			if err := generate(cmd, model, prompt, wordWrap, format); err != nil {
+		if len(prompt) > 0 && multiline == MultilineNone {
+			opts.Prompt = prompt
+			if multiModal {
+				newPrompt, images, err := extractFileNames(prompt)
+				if err != nil {
+					return err
+				}
+				opts.Prompt = newPrompt
+
+				// reset the context if we find another image
+				if len(images) > 0 {
+					opts.Images = images
+					ctx := cmd.Context()
+					ctx = context.WithValue(ctx, generateContextKey("context"), []int{})
+					cmd.SetContext(ctx)
+				}
+				if len(opts.Images) == 0 {
+					fmt.Println("This model requires you to add a jpeg, png, or svg image.")
+					fmt.Println()
+					prompt = ""
+					continue
+				}
+			}
+			if err := generate(cmd, opts); err != nil {
 				return err
 			}
 
@@ -776,6 +966,57 @@ func generateInteractive(cmd *cobra.Command, model string, wordWrap bool, format
 		}
 	}
 
+func normalizeFilePath(fp string) string {
+	// Define a map of escaped characters and their replacements
+	replacements := map[string]string{
+		"\\ ":  " ",  // Escaped space
+		"\\(":  "(",  // Escaped left parenthesis
+		"\\)":  ")",  // Escaped right parenthesis
+		"\\[":  "[",  // Escaped left square bracket
+		"\\]":  "]",  // Escaped right square bracket
+		"\\{":  "{",  // Escaped left curly brace
+		"\\}":  "}",  // Escaped right curly brace
+		"\\$":  "$",  // Escaped dollar sign
+		"\\&":  "&",  // Escaped ampersand
+		"\\;":  ";",  // Escaped semicolon
+		"\\'":  "'",  // Escaped single quote
+		"\\\\": "\\", // Escaped backslash
+		"\\*":  "*",  // Escaped asterisk
+		"\\?":  "?",  // Escaped question mark
+	}
+
+	for escaped, actual := range replacements {
+		fp = strings.ReplaceAll(fp, escaped, actual)
+	}
+	return fp
+}
+
+func extractFileNames(input string) (string, []ImageData, error) {
+	// Regex to match file paths starting with / or ./ and include escaped spaces (\ or %20)
+	// and followed by more characters and a file extension
+	regexPattern := `(?:\./|/)[\S\\ ]+?\.(?i:jpg|jpeg|png|svg)\b`
+	re := regexp.MustCompile(regexPattern)
+
+	filePaths := re.FindAllString(input, -1)
+	var imgs []ImageData
+
+	for _, fp := range filePaths {
+		nfp := normalizeFilePath(fp)
+		data, err := getImageData(nfp)
+		if err != nil {
+			if os.IsNotExist(err) {
+				continue
+			}
+			fmt.Printf("Couldn't process image: %q\n", err)
+			return "", imgs, err
+		}
+		fmt.Printf("Added image '%s'\n", nfp)
+		input = strings.ReplaceAll(input, fp, "")
+		imgs = append(imgs, data)
+	}
+	return input, imgs, nil
+}
+
 func RunServer(cmd *cobra.Command, _ []string) error {
 	host, port, err := net.SplitHostPort(os.Getenv("OLLAMA_HOST"))
 	if err != nil {
@@ -794,12 +1035,51 @@ func RunServer(cmd *cobra.Command, _ []string) error {
 		return err
 	}
 
-	var origins []string
-	if o := os.Getenv("OLLAMA_ORIGINS"); o != "" {
-		origins = strings.Split(o, ",")
-	}
-
-	return server.Serve(ln, origins)
+	return server.Serve(ln)
+}
+
+func getImageData(filePath string) ([]byte, error) {
+	file, err := os.Open(filePath)
+	if err != nil {
+		return nil, err
+	}
+	defer file.Close()
+
+	buf := make([]byte, 512)
+	_, err = file.Read(buf)
+	if err != nil {
+		return nil, err
+	}
+
+	contentType := http.DetectContentType(buf)
+	allowedTypes := []string{"image/jpeg", "image/jpg", "image/svg+xml", "image/png"}
+	if !slices.Contains(allowedTypes, contentType) {
+		return nil, fmt.Errorf("invalid image type: %s", contentType)
+	}
+
+	info, err := file.Stat()
+	if err != nil {
+		return nil, err
+	}
+
+	// Check if the file size exceeds 100MB
+	var maxSize int64 = 100 * 1024 * 1024 // 100MB in bytes
+	if info.Size() > maxSize {
+		return nil, fmt.Errorf("file size exceeds maximum limit (100MB)")
+	}
+
+	buf = make([]byte, info.Size())
+	_, err = file.Seek(0, 0)
+	if err != nil {
+		return nil, err
+	}
+
+	_, err = io.ReadFull(file, buf)
+	if err != nil {
+		return nil, err
+	}
+
+	return buf, nil
 }
 
 func initializeKeypair() error {
@@ -851,7 +1131,7 @@ func initializeKeypair() error {
 	return nil
 }
 
-func startMacApp(client *api.Client) error {
+func startMacApp(ctx context.Context, client *api.Client) error {
 	exe, err := os.Executable()
 	if err != nil {
 		return err
@@ -875,24 +1155,24 @@ func startMacApp(client *api.Client) error {
 		case <-timeout:
 			return errors.New("timed out waiting for server to start")
 		case <-tick:
-			if err := client.Heartbeat(context.Background()); err == nil {
+			if err := client.Heartbeat(ctx); err == nil {
 				return nil // server has started
 			}
 		}
 	}
 }
 
-func checkServerHeartbeat(_ *cobra.Command, _ []string) error {
+func checkServerHeartbeat(cmd *cobra.Command, _ []string) error {
 	client, err := api.ClientFromEnvironment()
 	if err != nil {
 		return err
 	}
-	if err := client.Heartbeat(context.Background()); err != nil {
+	if err := client.Heartbeat(cmd.Context()); err != nil {
 		if !strings.Contains(err.Error(), "connection refused") {
 			return err
 		}
 		if runtime.GOOS == "darwin" {
-			if err := startMacApp(client); err != nil {
+			if err := startMacApp(cmd.Context(), client); err != nil {
 				return fmt.Errorf("could not connect to ollama app, is it running?")
 			}
 		} else {
@@ -902,8 +1182,29 @@ func checkServerHeartbeat(_ *cobra.Command, _ []string) error {
 	return nil
 }
 
+func versionHandler(cmd *cobra.Command, _ []string) {
+	client, err := api.ClientFromEnvironment()
+	if err != nil {
+		return
+	}
+
+	serverVersion, err := client.Version(cmd.Context())
+	if err != nil {
+		fmt.Println("Warning: could not connect to a running Ollama instance")
+	}
+
+	if serverVersion != "" {
+		fmt.Printf("ollama version is %s\n", serverVersion)
+	}
+
+	if serverVersion != version.Version {
+		fmt.Printf("Warning: client version is %s\n", version.Version)
+	}
+}
+
 func NewCLI() *cobra.Command {
 	log.SetFlags(log.LstdFlags | log.Lshortfile)
+	cobra.EnableCommandSorting = false
 
 	rootCmd := &cobra.Command{
 		Use: "ollama",
@@ -913,10 +1214,17 @@ func NewCLI() *cobra.Command {
 		CompletionOptions: cobra.CompletionOptions{
 			DisableDefaultCmd: true,
 		},
-		Version: version.Version,
+		Run: func(cmd *cobra.Command, args []string) {
+			if version, _ := cmd.Flags().GetBool("version"); version {
+				versionHandler(cmd, args)
+				return
+			}
+
+			cmd.Print(cmd.UsageString())
+		},
 	}
 
-	cobra.EnableCommandSorting = false
+	rootCmd.Flags().BoolP("version", "v", false, "Show version information")
 
 	createCmd := &cobra.Command{
 		Use: "create MODEL",
@@ -940,7 +1248,7 @@ func NewCLI() *cobra.Command {
 	showCmd.Flags().Bool("modelfile", false, "Show Modelfile of a model")
 	showCmd.Flags().Bool("parameters", false, "Show parameters of a model")
 	showCmd.Flags().Bool("template", false, "Show template of a model")
-	showCmd.Flags().Bool("system", false, "Show system prompt of a model")
+	showCmd.Flags().Bool("system", false, "Show system message of a model")
 
 	runCmd := &cobra.Command{
 		Use: "run MODEL [PROMPT]",
```
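The interactive multimodal handling above hinges on the path regex compiled in `extractFileNames`. A hedged standalone sketch of its matching behavior (the sample paths are made up):

```go
package main

import (
	"fmt"
	"regexp"
)

func main() {
	// The same pattern extractFileNames compiles above.
	re := regexp.MustCompile(`(?:\./|/)[\S\\ ]+?\.(?i:jpg|jpeg|png|svg)\b`)

	// Absolute and relative paths match, shell-escaped spaces included;
	// normalizeFilePath later strips the backslashes before the file is read.
	input := `What's in /tmp/cat\ photo.PNG and ./smile.jpg?`
	fmt.Println(re.FindAllString(input, -1))
	// Output: [/tmp/cat\ photo.PNG ./smile.jpg]
}
```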
docs/api.md

@@ -3,6 +3,7 @@
 ## Endpoints
 
 - [Generate a completion](#generate-a-completion)
+- [Generate a chat completion](#generate-a-chat-completion)
 - [Create a Model](#create-a-model)
 - [List Local Models](#list-local-models)
 - [Show Model Information](#show-model-information)
@@ -24,7 +25,7 @@ All durations are returned in nanoseconds.
 
 ### Streaming responses
 
-Certain endpoints stream responses as JSON objects delineated with the newline (`\n`) character.
+Certain endpoints stream responses as JSON objects.
 
 ## Generate a completion
 
@@ -32,22 +33,23 @@
 POST /api/generate
 ```
 
-Generate a response for a given prompt with a provided model. This is a streaming endpoint, so will be a series of responses. The final response object will include statistics and additional data from the request.
+Generate a response for a given prompt with a provided model. This is a streaming endpoint, so there will be a series of responses. The final response object will include statistics and additional data from the request.
 
 ### Parameters
 
 - `model`: (required) the [model name](#model-names)
 - `prompt`: the prompt to generate a response for
+- `images`: a list of base64-encoded images (for multimodal models such as `llava`)
 
 Advanced parameters (optional):
 
 - `format`: the format to return a response in. Currently the only accepted value is `json`
 - `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
-- `system`: system prompt (overrides what is defined in the `Modelfile`)
+- `system`: system message (overrides what is defined in the `Modelfile`)
 - `template`: the full prompt or prompt template (overrides what is defined in the `Modelfile`)
 - `context`: the context parameter returned from a previous request to `/generate`, this can be used to keep a short conversational memory
 - `stream`: if `false` the response will be returned as a single response object, rather than a stream of objects
-- `raw`: if `true` no formatting will be applied to the prompt and no context will be returned. You may choose to use the `raw` parameter if you are specifying a full templated prompt in your request to the API, and are managing history yourself.
+- `raw`: if `true` no formatting will be applied to the prompt. You may choose to use the `raw` parameter if you are specifying a full templated prompt in your request to the API.
 
 ### JSON mode
 
@@ -83,8 +85,6 @@ The final response in the stream also includes additional data about the generation:
 
 - `total_duration`: time spent generating the response
 - `load_duration`: time spent in nanoseconds loading the model
-- `sample_count`: number of samples generated
-- `sample_duration`: time spent generating samples
 - `prompt_eval_count`: number of tokens in the prompt
 - `prompt_eval_duration`: time spent in nanoseconds evaluating the prompt
 - `eval_count`: number of tokens in the response
@@ -103,8 +103,6 @@ To calculate how fast the response is generated in tokens per second (token/s),
   "done": true,
   "total_duration": 5589157167,
   "load_duration": 3013701500,
-  "sample_count": 114,
-  "sample_duration": 81442000,
   "prompt_eval_count": 46,
   "prompt_eval_duration": 1160282000,
   "eval_count": 113,
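
Since every streamed reply is one JSON object per line and the statistics ride on the final object, a client can be sketched directly from the fields above. A minimal illustration in Go (the project's language), assuming a local server on the default port; this sketch is not part of the changeset:

```go
// Minimal streaming client for /api/generate: read one JSON object per
// line, print tokens as they arrive, and report tokens/s from the final
// object's eval_count and eval_duration (durations are in nanoseconds).
package main

import (
	"bufio"
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

type generateResponse struct {
	Response     string `json:"response"`
	Done         bool   `json:"done"`
	EvalCount    int    `json:"eval_count"`
	EvalDuration int64  `json:"eval_duration"`
}

func main() {
	body := []byte(`{"model": "llama2", "prompt": "Why is the sky blue?"}`)
	resp, err := http.Post("http://localhost:11434/api/generate", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	scanner := bufio.NewScanner(resp.Body)
	for scanner.Scan() {
		var r generateResponse
		if err := json.Unmarshal(scanner.Bytes(), &r); err != nil {
			panic(err)
		}
		fmt.Print(r.Response)
		if r.Done && r.EvalDuration > 0 {
			fmt.Printf("\n%.1f tokens/s\n", float64(r.EvalCount)/(float64(r.EvalDuration)/1e9))
		}
	}
}
```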
@@ -114,6 +112,8 @@
 
 #### Request (No streaming)
 
+A response can be received in one reply when streaming is off.
+
 ```shell
 curl http://localhost:11434/api/generate -d '{
   "model": "llama2",
@@ -135,8 +135,6 @@ If `stream` is set to `false`, the response will be a single JSON object:
   "done": true,
   "total_duration": 5589157167,
   "load_duration": 3013701500,
-  "sample_count": 114,
-  "sample_duration": 81442000,
   "prompt_eval_count": 46,
   "prompt_eval_duration": 1160282000,
   "eval_count": 13,
@@ -144,9 +142,40 @@ If `stream` is set to `false`, the response will be a single JSON object:
 }
 ```
 
-#### Request (Raw mode)
+#### Request (with images)
 
-In some cases you may wish to bypass the templating system and provide a full prompt. In this case, you can use the `raw` parameter to disable formatting and context.
+To submit images to multimodal models such as `llava` or `bakllava`, provide a list of base64-encoded `images`:
+
+```shell
+curl http://localhost:11434/api/generate -d '{
+  "model": "llava",
+  "prompt":"What is in this picture?",
+  "stream": false,
+  "images": ["iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+V...<base64 payload truncated>..."]
+}'
+```
+
+#### Response
+
+```
+{
+  "model": "llava",
+  "created_at": "2023-11-03T15:36:02.583064Z",
+  "response": "A happy cartoon character, which is cute and cheerful.",
+  "context": [1, 2, 3],
+  "done": true,
+  "total_duration": 14648695333,
+  "load_duration": 3302671417,
+  "prompt_eval_count": 14,
+  "prompt_eval_duration": 286243000,
+  "eval_count": 129,
+  "eval_duration": 10931424000
+}
+```
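
A client normally builds the `images` field by base64-encoding raw file bytes. A short sketch in Go; the `picture.png` path is purely illustrative, and the example assumes the `llava` model shown above is available locally:

```go
// Build an /api/generate request with an attached image: read the file,
// base64-encode the raw bytes (standard padded encoding, not a data: URL),
// and send it in the images array.
package main

import (
	"bytes"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"os"
)

func main() {
	data, err := os.ReadFile("picture.png") // hypothetical input file
	if err != nil {
		panic(err)
	}

	payload := map[string]any{
		"model":  "llava",
		"prompt": "What is in this picture?",
		"stream": false,
		"images": []string{base64.StdEncoding.EncodeToString(data)},
	}

	body, _ := json.Marshal(payload)
	resp, err := http.Post("http://localhost:11434/api/generate", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	out, _ := io.ReadAll(resp.Body)
	fmt.Println(string(out))
}
```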
+#### Request (Raw Mode)
+
+In some cases you may wish to bypass the templating system and provide a full prompt. In this case, you can use the `raw` parameter to disable formatting.
 
 ```shell
 curl http://localhost:11434/api/generate -d '{
@@ -164,6 +193,7 @@ curl http://localhost:11434/api/generate -d '{
   "model": "mistral",
   "created_at": "2023-11-03T15:36:02.583064Z",
   "response": " The sky appears blue because of a phenomenon called Rayleigh scattering.",
+  "context": [1, 2, 3],
   "done": true,
   "total_duration": 14648695333,
   "load_duration": 3302671417,
@@ -249,7 +279,7 @@ curl http://localhost:11434/api/generate -d '{
   "penalize_newline": true,
   "stop": ["\n", "user:"],
   "numa": false,
-  "num_ctx": 4,
+  "num_ctx": 1024,
   "num_batch": 2,
   "num_gqa": 1,
   "num_gpu": 1,
@@ -275,12 +305,9 @@ curl http://localhost:11434/api/generate -d '{
   "model": "llama2",
   "created_at": "2023-08-04T19:22:45.499127Z",
   "response": "The sky is blue because it is the color of the sky.",
-  "context": [1, 2, 3],
   "done": true,
   "total_duration": 5589157167,
   "load_duration": 3013701500,
-  "sample_count": 114,
-  "sample_duration": 81442000,
   "prompt_eval_count": 46,
   "prompt_eval_duration": 1160282000,
   "eval_count": 13,
@@ -288,6 +315,155 @@ curl http://localhost:11434/api/generate -d '{
 }
 ```
 
+## Generate a chat completion
+
+```shell
+POST /api/chat
+```
+
+Generate the next message in a chat with a provided model. This is a streaming endpoint, so there will be a series of responses. The final response object will include statistics and additional data from the request.
+
+### Parameters
+
+- `model`: (required) the [model name](#model-names)
+- `messages`: the messages of the chat, this can be used to keep a chat memory
+
+The `message` object has the following fields:
+
+- `role`: the role of the message, either `system`, `user` or `assistant`
+- `content`: the content of the message
+- `images` (optional): a list of images to include in the message (for multimodal models such as `llava`)
+
+Advanced parameters (optional):
+
+- `format`: the format to return a response in. Currently the only accepted value is `json`
+- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
+- `template`: the full prompt or prompt template (overrides what is defined in the `Modelfile`)
+- `stream`: if `false` the response will be returned as a single response object, rather than a stream of objects
+
+### Examples
+
+#### Request
+
+Send a chat message with a streaming response.
+
+```shell
+curl http://localhost:11434/api/chat -d '{
+  "model": "llama2",
+  "messages": [
+    {
+      "role": "user",
+      "content": "why is the sky blue?"
+    }
+  ]
+}'
+```
+
+#### Response
+
+A stream of JSON objects is returned:
+
+```json
+{
+  "model": "llama2",
+  "created_at": "2023-08-04T08:52:19.385406455-07:00",
+  "message": {
+    "role": "assistant",
+    "content": "The"
+  },
+  "done": false
+}
+```
+
+Final response:
+
+```json
+{
+  "model": "llama2",
+  "created_at": "2023-08-04T19:22:45.499127Z",
+  "done": true,
+  "total_duration": 5589157167,
+  "load_duration": 3013701500,
+  "prompt_eval_count": 46,
+  "prompt_eval_duration": 1160282000,
+  "eval_count": 113,
+  "eval_duration": 1325948000
+}
+```
+
+#### Request (With History)
+
+Send a chat message with a conversation history.
+
+```shell
+curl http://localhost:11434/api/chat -d '{
+  "model": "llama2",
+  "messages": [
+    {
+      "role": "user",
+      "content": "why is the sky blue?"
+    },
+    {
+      "role": "assistant",
+      "content": "due to rayleigh scattering."
+    },
+    {
+      "role": "user",
+      "content": "how is that different than mie scattering?"
+    }
+  ]
+}'
+```
+
+#### Response
+
+A stream of JSON objects is returned:
+
+```json
+{
+  "model": "llama2",
+  "created_at": "2023-08-04T08:52:19.385406455-07:00",
+  "message": {
+    "role": "assistant",
+    "content": "The"
+  },
+  "done": false
+}
+```
+
+Final response:
+
+```json
+{
+  "model": "llama2",
+  "created_at": "2023-08-04T19:22:45.499127Z",
+  "done": true,
+  "total_duration": 5589157167,
+  "load_duration": 3013701500,
+  "prompt_eval_count": 46,
+  "prompt_eval_duration": 1160282000,
+  "eval_count": 113,
+  "eval_duration": 1325948000
+}
+```
+
+#### Request (with images)
+
+Send a chat message with an image included.
+
+```shell
+curl http://localhost:11434/api/chat -d '{
+  "model": "llama2",
+  "messages": [
+    {
+      "role": "user",
+      "content": "what is in this image?",
+      "images": ["iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+V...<base64 payload truncated>..."]
+    }
+  ]
+}'
+```
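
A multi-turn client follows directly from the request and response shapes above: append each assistant reply to the history before the next turn. A sketch in Go, assuming a local server and the `llama2` model; the streamed `message.content` fragments are accumulated into one assistant message:

```go
// Two-turn /api/chat client mirroring the curl examples above.
package main

import (
	"bufio"
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

type message struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

type chatResponse struct {
	Message message `json:"message"`
	Done    bool    `json:"done"`
}

func chat(history []message) (message, error) {
	body, _ := json.Marshal(map[string]any{"model": "llama2", "messages": history})
	resp, err := http.Post("http://localhost:11434/api/chat", "application/json", bytes.NewReader(body))
	if err != nil {
		return message{}, err
	}
	defer resp.Body.Close()

	// accumulate the streamed tokens into a single assistant message
	reply := message{Role: "assistant"}
	scanner := bufio.NewScanner(resp.Body)
	for scanner.Scan() {
		var r chatResponse
		if err := json.Unmarshal(scanner.Bytes(), &r); err != nil {
			return message{}, err
		}
		reply.Content += r.Message.Content
	}
	return reply, scanner.Err()
}

func main() {
	history := []message{{Role: "user", Content: "why is the sky blue?"}}
	reply, err := chat(history)
	if err != nil {
		panic(err)
	}
	fmt.Println(reply.Content)

	// keep the reply in the history so the next turn has context
	history = append(history, reply,
		message{Role: "user", Content: "how is that different than mie scattering?"})
	if reply, err = chat(history); err == nil {
		fmt.Println(reply.Content)
	}
}
```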
 
 ## Create a Model
 
 ```shell
@@ -415,7 +591,7 @@ A single JSON object will be returned.
 POST /api/show
 ```
 
-Show details about a model including modelfile, template, parameters, license, and system prompt.
+Show information about a model including details, modelfile, template, parameters, license, and system prompt.
 
 ### Parameters
 
@@ -435,10 +611,16 @@ curl http://localhost:11434/api/show -d '{
 
 ```json
 {
-  "license": "<contents of license block>",
-  "modelfile": "# Modelfile generated by \"ollama show\"\n# To build a new Modelfile based on this one, replace the FROM line with:\n# FROM llama2:latest\n\nFROM /Users/username/.ollama/models/blobs/sha256:8daa9615cce30c259a9555b1cc250d461d1bc69980a274b44d7eda0be78076d8\nTEMPLATE \"\"\"[INST] {{ if and .First .System }}<<SYS>>{{ .System }}<</SYS>>\n\n{{ end }}{{ .Prompt }} [/INST] \"\"\"\nSYSTEM \"\"\"\"\"\"\nPARAMETER stop [INST]\nPARAMETER stop [/INST]\nPARAMETER stop <<SYS>>\nPARAMETER stop <</SYS>>\n",
-  "parameters": "stop [INST]\nstop [/INST]\nstop <<SYS>>\nstop <</SYS>>",
-  "template": "[INST] {{ if and .First .System }}<<SYS>>{{ .System }}<</SYS>>\n\n{{ end }}{{ .Prompt }} [/INST] "
+  "modelfile": "# Modelfile generated by \"ollama show\"\n# To build a new Modelfile based on this one, replace the FROM line with:\n# FROM llava:latest\n\nFROM mike/llava:latest\nTEMPLATE \"\"\"\nUSER:{{ .Prompt }}\nASSISTANT:\n\"\"\"\nPARAMETER num_ctx 4096",
+  "parameters": "num_ctx 4096",
+  "template": "\nUSER:{{ .Prompt }}\nASSISTANT:\n",
+  "license": "<license>",
+  "details": {
+    "format": "gguf",
+    "families": ["llama", "clip"],
+    "parameter_size": "7B",
+    "quantization_level": "Q4_0"
+  }
 }
 ```
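
The new `details` block is straightforward to consume programmatically. A sketch in Go; the `{"name": ...}` request body follows the Parameters section of these docs and should be treated as an assumption if your version differs:

```go
// Read the model details returned by /api/show.
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

type showResponse struct {
	Details struct {
		Format            string   `json:"format"`
		Families          []string `json:"families"`
		ParameterSize     string   `json:"parameter_size"`
		QuantizationLevel string   `json:"quantization_level"`
	} `json:"details"`
}

func main() {
	body := []byte(`{"name": "llama2"}`) // field name assumed from the docs' Parameters section
	resp, err := http.Post("http://localhost:11434/api/show", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var show showResponse
	if err := json.NewDecoder(resp.Body).Decode(&show); err != nil {
		panic(err)
	}
	fmt.Printf("%s %s (%s)\n", show.Details.ParameterSize, show.Details.QuantizationLevel, show.Details.Format)
}
```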
 
@@ -23,7 +23,7 @@ Ollama binds to 127.0.0.1 port 11434 by default. Change the bind address with the
 On macOS:
 
 ```bash
-OLLAMA_HOST=0.0.0.0:11435 ollama serve
+OLLAMA_HOST=0.0.0.0:11434 ollama serve
 ```
 
 On Linux:
@@ -59,7 +59,7 @@ OLLAMA_ORIGINS=http://192.168.1.1:*,https://example.com ollama serve
 On Linux:
 
 ```bash
-echo 'Environment="OLLAMA_ORIGINS=http://129.168.1.1:*,https://example.com"' >>/etc/systemd/system/ollama.service.d/environment.conf
+echo 'Environment="OLLAMA_ORIGINS=http://192.168.1.1:*,https://example.com"' >>/etc/systemd/system/ollama.service.d/environment.conf
 ```
 
 Reload `systemd` and restart Ollama:
@@ -43,7 +43,6 @@ Ollama supports a set of model architectures, with support for more coming soon:
 
 - Llama & Mistral
 - Falcon & RW
-- GPT-NeoX
 - BigCode
 
 To view a model's architecture, check the `config.json` file in its HuggingFace repo. You should see an entry under `architectures` (e.g. `LlamaForCausalLM`).
@@ -184,9 +183,6 @@ python convert.py <path to model directory>
 # FalconForCausalLM
 python convert-falcon-hf-to-gguf.py <path to model directory>
 
-# GPTNeoXForCausalLM
-python convert-gptneox-hf-to-gguf.py <path to model directory>
-
 # GPTBigCodeForCausalLM
 python convert-starcoder-hf-to-gguf.py <path to model directory>
 ```
@@ -31,11 +31,11 @@ INSTRUCTION arguments
 ```
 
 | Instruction                         | Description                                                     |
 | ----------------------------------- | --------------------------------------------------------------- |
 | [`FROM`](#from-required) (required) | Defines the base model to use.                                  |
 | [`PARAMETER`](#parameter)           | Sets the parameters for how Ollama will run the model.          |
 | [`TEMPLATE`](#template)             | The full prompt template to be sent to the model.               |
-| [`SYSTEM`](#system)                 | Specifies the system prompt that will be set in the template.   |
+| [`SYSTEM`](#system)                 | Specifies the system message that will be set in the template.  |
 | [`ADAPTER`](#adapter)               | Defines the (Q)LoRA adapters to apply to the model.             |
 | [`LICENSE`](#license)               | Specifies the legal license.                                    |
 
@@ -52,7 +52,7 @@ PARAMETER temperature 1
 # sets the context window size to 4096, this controls how many tokens the LLM can use as context to generate the next token
 PARAMETER num_ctx 4096
 
-# sets a custom system prompt to specify the behavior of the chat assistant
+# sets a custom system message to specify the behavior of the chat assistant
 SYSTEM You are Mario from super mario bros, acting as an assistant.
 ```
 
@@ -152,13 +152,13 @@ PARAMETER <parameter> <parametervalue>
 
 ### TEMPLATE
 
-`TEMPLATE` of the full prompt template to be passed into the model. It may include (optionally) a system prompt and a user's prompt. This is used to create a full custom prompt, and syntax may be model specific. You can usually find the template for a given model in the readme for that model.
+`TEMPLATE` of the full prompt template to be passed into the model. It may include (optionally) a system message and a user's prompt. This is used to create a full custom prompt, and syntax may be model specific. You can usually find the template for a given model in the readme for that model.
 
 #### Template Variables
 
 | Variable        | Description                                                                                                    |
 | --------------- | -------------------------------------------------------------------------------------------------------------- |
-| `{{ .System }}` | The system prompt used to specify custom behavior, this must also be set in the Modelfile as an instruction.   |
+| `{{ .System }}` | The system message used to specify custom behavior, this must also be set in the Modelfile as an instruction.  |
 | `{{ .Prompt }}` | The incoming prompt, this is not specified in the model file and will be set based on input.                   |
 | `{{ .First }}`  | A boolean value used to render specific template information for the first generation of a session.            |
 
@@ -180,7 +180,7 @@ SYSTEM """<system message>"""
 
 ### SYSTEM
 
-The `SYSTEM` instruction specifies the system prompt to be used in the template, if applicable.
+The `SYSTEM` instruction specifies the system message to be used in the template, if applicable.
 
 ```modelfile
 SYSTEM """<system message>"""
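
The template variables above use Go's `text/template` syntax, so the rendering behavior can be checked outside of Ollama. A small sketch using the llama2-style template quoted earlier in this changeset; the values are illustrative:

```go
// Render a Modelfile-style prompt template with text/template. On the
// first generation of a session (.First is true) the system message is
// wrapped in <<SYS>> markers; on later turns only the prompt is rendered.
package main

import (
	"os"
	"text/template"
)

func main() {
	tmpl := template.Must(template.New("prompt").Parse(
		"[INST] {{ if and .First .System }}<<SYS>>{{ .System }}<</SYS>>\n\n{{ end }}{{ .Prompt }} [/INST] "))

	err := tmpl.Execute(os.Stdout, map[string]any{
		"First":  true,
		"System": "You are Mario from super mario bros, acting as an assistant.",
		"Prompt": "why is the sky blue?",
	})
	if err != nil {
		panic(err)
	}
}
```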

docs/tutorials/fly-gpu.md (new file, 83 lines)

# Running Ollama on Fly.io GPU Instances

Ollama runs with little to no configuration on [Fly.io GPU instances](https://fly.io/docs/gpus/gpu-quickstart/). If you don't have access to GPUs yet, you'll need to [apply for access](https://fly.io/gpu/) on the waitlist. Once you're accepted, you'll get an email with instructions on how to get started.

Create a new app with `fly apps create`:

```bash
fly apps create
```

Then create a `fly.toml` file in a new folder that looks like this:

```toml
app = "sparkling-violet-709"
primary_region = "ord"
vm.size = "a100-40gb" # see https://fly.io/docs/gpus/gpu-quickstart/ for more info

[build]
  image = "ollama/ollama"

[http_service]
  internal_port = 11434
  force_https = false
  auto_stop_machines = true
  auto_start_machines = true
  min_machines_running = 0
  processes = ["app"]

[mounts]
  source = "models"
  destination = "/root/.ollama"
  initial_size = "100gb"
```

Then create a [new private IPv6 address](https://fly.io/docs/reference/private-networking/#flycast-private-load-balancing) for your app:

```bash
fly ips allocate-v6 --private
```

Then deploy your app:

```bash
fly deploy
```

And finally you can access it interactively with a new Fly.io Machine:

```
fly machine run -e OLLAMA_HOST=http://your-app-name.flycast --shell ollama/ollama
```

```bash
$ ollama run openchat:7b-v3.5-fp16
>>> How do I bake chocolate chip cookies?
To bake chocolate chip cookies, follow these steps:

1. Preheat the oven to 375°F (190°C) and line a baking sheet with parchment paper or silicone baking mat.

2. In a large bowl, mix together 1 cup of unsalted butter (softened), 3/4 cup granulated sugar, and 3/4 cup packed brown sugar until light and fluffy.

3. Add 2 large eggs, one at a time, to the butter mixture, beating well after each addition. Stir in 1 teaspoon of pure vanilla extract.

4. In a separate bowl, whisk together 2 cups all-purpose flour, 1/2 teaspoon baking soda, and 1/2 teaspoon salt. Gradually add the dry ingredients to the wet ingredients, stirring until just combined.

5. Fold in 2 cups of chocolate chips (or chunks) into the dough.

6. Drop rounded tablespoons of dough onto the prepared baking sheet, spacing them about 2 inches apart.

7. Bake for 10-12 minutes, or until the edges are golden brown. The centers should still be slightly soft.

8. Allow the cookies to cool on the baking sheet for a few minutes before transferring them to a wire rack to cool completely.

Enjoy your homemade chocolate chip cookies!
```

When you set it up like this, it will automatically turn off when you're done using it. Then when you access it again, it will automatically turn back on. This is a great way to save money on GPU instances when you're not using them. If you want a persistent wake-on-use connection to your Ollama instance, you can set up a [connection to your Fly network using WireGuard](https://fly.io/docs/reference/private-networking/#discovering-apps-through-dns-on-a-wireguard-connection). Then you can access your Ollama instance at `http://your-app-name.flycast`.

And that's it!

@@ -42,12 +42,13 @@ text_splitter=RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
 all_splits = text_splitter.split_documents(data)
 ```
 
-It's split up, but we have to find the relevant splits and then submit those to the model. We can do this by creating embeddings and storing them in a vector database. For now, we don't have embeddings built in to Ollama, though we will be adding that soon, so for now, we can use the GPT4All library for that. We will use ChromaDB in this example for a vector database. `pip install GPT4All chromadb`
+It's split up, but we have to find the relevant splits and then submit those to the model. We can do this by creating embeddings and storing them in a vector database. We can use Ollama directly to instantiate an embedding model. We will use ChromaDB in this example for a vector database. `pip install GPT4All chromadb`
 
 ```python
-from langchain.embeddings import GPT4AllEmbeddings
+from langchain.embeddings import OllamaEmbeddings
 from langchain.vectorstores import Chroma
-vectorstore = Chroma.from_documents(documents=all_splits, embedding=GPT4AllEmbeddings())
+oembed = OllamaEmbeddings(base_url="http://localhost:11434", model="llama2")
+vectorstore = Chroma.from_documents(documents=all_splits, embedding=oembed)
 ```
 
 Now let's ask a question from the document. **Who was Neleus, and who is in his family?** Neleus is a character in the Odyssey, and the answer can be found in our text.
@@ -25,9 +25,11 @@ spec:
         image: ollama/ollama:latest
         env:
         - name: PATH
-          value: /usr/local/nvidia/bin:/usr/local/nvidia/lib64:/usr/bin:/usr/sbin:/bin:/sbin
+          value: /usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
         - name: LD_LIBRARY_PATH
-          value: /usr/local/nvidia/lib64
+          value: /usr/local/nvidia/lib:/usr/local/nvidia/lib64
+        - name: NVIDIA_DRIVER_CAPABILITIES
+          value: compute,utility
         ports:
         - name: http
           containerPort: 11434

examples/python-simplechat/client.py (new file, 46 lines)

```python
import json
import requests

# NOTE: ollama must be running for this to work, start the ollama app or run `ollama serve`
model = "llama2"  # TODO: update this for whatever model you wish to use


def chat(messages):
    r = requests.post(
        "http://0.0.0.0:11434/api/chat",
        json={"model": model, "messages": messages, "stream": True},
    )
    r.raise_for_status()
    output = ""

    for line in r.iter_lines():
        body = json.loads(line)
        if "error" in body:
            raise Exception(body["error"])
        if body.get("done") is False:
            message = body.get("message", "")
            content = message.get("content", "")
            output += content
            # the response streams one token at a time, print that as we receive it
            print(content, end="", flush=True)

        if body.get("done", False):
            message["content"] = output
            return message


def main():
    messages = []

    while True:
        user_input = input("Enter a prompt: ")
        print()
        messages.append({"role": "user", "content": user_input})
        message = chat(messages)
        messages.append(message)
        print("\n\n")


if __name__ == "__main__":
    main()
```

examples/python-simplechat/readme.md (new file, 24 lines)

# Simple Chat Example

The **chat** endpoint is one of two ways to generate text from an LLM with Ollama. At a high level you provide the endpoint an array of objects with a role and content specified. Then with each output and prompt, you add more of those role/content objects, which builds up the history.

## Review the Code

You can see in the **chat** function that actually calling the endpoint is done simply with:

```python
r = requests.post(
    "http://0.0.0.0:11434/api/chat",
    json={"model": model, "messages": messages, "stream": True},
)
```

With the **generate** endpoint, you need to provide a `prompt`. But with **chat**, you provide `messages`. And the resulting stream of responses includes a `message` object with a `content` field.

The final JSON object doesn't provide the full content, so you will need to build the content yourself.

In the **main** function, we collect `user_input` and add it as a message to our messages, and that is passed to the chat function. When the LLM is done responding, the output is added as another message.

## Next Steps

In this example, all generations are kept. You might want to experiment with summarizing everything older than 10 conversations to enable longer history with less context being used.

examples/typescript-simplechat/client.ts (new file, 77 lines)

```typescript
import * as readline from "readline";

const model = "llama2";
type Message = {
  role: "assistant" | "user" | "system";
  content: string;
}
const messages: Message[] = [{
  role: "system",
  content: "You are a helpful AI agent."
}]

const rl = readline.createInterface({
  input: process.stdin,
  output: process.stdout
})

async function chat(messages: Message[]): Promise<Message> {
  const body = {
    model: model,
    messages: messages
  }

  const response = await fetch("http://localhost:11434/api/chat", {
    method: "POST",
    body: JSON.stringify(body)
  })

  const reader = response.body?.getReader()
  if (!reader) {
    throw new Error("Failed to read response body")
  }
  let content = ""
  while (true) {
    const { done, value } = await reader.read()
    if (done) {
      break;
    }
    const rawjson = new TextDecoder().decode(value);
    const json = JSON.parse(rawjson)

    if (json.done === false) {
      process.stdout.write(json.message.content);
      content += json.message.content
    }
  }
  return { role: "assistant", content: content };
}

async function askQuestion(): Promise<void> {
  return new Promise<void>((resolve) => {
    rl.question("\n\nAsk a question: (press enter alone to quit)\n\n", async (user_input) => {
      if (user_input.trim() === "") {
        rl.close();
        console.log("Thank you. Goodbye.\n")
        console.log("=======\nHere is the message history that was used in this conversation.\n=======\n")
        messages.forEach(message => {
          console.log(message)
        })
        resolve();
      } else {
        console.log();
        messages.push({ role: "user", content: user_input });
        messages.push(await chat(messages));
        await askQuestion(); // Ask the next question
      }
    });
  });
}

async function main() {
  await askQuestion();
}

main();
```

examples/typescript-simplechat/package.json (new file, 1 line)

```json
{ "dependencies": { "@types/node": "^20.10.4", "prompt-sync": "^4.2.0", "readline": "^1.3.0" } }
```

examples/typescript-simplechat/readme.md (new file, 39 lines)

# Simple Chat Example

The **chat** endpoint is one of two ways to generate text from an LLM with Ollama. At a high level you provide the endpoint an array of message objects with a role and content specified. Then with each output and prompt, you add more messages, which builds up the history.

## Run the Example

There are a few ways to run this, just like any TypeScript code:

1. Compile with `tsc` and then run it with `node client.js`.
2. Install `tsx` and run it with `tsx client.ts`.
3. Install `bun` and run it with `bun client.ts`.

## Review the Code

You can see in the **chat** function that actually calling the endpoint is done simply with:

```typescript
const body = {
  model: model,
  messages: messages
}

const response = await fetch("http://localhost:11434/api/chat", {
  method: "POST",
  body: JSON.stringify(body)
})
```

With the **generate** endpoint, you need to provide a `prompt`. But with **chat**, you provide `messages`. And the resulting stream of responses includes a `message` object with a `content` field.

The final JSON object doesn't provide the full content, so you will need to build the content yourself. In this example, **chat** takes the full array of messages and outputs the resulting message from this call of the chat endpoint.

In the **askQuestion** function, we collect `user_input` and add it as a message to our messages, and that is passed to the chat function. When the LLM is done responding, the output is added as another message to the messages array.

At the end, you will see a printout of all the messages.

## Next Steps

In this example, all generations are kept. You might want to experiment with summarizing everything older than 10 conversations to enable longer history with less context being used.

go.mod

@@ -5,14 +5,18 @@ go 1.20
 require (
 	github.com/emirpasic/gods v1.18.1
 	github.com/gin-gonic/gin v1.9.1
-	github.com/mattn/go-runewidth v0.0.14
-	github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db
 	github.com/olekukonko/tablewriter v0.0.5
 	github.com/spf13/cobra v1.7.0
+	github.com/stretchr/testify v1.8.3
 	golang.org/x/sync v0.3.0
 )
 
-require github.com/rivo/uniseg v0.2.0 // indirect
+require (
+	github.com/davecgh/go-spew v1.1.1 // indirect
+	github.com/mattn/go-runewidth v0.0.14 // indirect
+	github.com/pmezard/go-difflib v1.0.0 // indirect
+	github.com/rivo/uniseg v0.2.0 // indirect
+)
 
 require (
 	github.com/bytedance/sonic v1.9.1 // indirect

go.sum

@@ -63,8 +63,6 @@ github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D
 github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
 github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU=
 github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
-github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ=
-github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw=
 github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
 github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
 github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=

@@ -1,20 +0,0 @@
-package llm
-
-const (
-	falconModelType7B   = 32
-	falconModelType40B  = 60
-	falconModelType180B = 80
-)
-
-func falconModelType(numLayer uint32) string {
-	switch numLayer {
-	case 32:
-		return "7B"
-	case 60:
-		return "40B"
-	case 80:
-		return "180B"
-	default:
-		return "unknown"
-	}
-}

89
llm/ggml.go
89
llm/ggml.go
@@ -7,9 +7,10 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
type GGML struct {
|
type GGML struct {
|
||||||
magic uint32
|
|
||||||
container
|
container
|
||||||
model
|
model
|
||||||
|
|
||||||
|
Size int64
|
||||||
}
|
}
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@@ -82,7 +83,7 @@ type model interface {
|
|||||||
|
|
||||||
type container interface {
|
type container interface {
|
||||||
Name() string
|
Name() string
|
||||||
Decode(io.Reader) (model, error)
|
Decode(*readSeekOffset) (model, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
type containerGGML struct{}
|
type containerGGML struct{}
|
||||||
@@ -91,7 +92,9 @@ func (c *containerGGML) Name() string {
|
|||||||
return "ggml"
|
return "ggml"
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *containerGGML) Decode(r io.Reader) (model, error) {
|
func (c *containerGGML) Decode(ro *readSeekOffset) (model, error) {
|
||||||
|
// file contents aren't decoded
|
||||||
|
ro.Seek(0, io.SeekEnd)
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -103,9 +106,9 @@ func (c *containerGGMF) Name() string {
|
|||||||
return "ggmf"
|
return "ggmf"
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *containerGGMF) Decode(r io.Reader) (model, error) {
|
func (c *containerGGMF) Decode(ro *readSeekOffset) (model, error) {
|
||||||
var version uint32
|
var version uint32
|
||||||
binary.Read(r, binary.LittleEndian, &version)
|
binary.Read(ro, binary.LittleEndian, &version)
|
||||||
|
|
||||||
switch version {
|
switch version {
|
||||||
case 1:
|
case 1:
|
||||||
@@ -114,6 +117,10 @@ func (c *containerGGMF) Decode(r io.Reader) (model, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
c.version = version
|
c.version = version
|
||||||
|
|
||||||
|
// remaining file contents aren't decoded
|
||||||
|
ro.Seek(0, io.SeekEnd)
|
||||||
|
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -125,9 +132,9 @@ func (c *containerGGJT) Name() string {
|
|||||||
return "ggjt"
|
return "ggjt"
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *containerGGJT) Decode(r io.Reader) (model, error) {
|
func (c *containerGGJT) Decode(ro *readSeekOffset) (model, error) {
|
||||||
var version uint32
|
var version uint32
|
||||||
binary.Read(r, binary.LittleEndian, &version)
|
binary.Read(ro, binary.LittleEndian, &version)
|
||||||
|
|
||||||
switch version {
|
switch version {
|
||||||
case 1, 2, 3:
|
case 1, 2, 3:
|
||||||
@@ -139,7 +146,11 @@ func (c *containerGGJT) Decode(r io.Reader) (model, error) {
|
|||||||
|
|
||||||
// different model types may have different layouts for hyperparameters
 // different model types may have different layouts for hyperparameters
 	var llama llamaModel
-	binary.Read(r, binary.LittleEndian, &llama.hyperparameters)
+	binary.Read(ro, binary.LittleEndian, &llama.hyperparameters)
 
+	// remaining file contents aren't decoded
+	ro.Seek(0, io.SeekEnd)
 
 	return &llama, nil
 }
 
@@ -151,9 +162,9 @@ func (c *containerLORA) Name() string {
 	return "ggla"
 }
 
-func (c *containerLORA) Decode(r io.Reader) (model, error) {
+func (c *containerLORA) Decode(ro *readSeekOffset) (model, error) {
 	var version uint32
-	binary.Read(r, binary.LittleEndian, &version)
+	binary.Read(ro, binary.LittleEndian, &version)
 
 	switch version {
 	case 1:
@@ -162,6 +173,10 @@ func (c *containerLORA) Decode(r io.Reader) (model, error) {
 	}
 
 	c.version = version
 
+	// remaining file contents aren't decoded
+	ro.Seek(0, io.SeekEnd)
+
 	return nil, nil
 }
 
@@ -180,33 +195,61 @@ const (
 )
 
 func DecodeGGML(r io.ReadSeeker) (*GGML, error) {
-	var ggml GGML
-	binary.Read(r, binary.LittleEndian, &ggml.magic)
-
-	switch ggml.magic {
+	ro := readSeekOffset{ReadSeeker: r}
+
+	var magic uint32
+	if err := binary.Read(&ro, binary.LittleEndian, &magic); err != nil {
+		return nil, err
+	}
+
+	var c container
+	switch magic {
 	case FILE_MAGIC_GGML:
-		ggml.container = &containerGGML{}
+		c = &containerGGML{}
 	case FILE_MAGIC_GGMF:
-		ggml.container = &containerGGMF{}
+		c = &containerGGMF{}
 	case FILE_MAGIC_GGJT:
-		ggml.container = &containerGGJT{}
+		c = &containerGGJT{}
 	case FILE_MAGIC_GGLA:
-		ggml.container = &containerLORA{}
+		c = &containerLORA{}
 	case FILE_MAGIC_GGUF_LE:
-		ggml.container = &containerGGUF{bo: binary.LittleEndian}
+		c = &containerGGUF{bo: binary.LittleEndian}
 	case FILE_MAGIC_GGUF_BE:
-		ggml.container = &containerGGUF{bo: binary.BigEndian}
+		c = &containerGGUF{bo: binary.BigEndian}
 	default:
 		return nil, errors.New("invalid file magic")
 	}
 
-	model, err := ggml.Decode(r)
+	model, err := c.Decode(&ro)
 	if err != nil {
 		return nil, err
 	}
 
-	ggml.model = model
-
 	// final model type
-	return &ggml, nil
+	return &GGML{
+		container: c,
+		model:     model,
+		Size:      ro.offset,
+	}, nil
+}
+
+type readSeekOffset struct {
+	io.ReadSeeker
+	offset int64
+}
+
+func (rso *readSeekOffset) Seek(offset int64, whence int) (int64, error) {
+	offset, err := rso.ReadSeeker.Seek(offset, whence)
+	if err != nil {
+		return 0, err
+	}
+
+	rso.offset = offset
+	return offset, nil
+}
+
+func (rso *readSeekOffset) Read(p []byte) (int, error) {
	n, err := rso.ReadSeeker.Read(p)
+	rso.offset += int64(n)
+	return n, err
 }
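A note on the `readSeekOffset` wrapper above: it is what lets `DecodeGGML` report a byte `Size` for each decoded blob. Every `Read` advances a running offset and every `Seek` resets it to the new absolute position, so once a container finishes decoding, `ro.offset` is exactly where that blob ends in the file. A minimal standalone sketch of the same pattern (the `countingReadSeeker` name and the example input are illustrative, not from this change):

```go
package main

import (
	"fmt"
	"io"
	"strings"
)

// countingReadSeeker mirrors the readSeekOffset idea: embed an
// io.ReadSeeker and keep the current absolute offset up to date.
type countingReadSeeker struct {
	io.ReadSeeker
	offset int64
}

func (c *countingReadSeeker) Read(p []byte) (int, error) {
	n, err := c.ReadSeeker.Read(p)
	c.offset += int64(n) // advance by however many bytes were actually read
	return n, err
}

func (c *countingReadSeeker) Seek(offset int64, whence int) (int64, error) {
	abs, err := c.ReadSeeker.Seek(offset, whence)
	if err != nil {
		return 0, err
	}
	c.offset = abs // Seek reports the new absolute position
	return abs, nil
}

func main() {
	rs := &countingReadSeeker{ReadSeeker: strings.NewReader("GGUF....payload....")}
	buf := make([]byte, 4)
	rs.Read(buf)               // consume the 4-byte magic
	rs.Seek(8, io.SeekCurrent) // skip a fixed-size header
	fmt.Println(rs.offset)     // 12: bytes consumed so far
}
```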
llm/gguf.go (165 lines changed)
@@ -23,26 +23,24 @@ type containerGGUF struct {
 		NumTensor uint64
 		NumKV     uint64
 	}
-
-	parameters uint64
 }
 
 func (c *containerGGUF) Name() string {
 	return "gguf"
 }
 
-func (c *containerGGUF) Decode(r io.Reader) (model, error) {
-	binary.Read(r, c.bo, &c.Version)
+func (c *containerGGUF) Decode(rso *readSeekOffset) (model, error) {
+	binary.Read(rso, c.bo, &c.Version)
 
 	switch c.Version {
 	case 1:
-		binary.Read(r, c.bo, &c.V1)
+		binary.Read(rso, c.bo, &c.V1)
 	default:
-		binary.Read(r, c.bo, &c.V2)
+		binary.Read(rso, c.bo, &c.V2)
 	}
 
 	model := newGGUFModel(c)
-	if err := model.Decode(r); err != nil {
+	if err := model.Decode(rso); err != nil {
 		return nil, err
 	}
 
@@ -67,9 +65,23 @@ const (
 
 type kv map[string]any
 
+type tensor struct {
+	name   string
+	kind   uint32
+	offset uint64
+	size   uint64
+
+	// shape is the number of elements in each dimension
+	shape [4]uint64
+}
+
 type ggufModel struct {
 	*containerGGUF
 
 	kv
+	tensors []tensor
+
+	parameters uint64
 }
 
 func newGGUFModel(container *containerGGUF) *ggufModel {
@@ -96,8 +108,7 @@ func (llm *ggufModel) NumKV() uint64 {
 }
 
 func (llm *ggufModel) ModelFamily() string {
-	t, ok := llm.kv["general.architecture"].(string)
-	if ok {
+	if t, ok := llm.kv["general.architecture"].(string); ok {
 		return t
 	}
 
@@ -109,82 +120,60 @@ func (llm *ggufModel) ModelType() string {
 		return format.HumanNumber(llm.parameters)
 	}
 
-	switch llm.ModelFamily() {
-	case "llama":
-		if blocks, ok := llm.kv["llama.block_count"].(uint32); ok {
-			heads, headsOK := llm.kv["llama.head_count"].(uint32)
-			headKVs, headsKVsOK := llm.kv["llama.head_count_kv"].(uint32)
-			if headsOK && headsKVsOK && heads/headKVs == 8 {
-				return "70B"
-			}
-
-			return llamaModelType(blocks)
-		}
-	case "falcon":
-		if blocks, ok := llm.kv["falcon.block_count"].(uint32); ok {
-			return falconModelType(blocks)
-		}
-	case "starcoder":
-		if blocks, ok := llm.kv["starcoder.block_count"].(uint32); ok {
-			return starCoderModelType(blocks)
-		}
-	}
-
 	return "unknown"
 }
 
 func (llm *ggufModel) FileType() string {
-	t, ok := llm.kv["general.file_type"].(uint32)
-	if ok {
+	if t, ok := llm.kv["general.file_type"].(uint32); ok {
 		return fileType(t)
 	}
 
 	return "unknown"
 }
 
-func (llm *ggufModel) Decode(r io.Reader) error {
+func (llm *ggufModel) Decode(rso *readSeekOffset) error {
 	// decode key-values
 	for i := 0; uint64(i) < llm.NumKV(); i++ {
-		k, err := llm.readString(r)
+		k, err := llm.readString(rso)
 		if err != nil {
 			return err
 		}
 
-		vtype := llm.readU32(r)
+		vtype := llm.readU32(rso)
 
 		var v any
 		switch vtype {
 		case ggufTypeUint8:
-			v = llm.readU8(r)
+			v = llm.readU8(rso)
 		case ggufTypeInt8:
-			v = llm.readI8(r)
+			v = llm.readI8(rso)
 		case ggufTypeUint16:
-			v = llm.readU16(r)
+			v = llm.readU16(rso)
 		case ggufTypeInt16:
-			v = llm.readI16(r)
+			v = llm.readI16(rso)
 		case ggufTypeUint32:
-			v = llm.readU32(r)
+			v = llm.readU32(rso)
 		case ggufTypeInt32:
-			v = llm.readI32(r)
+			v = llm.readI32(rso)
 		case ggufTypeUint64:
-			v = llm.readU64(r)
+			v = llm.readU64(rso)
 		case ggufTypeInt64:
-			v = llm.readI64(r)
+			v = llm.readI64(rso)
 		case ggufTypeFloat32:
-			v = llm.readF32(r)
+			v = llm.readF32(rso)
 		case ggufTypeFloat64:
-			v = llm.readF64(r)
+			v = llm.readF64(rso)
 		case ggufTypeBool:
-			v = llm.readBool(r)
+			v = llm.readBool(rso)
 		case ggufTypeString:
-			s, err := llm.readString(r)
+			s, err := llm.readString(rso)
 			if err != nil {
 				return err
 			}
 
 			v = s
 		case ggufTypeArray:
-			a, err := llm.readArray(r)
+			a, err := llm.readArray(rso)
 			if err != nil {
 				return err
 			}
@@ -199,21 +188,85 @@ func (llm *ggufModel) Decode(r io.Reader) error {
 
 	// decode tensors
 	for i := 0; uint64(i) < llm.NumTensor(); i++ {
-		if _, err := llm.readString(r); err != nil {
+		name, err := llm.readString(rso)
+		if err != nil {
 			return err
 		}
 
-		dimensions := llm.readU32(r)
+		// dims is the number of dimensions in the tensor
+		dims := llm.readU32(rso)
 
-		var elements uint64 = 1
-		for i := 0; uint32(i) < dimensions; i++ {
-			elements *= llm.readU64(r)
+		shape := [4]uint64{1, 1, 1, 1}
+		for i := 0; uint32(i) < dims; i++ {
+			shape[i] = llm.readU64(rso)
 		}
 
-		llm.readU32(r) // type
-		llm.readU64(r) // offset
-
-		llm.parameters += elements
+		kind := llm.readU32(rso)
+		offset := llm.readU64(rso)
+
+		var blockSize uint64
+		switch {
+		case kind < 2:
+			blockSize = 1
+		case kind < 10:
+			blockSize = 32
+		default:
+			blockSize = 256
+		}
+
+		var typeSize uint64
+		switch kind {
+		case 0: // FP32
+			typeSize = 4
+		case 1: // FP16
+			typeSize = 2
+		case 2: // Q4_0
+			typeSize = 2 + blockSize/2
+		case 3: // Q4_1
+			typeSize = 2 + 2 + blockSize/2
+		case 6: // Q5_0
+			typeSize = 2 + 4 + blockSize/2
+		case 7: // Q5_1
+			typeSize = 2 + 2 + 4 + blockSize/2
+		case 8: // Q8_0
+			typeSize = 2 + blockSize
+		case 9: // Q8_1
+			typeSize = 4 + 4 + blockSize
+		case 10: // Q2_K
+			typeSize = blockSize/16 + blockSize/4 + 2 + 2
+		case 11: // Q3_K
+			typeSize = blockSize/8 + blockSize/4 + 12 + 2
+		case 12: // Q4_K
+			typeSize = 2 + 2 + 12 + blockSize/2
+		case 13: // Q5_K
+			typeSize = 2 + 2 + 12 + blockSize/8 + blockSize/2
+		case 14: // Q6_K
+			typeSize = blockSize/2 + blockSize/4 + blockSize/16 + 2
+		}
+
+		parameters := shape[0] * shape[1] * shape[2] * shape[3]
+		size := parameters * typeSize / blockSize
+
+		llm.tensors = append(llm.tensors, tensor{
+			name:   name,
+			kind:   kind,
+			offset: offset,
+			size:   size,
+			shape:  shape,
+		})
+
+		llm.parameters += parameters
 	}
 
+	alignment, ok := llm.kv["general.alignment"].(uint32)
+	if !ok {
+		alignment = 32
+	}
+
+	rso.Seek(int64(alignment)-rso.offset%int64(alignment), io.SeekCurrent)
+	for _, tensor := range llm.tensors {
+		padded := (int64(tensor.size) + int64(alignment) - 1) & ^(int64(alignment) - 1)
+		rso.Seek(padded, io.SeekCurrent)
 	}
 
 	return nil
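The new tensor loop sizes each tensor from its quantization: `blockSize` is how many elements share one quantization block, `typeSize` is the byte width of one encoded block, and so `size = parameters * typeSize / blockSize`. As a worked example under those formulas, a Q4_0 tensor (`kind == 2`) uses 32-element blocks of `2 + 32/2 = 18` bytes, so a 4096×4096 tensor holds 16,777,216 parameters and occupies `16777216 * 18 / 32 = 9437184` bytes. A small sketch reproducing that arithmetic for two representative kinds (the shape here is made up for illustration):

```go
package main

import "fmt"

// tensorSize reproduces the sizing arithmetic from the GGUF tensor loop
// for two representative kinds only (FP16 and Q4_0), as a sketch.
func tensorSize(kind uint32, shape [4]uint64) uint64 {
	var blockSize, typeSize uint64
	switch kind {
	case 1: // FP16: one element per "block", two bytes each
		blockSize, typeSize = 1, 2
	case 2: // Q4_0: 32 elements per block, 2-byte scale + 16 packed bytes
		blockSize, typeSize = 32, 2+32/2
	default:
		panic("kind not covered in this sketch")
	}

	parameters := shape[0] * shape[1] * shape[2] * shape[3]
	return parameters * typeSize / blockSize
}

func main() {
	q40 := tensorSize(2, [4]uint64{4096, 4096, 1, 1})
	fmt.Println(q40) // 9437184 bytes for a 4096x4096 Q4_0 tensor
}
```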
Submodule llm/llama.cpp/gguf updated: 9656026b53...a7aee47b98
llm/llama.go (116 lines changed)
@@ -59,6 +59,7 @@ ws ::= ([ \t\n] ws)?
 var llamaCppEmbed embed.FS
 
 type ModelRunner struct {
+	Type        string // "gguf" or "ggml"
 	Path        string // path to the model runner executable
 	Accelerated bool
 }
@@ -72,25 +73,25 @@ func chooseRunners(workDir, runnerType string) []ModelRunner {
 	switch runtime.GOOS {
 	case "darwin":
 		if runtime.GOARCH == "arm64" {
-			runners = []ModelRunner{{Path: path.Join(buildPath, "metal", "bin", "ollama-runner")}}
+			runners = []ModelRunner{{Type: runnerType, Path: path.Join(buildPath, "metal", "bin", "ollama-runner")}}
 		} else {
-			runners = []ModelRunner{{Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")}}
+			runners = []ModelRunner{{Type: runnerType, Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")}}
 		}
 	case "linux":
 		runners = []ModelRunner{
-			{Path: path.Join(buildPath, "cuda", "bin", "ollama-runner"), Accelerated: true},
-			{Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")},
+			{Type: runnerType, Path: path.Join(buildPath, "cuda", "bin", "ollama-runner"), Accelerated: true},
+			{Type: runnerType, Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")},
 		}
 	case "windows":
 		// TODO: select windows GPU runner here when available
 		runners = []ModelRunner{
-			{Path: path.Join(buildPath, "cuda", "bin", "Release", "ollama-runner.exe"), Accelerated: true},
-			{Path: path.Join(buildPath, "cpu", "bin", "Release", "ollama-runner.exe")},
+			{Type: runnerType, Path: path.Join(buildPath, "cuda", "bin", "Release", "ollama-runner.exe"), Accelerated: true},
+			{Type: runnerType, Path: path.Join(buildPath, "cpu", "bin", "Release", "ollama-runner.exe")},
 		}
 	default:
 		log.Printf("unknown OS, running on CPU: %s", runtime.GOOS)
 		runners = []ModelRunner{
-			{Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")},
+			{Type: runnerType, Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")},
 		}
 	}
 
@@ -148,6 +149,7 @@ func chooseRunners(workDir, runnerType string) []ModelRunner {
 	for _, r := range runners {
 		// clean the ModelRunner paths so that they match the OS we are running on
 		localRunnersByPriority = append(localRunnersByPriority, ModelRunner{
+			Type:        r.Type,
 			Path:        filepath.Clean(path.Join(workDir, r.Path)),
 			Accelerated: r.Accelerated,
 		})
@@ -221,8 +223,14 @@ type Running struct {
 	*StatusWriter // captures error messages from the llama runner process
 }
 
+type ImageData struct {
+	Data []byte `json:"data"`
+	ID   int    `json:"id"`
+}
+
 type llama struct {
 	api.Options
+	ImageData []ImageData
 	Running
 }
 
@@ -325,7 +333,7 @@ func (w *StatusWriter) Write(b []byte) (int, error) {
 	return os.Stderr.Write(b)
 }
 
-func newLlama(model string, adapters []string, runners []ModelRunner, numLayers int64, opts api.Options) (*llama, error) {
+func newLlama(model string, adapters, projectors []string, runners []ModelRunner, numLayers int64, opts api.Options) (*llama, error) {
 	fileInfo, err := os.Stat(model)
 	if err != nil {
 		return nil, err
@@ -365,6 +373,11 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers int64, opts api.Options) (*llama, error) {
 		params = append(params, "--lora", adapters[0])
 	}
 
+	if len(projectors) > 0 {
+		// TODO: applying multiple projectors is not supported by the llama.cpp server yet
+		params = append(params, "--mmproj", projectors[0])
+	}
+
 	if opts.NumThread > 0 {
 		params = append(params, "--threads", fmt.Sprintf("%d", opts.NumThread))
 	}
@@ -397,11 +410,13 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers int64, opts api.Options) (*llama, error) {
 	}
 
 	port := rand.Intn(65535-49152) + 49152 // get a random port in the ephemeral range
+	params := append(params, "--port", strconv.Itoa(port))
+
 	ctx, cancel := context.WithCancel(context.Background())
 	cmd := exec.CommandContext(
 		ctx,
 		runner.Path,
-		append(params, "--port", strconv.Itoa(port))...,
+		params...,
 	)
 
 	var libraryPaths []string
@@ -530,22 +545,39 @@ type prediction struct {
 }
 
 const maxBufferSize = 512 * format.KiloByte
+const maxRetries = 6
 
-func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string, format string, fn func(api.GenerateResponse)) error {
-	prevConvo, err := llm.Decode(ctx, prevContext)
-	if err != nil {
-		return err
-	}
+type PredictOpts struct {
+	Prompt string
+	Format string
+	Images []api.ImageData
+}
 
-	// Remove leading spaces from prevConvo if present
-	prevConvo = strings.TrimPrefix(prevConvo, " ")
+type PredictResult struct {
+	Content            string
+	Done               bool
+	PromptEvalCount    int
+	PromptEvalDuration time.Duration
+	EvalCount          int
+	EvalDuration       time.Duration
+}
 
-	var nextContext strings.Builder
-	nextContext.WriteString(prevConvo)
-	nextContext.WriteString(prompt)
+// IsRetryable checks if the line matches a condition that can be retried
+func isRetryable(line []byte) bool {
+	return bytes.Contains(line, []byte("slot unavailable"))
+}
+
+func (llm *llama) Predict(ctx context.Context, predict PredictOpts, fn func(PredictResult)) error {
+	imageData := llm.ImageData
+	if len(predict.Images) > 0 {
+		for cnt, i := range predict.Images {
+			imageData = append(imageData, ImageData{Data: i, ID: cnt})
+		}
+	}
+	log.Printf("loaded %d images", len(imageData))
 
 	request := map[string]any{
-		"prompt":            nextContext.String(),
+		"prompt":            predict.Prompt,
 		"stream":            true,
 		"n_predict":         llm.NumPredict,
 		"n_keep":            llm.NumKeep,
@@ -565,12 +597,20 @@ func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string, format string, fn func(api.GenerateResponse)) error {
 		"penalize_nl":       llm.PenalizeNewline,
 		"seed":              llm.Seed,
 		"stop":              llm.Stop,
+		"image_data":        imageData,
 	}
 
-	if format == "json" {
+	if predict.Format == "json" {
 		request["grammar"] = jsonGrammar
 	}
 
+	retryDelay := 100 * time.Microsecond
+	for retries := 0; retries < maxRetries; retries++ {
+		if retries > 0 {
+			time.Sleep(retryDelay) // wait before retrying
+			retryDelay *= 2        // exponential backoff
+		}
+
 	// Handling JSON marshaling with special characters unescaped.
 	buffer := &bytes.Buffer{}
 	enc := json.NewEncoder(buffer)
@@ -606,6 +646,8 @@ func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string, format string, fn func(api.GenerateResponse)) error {
 		// increase the buffer size to avoid running out of space
 		buf := make([]byte, 0, maxBufferSize)
 		scanner.Buffer(buf, maxBufferSize)
+
+		retryNeeded := false
 		for scanner.Scan() {
 			select {
 			case <-ctx.Done():
@@ -617,37 +659,39 @@ func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string, format string, fn func(api.GenerateResponse)) error {
 				continue
 			}
 
-			if evt, ok := bytes.CutPrefix(line, []byte("data: ")); ok {
+			if isRetryable(line) {
+				retryNeeded = true
+				break
+			}
+
+			evt, ok := bytes.CutPrefix(line, []byte("data: "))
+			if !ok {
+				return fmt.Errorf("error parsing llm response stream: %s", line)
+			}
+
 			var p prediction
 			if err := json.Unmarshal(evt, &p); err != nil {
 				return fmt.Errorf("error unmarshaling llm prediction response: %v", err)
 			}
 
 			if p.Content != "" {
-				fn(api.GenerateResponse{Response: p.Content})
-				nextContext.WriteString(p.Content)
+				fn(PredictResult{
+					Content: p.Content,
+				})
 			}
 
 			if p.Stop {
-				embd, err := llm.Encode(ctx, nextContext.String())
-				if err != nil {
-					return fmt.Errorf("encoding context: %v", err)
-				}
-
-				fn(api.GenerateResponse{
+				fn(PredictResult{
 					Done:               true,
-					Context:            embd,
 					PromptEvalCount:    p.Timings.PromptN,
 					PromptEvalDuration: parseDurationMs(p.Timings.PromptMS),
 					EvalCount:          p.Timings.PredictedN,
 					EvalDuration:       parseDurationMs(p.Timings.PredictedMS),
 				})
 
 				return nil
 			}
 		}
-	}
 
 	if err := scanner.Err(); err != nil {
 		if strings.Contains(err.Error(), "unexpected EOF") {
@@ -661,7 +705,13 @@ func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string, format string, fn func(api.GenerateResponse)) error {
 		return fmt.Errorf("error reading llm response: %v", err)
 	}
 
-	return nil
+		if !retryNeeded {
+			return nil // success
+		}
+	}
+
+	// should never reach here ideally
+	return fmt.Errorf("max retries exceeded")
 }
 
 type TokenizeRequest struct {
@@ -14,7 +14,7 @@ import (
 )
 
 type LLM interface {
-	Predict(context.Context, []int, string, string, func(api.GenerateResponse)) error
+	Predict(context.Context, PredictOpts, func(PredictResult)) error
 	Embedding(context.Context, string) ([]float64, error)
 	Encode(context.Context, string) ([]int, error)
 	Decode(context.Context, []int) (string, error)
@@ -23,7 +23,7 @@ type LLM interface {
 	Ping(context.Context) error
 }
 
-func New(workDir, model string, adapters []string, opts api.Options) (LLM, error) {
+func New(workDir, model string, adapters, projectors []string, opts api.Options) (LLM, error) {
 	if _, err := os.Stat(model); err != nil {
 		return nil, err
 	}
@@ -82,9 +82,9 @@ func New(workDir, model string, adapters []string, opts api.Options) (LLM, error) {
 		opts.NumGQA = 0
 		opts.RopeFrequencyBase = 0.0
 		opts.RopeFrequencyScale = 0.0
-		return newLlama(model, adapters, chooseRunners(workDir, "gguf"), ggml.NumLayers(), opts)
+		return newLlama(model, adapters, projectors, chooseRunners(workDir, "gguf"), ggml.NumLayers(), opts)
 	case "ggml", "ggmf", "ggjt", "ggla":
-		return newLlama(model, adapters, chooseRunners(workDir, "ggml"), ggml.NumLayers(), opts)
+		return newLlama(model, adapters, projectors, chooseRunners(workDir, "ggml"), ggml.NumLayers(), opts)
 	default:
 		return nil, fmt.Errorf("unknown ggml type: %s", ggml.ModelFamily())
 	}
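With the interface change above, `Predict` no longer round-trips a `[]int` context through the runner: callers pass a self-contained `PredictOpts` and receive streamed `PredictResult` values through a callback until one arrives with `Done` set, carrying the timing counters. A hypothetical caller might look like this (the `generate` helper and prompt handling are illustrative; only the `Predict` signature and the `PredictOpts`/`PredictResult` fields come from the diff):

```go
package main

import (
	"context"
	"log"
	"strings"

	"github.com/jmorganca/ollama/llm"
)

// generate drives the new streaming Predict API: partial content arrives
// through the callback, and the final result sets Done plus eval timings.
func generate(ctx context.Context, model llm.LLM, prompt string) (string, error) {
	var sb strings.Builder
	err := model.Predict(ctx, llm.PredictOpts{
		Prompt: prompt, // the fully templated prompt; no server-side context splicing anymore
	}, func(r llm.PredictResult) {
		sb.WriteString(r.Content) // accumulate streamed content
		if r.Done {
			log.Printf("eval: %d tokens in %s", r.EvalCount, r.EvalDuration)
		}
	})
	return sb.String(), err
}
```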
@@ -1,23 +0,0 @@
-package llm
-
-const (
-	starCoderModelType1B  = 24
-	starCoderModelType3B  = 36
-	starCoderModelType7B  = 42
-	starCoderModelType15B = 40
-)
-
-func starCoderModelType(numLayer uint32) string {
-	switch numLayer {
-	case 24:
-		return "1B"
-	case 36:
-		return "3B"
-	case 42:
-		return "7B"
-	case 40:
-		return "15B"
-	default:
-		return "unknown"
-	}
-}
@@ -37,10 +37,13 @@ func Parse(reader io.Reader) ([]Command, error) {
 		switch string(bytes.ToUpper(fields[0])) {
 		case "FROM":
 			command.Name = "model"
-			command.Args = string(fields[1])
+			command.Args = string(bytes.TrimSpace(fields[1]))
 			// copy command for validation
 			modelCommand = command
-		case "LICENSE", "TEMPLATE", "SYSTEM", "PROMPT", "ADAPTER":
+		case "ADAPTER":
+			command.Name = string(bytes.ToLower(fields[0]))
+			command.Args = string(bytes.TrimSpace(fields[1]))
+		case "LICENSE", "TEMPLATE", "SYSTEM", "PROMPT":
 			command.Name = string(bytes.ToLower(fields[0]))
 			command.Args = string(fields[1])
 		case "PARAMETER":
@@ -50,7 +53,7 @@ func Parse(reader io.Reader) ([]Command, error) {
 			}
 
 			command.Name = string(fields[0])
-			command.Args = string(fields[1])
+			command.Args = string(bytes.TrimSpace(fields[1]))
 		case "EMBED":
 			return nil, fmt.Errorf("deprecated command: EMBED is no longer supported, use the /embed API endpoint instead")
 		default:
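The practical effect of the `bytes.TrimSpace` calls: a Modelfile line with trailing whitespace, such as `ADAPTER ./adapter.bin ` followed by a Windows line ending, previously produced an args value that failed the later file lookup. A minimal sketch of the behavior difference (the input is hypothetical; `bytes.TrimSpace` is the real standard-library call):

```go
package main

import (
	"bytes"
	"fmt"
)

func main() {
	// fields[1] as the parser would see it for "ADAPTER ./adapter.bin \r\n"
	raw := []byte("./adapter.bin \r")

	fmt.Printf("%q\n", string(raw))                  // "./adapter.bin \r" — breaks os.Open
	fmt.Printf("%q\n", string(bytes.TrimSpace(raw))) // "./adapter.bin"
}
```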
@@ -191,6 +191,8 @@ func (i *Instance) Readline() (string, error) {
 			buf.ClearScreen()
 		case CharCtrlW:
 			buf.DeleteWord()
+		case CharCtrlZ:
+			return handleCharCtrlZ(fd, termios)
 		case CharEnter:
 			output := buf.String()
 			if output != "" {

readline/readline_unix.go (new file, 18 lines)
@@ -0,0 +1,18 @@
+//go:build !windows
+
+package readline
+
+import (
+	"syscall"
+)
+
+func handleCharCtrlZ(fd int, termios *Termios) (string, error) {
+	if err := UnsetRawMode(fd, termios); err != nil {
+		return "", err
+	}
+
+	syscall.Kill(0, syscall.SIGSTOP)
+
+	// on resume...
+	return "", nil
+}

readline/readline_windows.go (new file, 6 lines)
@@ -0,0 +1,6 @@
+package readline
+
+func handleCharCtrlZ(fd int, state *State) (string, error) {
+	// not supported
+	return "", nil
+}
@@ -217,7 +217,7 @@ fi
 
 if ! check_gpu nvidia-smi || [ -z "$(nvidia-smi | grep -o "CUDA Version: [0-9]*\.[0-9]*")" ]; then
     case $OS_NAME in
-        centos|rhel) install_cuda_driver_yum 'rhel' $OS_VERSION ;;
+        centos|rhel) install_cuda_driver_yum 'rhel' $(echo $OS_VERSION | cut -d '.' -f 1) ;;
         rocky) install_cuda_driver_yum 'rhel' $(echo $OS_VERSION | cut -c1) ;;
         fedora) install_cuda_driver_yum $OS_NAME $OS_VERSION ;;
         amzn) install_cuda_driver_yum 'fedora' '35' ;;
@@ -230,7 +230,8 @@ fi
 if ! lsmod | grep -q nvidia; then
     KERNEL_RELEASE="$(uname -r)"
     case $OS_NAME in
-        centos|rhel|rocky|amzn) $SUDO $PACKAGE_MANAGER -y install kernel-devel-$KERNEL_RELEASE kernel-headers-$KERNEL_RELEASE ;;
+        rocky) $SUDO $PACKAGE_MANAGER -y install kernel-devel kernel-headers ;;
+        centos|rhel|amzn) $SUDO $PACKAGE_MANAGER -y install kernel-devel-$KERNEL_RELEASE kernel-headers-$KERNEL_RELEASE ;;
         fedora) $SUDO $PACKAGE_MANAGER -y install kernel-devel-$KERNEL_RELEASE ;;
         debian|ubuntu) $SUDO apt-get -y install linux-headers-$KERNEL_RELEASE ;;
         *) exit ;;
server/images.go (491 lines changed)
@@ -14,7 +14,6 @@ import (
 	"net/url"
 	"os"
 	"path/filepath"
-	"reflect"
 	"runtime"
 	"strconv"
 	"strings"
@@ -37,79 +36,159 @@ type RegistryOptions struct {
 
 type Model struct {
 	Name           string `json:"name"`
+	Config         ConfigV2
 	ShortName      string
 	ModelPath      string
 	OriginalModel  string
 	AdapterPaths   []string
+	ProjectorPaths []string
 	Template       string
 	System         string
 	License        []string
 	Digest         string
+	Size           int64
 	Options        map[string]interface{}
 }
 
-func (m *Model) Prompt(request api.GenerateRequest) (string, error) {
-	t := m.Template
-	if request.Template != "" {
-		t = request.Template
-	}
+type PromptVars struct {
+	System   string
+	Prompt   string
+	Response string
+	First    bool
+}
 
-	tmpl, err := template.New("").Parse(t)
+func (m *Model) Prompt(p PromptVars) (string, error) {
+	var prompt strings.Builder
+	// Use the "missingkey=zero" option to handle missing variables without panicking
+	tmpl, err := template.New("").Option("missingkey=zero").Parse(m.Template)
 	if err != nil {
 		return "", err
 	}
 
-	var vars struct {
-		First  bool
-		System string
-		Prompt string
+	if p.System == "" {
+		// use the default system message for this model if one is not specified
+		p.System = m.System
 	}
 
-	vars.First = len(request.Context) == 0
-	vars.System = m.System
-	vars.Prompt = request.Prompt
-
-	if request.System != "" {
-		vars.System = request.System
+	vars := map[string]any{
+		"System":   p.System,
+		"Prompt":   p.Prompt,
+		"Response": p.Response,
+		"First":    p.First,
 	}
 
 	var sb strings.Builder
 	if err := tmpl.Execute(&sb, vars); err != nil {
 		return "", err
 	}
+	prompt.WriteString(sb.String())
+	prompt.WriteString(p.Response)
+	return prompt.String(), nil
+}
 
-	return sb.String(), nil
+func (m *Model) ChatPrompt(msgs []api.Message) (string, []api.ImageData, error) {
+	// build the prompt from the list of messages
+	var prompt strings.Builder
+	var currentImages []api.ImageData
+	currentVars := PromptVars{
+		First: true,
+	}
+
+	writePrompt := func() error {
+		p, err := m.Prompt(currentVars)
+		if err != nil {
+			return err
+		}
+		prompt.WriteString(p)
+		currentVars = PromptVars{}
+		return nil
+	}
+
+	for _, msg := range msgs {
+		switch strings.ToLower(msg.Role) {
+		case "system":
+			if currentVars.System != "" {
+				if err := writePrompt(); err != nil {
+					return "", nil, err
+				}
+			}
+			currentVars.System = msg.Content
+		case "user":
+			if currentVars.Prompt != "" {
+				if err := writePrompt(); err != nil {
+					return "", nil, err
+				}
+			}
+			currentVars.Prompt = msg.Content
+			currentImages = msg.Images
+		case "assistant":
+			currentVars.Response = msg.Content
+			if err := writePrompt(); err != nil {
+				return "", nil, err
+			}
+		default:
+			return "", nil, fmt.Errorf("invalid role: %s, role must be one of [system, user, assistant]", msg.Role)
+		}
+	}
+
+	// Append the last set of vars if they are non-empty
+	if currentVars.Prompt != "" || currentVars.System != "" {
+		if err := writePrompt(); err != nil {
+			return "", nil, err
+		}
+	}
+
+	return prompt.String(), currentImages, nil
 }
 
 type ManifestV2 struct {
 	SchemaVersion int      `json:"schemaVersion"`
 	MediaType     string   `json:"mediaType"`
-	Config        Layer    `json:"config"`
+	Config        *Layer   `json:"config"`
 	Layers        []*Layer `json:"layers"`
 }
 
-type Layer struct {
-	MediaType string `json:"mediaType"`
-	Digest    string `json:"digest"`
-	Size      int64  `json:"size"`
-	From      string `json:"from,omitempty"`
-}
-
-type LayerReader struct {
-	Layer
-	io.Reader
-}
-
 type ConfigV2 struct {
-	ModelFormat string `json:"model_format"`
-	ModelFamily string `json:"model_family"`
-	ModelType   string `json:"model_type"`
-	FileType    string `json:"file_type"`
-	RootFS      RootFS `json:"rootfs"`
+	ModelFormat   string   `json:"model_format"`
+	ModelFamily   string   `json:"model_family"`
+	ModelFamilies []string `json:"model_families"`
+	ModelType     string   `json:"model_type"`
+	FileType      string   `json:"file_type"`
 
 	// required by spec
 	Architecture string `json:"architecture"`
 	OS           string `json:"os"`
+	RootFS       RootFS `json:"rootfs"`
+}
+
+func (c *ConfigV2) SetModelFormat(format string) {
+	if c.ModelFormat == "" {
+		c.ModelFormat = format
+	}
+}
+
+func (c *ConfigV2) SetModelFamily(families ...string) {
+	for _, family := range families {
+		if c.ModelFamily == "" {
+			c.ModelFamily = family
+		}
+
+		if !slices.Contains(c.ModelFamilies, family) {
+			c.ModelFamilies = append(c.ModelFamilies, family)
+		}
+	}
+}
+
+func (c *ConfigV2) SetModelType(modelType string) {
+	if c.ModelType == "" {
+		c.ModelType = modelType
+	}
+}
+
+func (c *ConfigV2) SetFileType(fileType string) {
+	if c.FileType == "" {
+		c.FileType = fileType
	}
 }
 
 type RootFS struct {
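`ChatPrompt` folds a message list into a single prompt: it batches messages into a `PromptVars`, flushing the batch through the template whenever a role would overwrite a slot already in use, and assistant content is appended verbatim via the `Response` field so the model sees its own prior replies inline. A sketch of calling it from outside the package (message contents are illustrative; the import path assumes the `server` package is usable as shown — in-tree this would simply be code inside `package server`):

```go
package main

import (
	"fmt"
	"log"

	"github.com/jmorganca/ollama/api"
	"github.com/jmorganca/ollama/server"
)

func main() {
	m := server.Model{Template: "[INST] {{ .System }} {{ .Prompt }} [/INST]"}

	prompt, images, err := m.ChatPrompt([]api.Message{
		{Role: "system", Content: "You are a Wizard."},
		{Role: "user", Content: "What are the potion ingredients?"},
		{Role: "assistant", Content: "sugar"},
		{Role: "user", Content: "Anything else?"},
	})
	if err != nil {
		log.Fatal(err)
	}

	// images carries any attachments from the most recent user message
	fmt.Println(prompt, len(images))
}
```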
@@ -166,6 +245,22 @@ func GetModel(name string) (*Model, error) {
 		Digest:    digest,
 		Template:  "{{ .Prompt }}",
 		License:   []string{},
+		Size:      manifest.GetTotalSize(),
+	}
+
+	filename, err := GetBlobsPath(manifest.Config.Digest)
+	if err != nil {
+		return nil, err
+	}
+
+	configFile, err := os.Open(filename)
+	if err != nil {
+		return nil, err
+	}
+	defer configFile.Close()
+
+	if err := json.NewDecoder(configFile).Decode(&model.Config); err != nil {
+		return nil, err
 	}
 
 	for _, layer := range manifest.Layers {
@@ -184,6 +279,8 @@ func GetModel(name string) (*Model, error) {
 			log.Print("WARNING: model contains embeddings, but embeddings in modelfiles have been deprecated and will be ignored.")
 		case "application/vnd.ollama.image.adapter":
 			model.AdapterPaths = append(model.AdapterPaths, filename)
+		case "application/vnd.ollama.image.projector":
+			model.ProjectorPaths = append(model.ProjectorPaths, filename)
 		case "application/vnd.ollama.image.template":
 			bts, err := os.ReadFile(filename)
 			if err != nil {
@@ -257,11 +354,14 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []parser.Command, fn func(resp api.ProgressResponse)) error {
 	config := ConfigV2{
 		OS:           "linux",
 		Architecture: "amd64",
+		RootFS: RootFS{
+			Type: "layers",
+		},
 	}
 
 	deleteMap := make(map[string]struct{})
 
-	var layers []*LayerReader
+	var layers Layers
 
 	params := make(map[string][]string)
 	fromParams := make(map[string]any)
@@ -318,10 +418,10 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []parser.Command, fn func(resp api.ProgressResponse)) error {
 				return err
 			}
 
-			config.ModelFormat = fromConfig.ModelFormat
-			config.ModelFamily = fromConfig.ModelFamily
-			config.ModelType = fromConfig.ModelType
-			config.FileType = fromConfig.FileType
+			config.SetModelFormat(fromConfig.ModelFormat)
+			config.SetModelFamily(append(fromConfig.ModelFamilies, fromConfig.ModelFamily)...)
+			config.SetModelType(fromConfig.ModelType)
+			config.SetFileType(fromConfig.FileType)
 
 			for _, layer := range manifest.Layers {
 				deleteMap[layer.Digest] = struct{}{}
@@ -342,13 +442,12 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []parser.Command, fn func(resp api.ProgressResponse)) error {
 					}
 				}
 
-				layer, err := GetLayerWithBufferFromLayer(layer)
+				layer, err := NewLayerFromLayer(layer.Digest, layer.MediaType, modelpath.GetShortTagname())
 				if err != nil {
 					return err
 				}
 
-				layer.From = modelpath.GetShortTagname()
-				layers = append(layers, layer)
+				layers.Add(layer)
 			}
 
 			deleteMap[manifest.Config.Digest] = struct{}{}
@@ -356,26 +455,48 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []parser.Command, fn func(resp api.ProgressResponse)) error {
 			}
 			defer bin.Close()
 
+			var offset int64
+			for {
 				fn(api.ProgressResponse{Status: "creating model layer"})
 
+				bin.Seek(offset, io.SeekStart)
 				ggml, err := llm.DecodeGGML(bin)
+				if errors.Is(err, io.EOF) {
+					break
+				} else if err != nil {
+					return err
+				}
+
+				config.SetModelFormat(ggml.Name())
+				config.SetModelFamily(ggml.ModelFamily())
+				config.SetModelType(ggml.ModelType())
+				config.SetFileType(ggml.FileType())
+
+				mediatype := mediatype
+				if ggml.ModelFamily() == "clip" {
+					mediatype = "application/vnd.ollama.image.projector"
+				}
+
+				sr := io.NewSectionReader(bin, offset, ggml.Size)
+				layer, err := NewLayer(sr, mediatype)
 				if err != nil {
 					return err
 				}
 
-			config.ModelFormat = ggml.Name()
-			config.ModelFamily = ggml.ModelFamily()
-			config.ModelType = ggml.ModelType()
-			config.FileType = ggml.FileType()
-
-			bin.Seek(0, io.SeekStart)
-			layer, err := CreateLayer(bin)
-			if err != nil {
-				return err
-			}
-
-			layer.MediaType = mediatype
-			layers = append(layers, layer)
+				layers.Add(layer)
+
+				offset += ggml.Size
+			}
 		case "adapter":
+			if strings.HasPrefix(c.Args, "@") {
+				blobPath, err := GetBlobsPath(strings.TrimPrefix(c.Args, "@"))
+				if err != nil {
+					return err
+				}
+
+				c.Args = blobPath
+			}
+
 			fn(api.ProgressResponse{Status: "creating adapter layer"})
 			bin, err := os.Open(realpath(modelFileDir, c.Args))
 			if err != nil {
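This loop is the piece that makes multimodal Modelfiles work: a single `FROM` file may concatenate several GGML/GGUF blobs back to back (the text model plus a CLIP projector), and `CreateModel` decodes them one after another, using the `Size` that `DecodeGGML` now reports to slice each blob out with a section reader and to advance to the next one. A reduced sketch of the loop shape (the `splitBlobs` helper is hypothetical; `llm.DecodeGGML`, the exported `Size` field, and the EOF termination come from this diff):

```go
package main

import (
	"errors"
	"io"
	"os"

	"github.com/jmorganca/ollama/llm"
)

// splitBlobs walks a file that may contain several back-to-back GGML/GGUF
// blobs and returns one independent reader per blob.
func splitBlobs(f *os.File) ([]*io.SectionReader, error) {
	var sections []*io.SectionReader
	var offset int64
	for {
		f.Seek(offset, io.SeekStart)
		ggml, err := llm.DecodeGGML(f)
		if errors.Is(err, io.EOF) {
			break // ran off the end of the file: no more blobs
		} else if err != nil {
			return nil, err
		}

		// each blob becomes an independent reader over [offset, offset+Size)
		sections = append(sections, io.NewSectionReader(f, offset, ggml.Size))
		offset += ggml.Size
	}
	return sections, nil
}
```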
@@ -383,41 +504,32 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []parser.Command, fn func(resp api.ProgressResponse)) error {
 			}
 			defer bin.Close()
 
-			layer, err := CreateLayer(bin)
+			layer, err := NewLayer(bin, mediatype)
 			if err != nil {
 				return err
 			}
 
-			if layer.Size > 0 {
-				layer.MediaType = mediatype
-				layers = append(layers, layer)
-			}
+			layers.Add(layer)
 		case "license":
 			fn(api.ProgressResponse{Status: "creating license layer"})
-			layer, err := CreateLayer(strings.NewReader(c.Args))
+
+			bin := strings.NewReader(c.Args)
+			layer, err := NewLayer(bin, mediatype)
 			if err != nil {
 				return err
 			}
 
-			if layer.Size > 0 {
-				layer.MediaType = mediatype
-				layers = append(layers, layer)
-			}
+			layers.Add(layer)
 		case "template", "system":
 			fn(api.ProgressResponse{Status: fmt.Sprintf("creating %s layer", c.Name)})
 
-			// remove duplicate layers
-			layers = removeLayerFromLayers(layers, mediatype)
-
-			layer, err := CreateLayer(strings.NewReader(c.Args))
+			bin := strings.NewReader(c.Args)
+			layer, err := NewLayer(bin, mediatype)
 			if err != nil {
 				return err
 			}
 
-			if layer.Size > 0 {
-				layer.MediaType = mediatype
-				layers = append(layers, layer)
-			}
+			layers.Replace(layer)
 		default:
 			params[c.Name] = append(params[c.Name], c.Args)
 		}
@@ -426,7 +538,7 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []parser.Command, fn func(resp api.ProgressResponse)) error {
 	if len(params) > 0 {
 		fn(api.ProgressResponse{Status: "creating parameters layer"})
 
-		formattedParams, err := formatParams(params)
+		formattedParams, err := api.FormatParams(params)
 		if err != nil {
 			return err
 		}
@@ -437,6 +549,7 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []parser.Command, fn func(resp api.ProgressResponse)) error {
 		}
 	}
 
+	// xxx - can this be removed?
 	if config.ModelType == "65B" {
 		if gqa, ok := formattedParams["gqa"].(int); ok && gqa == 8 {
 			config.ModelType = "70B"
@@ -449,40 +562,51 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []parser.Command, fn func(resp api.ProgressResponse)) error {
 		}
 
 		fn(api.ProgressResponse{Status: "creating config layer"})
-		layer, err := CreateLayer(bytes.NewReader(b.Bytes()))
+		layer, err := NewLayer(&b, "application/vnd.ollama.image.params")
 		if err != nil {
 			return err
 		}
 
-		layer.MediaType = "application/vnd.ollama.image.params"
-		layers = append(layers, layer)
+		layers.Replace(layer)
 	}
 
-	digests, err := getLayerDigests(layers)
+	digests := make([]string, len(layers.items))
+	for i, layer := range layers.items {
+		digests[i] = layer.Digest
+	}
+
+	config.RootFS.DiffIDs = digests
+
+	var b bytes.Buffer
+	if err := json.NewEncoder(&b).Encode(config); err != nil {
+		return err
+	}
+
+	configLayer, err := NewLayer(&b, "application/vnd.docker.container.image.v1+json")
 	if err != nil {
 		return err
 	}
 
-	configLayer, err := createConfigLayer(config, digests)
-	if err != nil {
-		return err
-	}
-
-	layers = append(layers, configLayer)
 	delete(deleteMap, configLayer.Digest)
 
-	if err := SaveLayers(layers, fn, false); err != nil {
-		return err
-	}
-
-	var contentLayers []*Layer
-	for _, layer := range layers {
-		contentLayers = append(contentLayers, &layer.Layer)
+	for _, layer := range append(layers.items, configLayer) {
+		committed, err := layer.Commit()
+		if err != nil {
+			return err
+		}
+
+		status := "writing layer"
+		if !committed {
+			status = "using already created layer"
+		}
+
+		fn(api.ProgressResponse{Status: fmt.Sprintf("%s %s", status, layer.Digest)})
+
 		delete(deleteMap, layer.Digest)
 	}
 
 	fn(api.ProgressResponse{Status: "writing manifest"})
-	if err := CreateManifest(name, configLayer, contentLayers); err != nil {
+	if err := WriteManifest(name, configLayer, layers.items); err != nil {
 		return err
 	}
 
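`Layer.Commit` is only visible here at its call site, so the following is an assumption-labeled sketch of what a content-addressed commit typically does in this scheme: the layer's bytes were staged to a temp file while a SHA-256 digest was computed, and commit moves the staged file into the blob store only when no blob with that digest exists yet, reporting whether anything was actually written (which drives the "writing layer" vs "using already created layer" status above):

```go
package main

import (
	"errors"
	"os"
)

// commitBlob is a hypothetical stand-in for Layer.Commit: move a staged
// temp file into its content-addressed location, skipping the move when
// the digest already exists. Both paths are illustrative.
func commitBlob(tempFileName, blobPath string) (bool, error) {
	if _, err := os.Stat(blobPath); err == nil {
		// identical content already stored: drop the staged copy
		return false, os.Remove(tempFileName)
	} else if !errors.Is(err, os.ErrNotExist) {
		return false, err
	}

	// first time this digest has been seen: move the staged file into place
	return true, os.Rename(tempFileName, blobPath)
}
```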
@@ -496,177 +620,6 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []parser.Command, fn func(resp api.ProgressResponse)) error {
 	return nil
 }
 
-func removeLayerFromLayers(layers []*LayerReader, mediaType string) []*LayerReader {
-	return slices.DeleteFunc(layers, func(layer *LayerReader) bool {
-		return layer.MediaType == mediaType
-	})
-}
-
-func SaveLayers(layers []*LayerReader, fn func(resp api.ProgressResponse), force bool) error {
-	// Write each of the layers to disk
-	for _, layer := range layers {
-		fp, err := GetBlobsPath(layer.Digest)
-		if err != nil {
-			return err
-		}
-
-		_, err = os.Stat(fp)
-		if os.IsNotExist(err) || force {
-			fn(api.ProgressResponse{Status: fmt.Sprintf("writing layer %s", layer.Digest)})
-
-			out, err := os.Create(fp)
-			if err != nil {
-				log.Printf("couldn't create %s", fp)
-				return err
-			}
-			defer out.Close()
-
-			if _, err = io.Copy(out, layer.Reader); err != nil {
-				return err
-			}
-
-		} else {
-			fn(api.ProgressResponse{Status: fmt.Sprintf("using already created layer %s", layer.Digest)})
-		}
-	}
-
-	return nil
-}
-
-func CreateManifest(name string, cfg *LayerReader, layers []*Layer) error {
-	mp := ParseModelPath(name)
-	manifest := ManifestV2{
-		SchemaVersion: 2,
-		MediaType:     "application/vnd.docker.distribution.manifest.v2+json",
-		Config: Layer{
-			MediaType: cfg.MediaType,
-			Size:      cfg.Size,
-			Digest:    cfg.Digest,
-		},
-		Layers: layers,
-	}
-
-	manifestJSON, err := json.Marshal(manifest)
-	if err != nil {
-		return err
-	}
-
-	fp, err := mp.GetManifestPath()
-	if err != nil {
-		return err
-	}
-	if err := os.MkdirAll(filepath.Dir(fp), 0o755); err != nil {
-		return err
-	}
-	return os.WriteFile(fp, manifestJSON, 0o644)
-}
-
-func GetLayerWithBufferFromLayer(layer *Layer) (*LayerReader, error) {
-	fp, err := GetBlobsPath(layer.Digest)
-	if err != nil {
-		return nil, err
-	}
-
-	file, err := os.Open(fp)
-	if err != nil {
-		return nil, fmt.Errorf("could not open blob: %w", err)
-	}
-	defer file.Close()
-
-	newLayer, err := CreateLayer(file)
-	if err != nil {
-		return nil, err
-	}
-	newLayer.MediaType = layer.MediaType
-	return newLayer, nil
-}
-
-// formatParams converts specified parameter options to their correct types
-func formatParams(params map[string][]string) (map[string]interface{}, error) {
-	opts := api.Options{}
-	valueOpts := reflect.ValueOf(&opts).Elem() // names of the fields in the options struct
-	typeOpts := reflect.TypeOf(opts)           // types of the fields in the options struct
-
-	// build map of json struct tags to their types
-	jsonOpts := make(map[string]reflect.StructField)
-	for _, field := range reflect.VisibleFields(typeOpts) {
-		jsonTag := strings.Split(field.Tag.Get("json"), ",")[0]
-		if jsonTag != "" {
-			jsonOpts[jsonTag] = field
-		}
-	}
-
-	out := make(map[string]interface{})
-	// iterate params and set values based on json struct tags
-	for key, vals := range params {
-		if opt, ok := jsonOpts[key]; ok {
-			field := valueOpts.FieldByName(opt.Name)
-			if field.IsValid() && field.CanSet() {
-				switch field.Kind() {
-				case reflect.Float32:
-					floatVal, err := strconv.ParseFloat(vals[0], 32)
-					if err != nil {
-						return nil, fmt.Errorf("invalid float value %s", vals)
-					}
-
-					out[key] = float32(floatVal)
-				case reflect.Int:
-					intVal, err := strconv.ParseInt(vals[0], 10, 64)
-					if err != nil {
-						return nil, fmt.Errorf("invalid int value %s", vals)
-					}
-
-					out[key] = intVal
-				case reflect.Bool:
-					boolVal, err := strconv.ParseBool(vals[0])
-					if err != nil {
-						return nil, fmt.Errorf("invalid bool value %s", vals)
-					}
-
-					out[key] = boolVal
-				case reflect.String:
-					out[key] = vals[0]
-				case reflect.Slice:
-					// TODO: only string slices are supported right now
-					out[key] = vals
-				default:
-					return nil, fmt.Errorf("unknown type %s for %s", field.Kind(), key)
-				}
-			}
-		}
-	}
-
-	return out, nil
-}
-
-func getLayerDigests(layers []*LayerReader) ([]string, error) {
-	var digests []string
-	for _, l := range layers {
-		if l.Digest == "" {
-			return nil, fmt.Errorf("layer is missing a digest")
-		}
-		digests = append(digests, l.Digest)
-	}
-	return digests, nil
-}
-
-// CreateLayer creates a Layer object from a given file
-func CreateLayer(f io.ReadSeeker) (*LayerReader, error) {
-	digest, size := GetSHA256Digest(f)
-	f.Seek(0, io.SeekStart)
-
-	layer := &LayerReader{
-		Layer: Layer{
-			MediaType: "application/vnd.docker.image.rootfs.diff.tar",
-			Digest:    digest,
-			Size:      size,
-		},
-		Reader: f,
-	}
-
-	return layer, nil
-}
-
 func CopyModel(src, dest string) error {
 	srcModelPath := ParseModelPath(src)
 	srcPath, err := srcModelPath.GetManifestPath()
@@ -934,7 +887,7 @@ func PushModel(ctx context.Context, name string, regOpts *RegistryOptions, fn func(api.ProgressResponse)) error {
 
 	var layers []*Layer
 	layers = append(layers, manifest.Layers...)
-	layers = append(layers, &manifest.Config)
+	layers = append(layers, manifest.Config)
 
 	for _, layer := range layers {
 		if err := uploadBlob(ctx, mp, layer, regOpts, fn); err != nil {
@@ -1005,7 +958,7 @@ func PullModel(ctx context.Context, name string, regOpts *RegistryOptions, fn func(api.ProgressResponse)) error {
 
 	var layers []*Layer
 	layers = append(layers, manifest.Layers...)
-	layers = append(layers, &manifest.Config)
+	layers = append(layers, manifest.Config)
 
 	for _, layer := range layers {
 		if err := downloadBlob(
@@ -1093,30 +1046,6 @@ func pullModelManifest(ctx context.Context, mp ModelPath, regOpts *RegistryOptions) (*ManifestV2, error) {
 	return m, err
 }
 
-func createConfigLayer(config ConfigV2, layers []string) (*LayerReader, error) {
-	config.RootFS = RootFS{
-		Type:    "layers",
-		DiffIDs: layers,
-	}
-
-	configJSON, err := json.Marshal(config)
-	if err != nil {
-		return nil, err
-	}
-
-	digest, size := GetSHA256Digest(bytes.NewBuffer(configJSON))
-
-	layer := &LayerReader{
-		Layer: Layer{
-			MediaType: "application/vnd.docker.container.image.v1+json",
-			Digest:    digest,
-			Size:      size,
-		},
-		Reader: bytes.NewBuffer(configJSON),
-	}
-	return layer, nil
-}
-
 // GetSHA256Digest returns the SHA256 hash of a given buffer and returns it, and the size of buffer
 func GetSHA256Digest(r io.Reader) (string, int64) {
 	h := sha256.New()
@@ -1,23 +1,98 @@
 package server
 
 import (
+	"strings"
 	"testing"
 
 	"github.com/jmorganca/ollama/api"
 )
 
-func TestModelPrompt(t *testing.T) {
-	var m Model
-	req := api.GenerateRequest{
-		Template: "a{{ .Prompt }}b",
-		Prompt:   "<h1>",
+func TestChat(t *testing.T) {
+	tests := []struct {
+		name     string
+		template string
+		msgs     []api.Message
+		want     string
+		wantErr  string
+	}{
+		{
+			name:     "Single Message",
+			template: "[INST] {{ .System }} {{ .Prompt }} [/INST]",
+			msgs: []api.Message{
+				{
+					Role:    "system",
+					Content: "You are a Wizard.",
+				},
+				{
+					Role:    "user",
+					Content: "What are the potion ingredients?",
+				},
+			},
+			want: "[INST] You are a Wizard. What are the potion ingredients? [/INST]",
+		},
+		{
+			name:     "Message History",
+			template: "[INST] {{ .System }} {{ .Prompt }} [/INST]",
+			msgs: []api.Message{
+				{
+					Role:    "system",
+					Content: "You are a Wizard.",
+				},
+				{
+					Role:    "user",
+					Content: "What are the potion ingredients?",
+				},
+				{
+					Role:    "assistant",
+					Content: "sugar",
+				},
+				{
+					Role:    "user",
+					Content: "Anything else?",
+				},
+			},
+			want: "[INST] You are a Wizard. What are the potion ingredients? [/INST]sugar[INST] Anything else? [/INST]",
+		},
+		{
+			name:     "Assistant Only",
+			template: "[INST] {{ .System }} {{ .Prompt }} [/INST]",
+			msgs: []api.Message{
+				{
+					Role:    "assistant",
+					Content: "everything nice",
+				},
+			},
+			want: "[INST] [/INST]everything nice",
+		},
+		{
+			name: "Invalid Role",
+			msgs: []api.Message{
+				{
+					Role:    "not-a-role",
+					Content: "howdy",
+				},
+			},
+			wantErr: "invalid role: not-a-role",
+		},
 	}
-	s, err := m.Prompt(req)
-	if err != nil {
-		t.Fatal(err)
+
+	for _, tt := range tests {
+		m := Model{
+			Template: tt.template,
+		}
+		t.Run(tt.name, func(t *testing.T) {
+			got, _, err := m.ChatPrompt(tt.msgs)
+			if tt.wantErr != "" {
+				if err == nil {
+					t.Errorf("ChatPrompt() expected error, got nil")
+				}
+				if !strings.Contains(err.Error(), tt.wantErr) {
+					t.Errorf("ChatPrompt() error = %v, wantErr %v", err, tt.wantErr)
+				}
+			}
+			if got != tt.want {
+				t.Errorf("ChatPrompt() got = %v, want %v", got, tt.want)
+			}
+		})
 	}
-	want := "a<h1>b"
-	if s != want {
-		t.Errorf("got %q, want %q", s, want)
-	}
 }
109	server/layers.go	Normal file
@@ -0,0 +1,109 @@
+package server
+
+import (
+	"crypto/sha256"
+	"fmt"
+	"io"
+	"os"
+	"runtime"
+	"strings"
+
+	"golang.org/x/exp/slices"
+)
+
+type Layers struct {
+	items []*Layer
+}
+
+func (ls *Layers) Add(layer *Layer) {
+	if layer.Size > 0 {
+		ls.items = append(ls.items, layer)
+	}
+}
+
+func (ls *Layers) Replace(layer *Layer) {
+	if layer.Size > 0 {
+		mediatype := layer.MediaType
+		layers := slices.DeleteFunc(ls.items, func(l *Layer) bool {
+			return l.MediaType == mediatype
+		})
+
+		ls.items = append(layers, layer)
+	}
+}
+
+type Layer struct {
+	MediaType string `json:"mediaType"`
+	Digest    string `json:"digest"`
+	Size      int64  `json:"size"`
+	From      string `json:"from,omitempty"`
+
+	tempFileName string
+}
+
+func NewLayer(r io.Reader, mediatype string) (*Layer, error) {
+	blobs, err := GetBlobsPath("")
+	if err != nil {
+		return nil, err
+	}
+
+	delimiter := ":"
+	if runtime.GOOS == "windows" {
+		delimiter = "-"
+	}
+
+	pattern := strings.Join([]string{"sha256", "*-partial"}, delimiter)
+	temp, err := os.CreateTemp(blobs, pattern)
+	if err != nil {
+		return nil, err
+	}
+	defer temp.Close()
+
+	sha256sum := sha256.New()
+	n, err := io.Copy(io.MultiWriter(temp, sha256sum), r)
+	if err != nil {
+		return nil, err
+	}
+
+	return &Layer{
+		MediaType:    mediatype,
+		Digest:       fmt.Sprintf("sha256:%x", sha256sum.Sum(nil)),
+		Size:         n,
+		tempFileName: temp.Name(),
+	}, nil
+}
+
+func NewLayerFromLayer(digest, mediatype, from string) (*Layer, error) {
+	blob, err := GetBlobsPath(digest)
+	if err != nil {
+		return nil, err
+	}
+
+	fi, err := os.Stat(blob)
+	if err != nil {
+		return nil, err
+	}
+
+	return &Layer{
+		MediaType: mediatype,
+		Digest:    digest,
+		Size:      fi.Size(),
+		From:      from,
+	}, nil
+}
+
+func (l *Layer) Commit() (bool, error) {
+	// always remove temp
+	defer os.Remove(l.tempFileName)
+
+	blob, err := GetBlobsPath(l.Digest)
+	if err != nil {
+		return false, err
+	}
+
+	if _, err := os.Stat(blob); err != nil {
+		return true, os.Rename(l.tempFileName, blob)
+	}
+
+	return false, nil
+}
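A minimal sketch (not part of the diff) of how the new Layer type is meant to be used from inside the server package; the helper name and media type value here are illustrative, but CreateBlobHandler in routes.go below follows the same NewLayer/Commit pattern:

	// importBlob is a hypothetical helper living in package server.
	func importBlob(path string) (*Layer, error) {
		f, err := os.Open(path)
		if err != nil {
			return nil, err
		}
		defer f.Close()

		// NewLayer streams the reader into a temp file under the blob
		// store, hashing it as it goes.
		layer, err := NewLayer(f, "application/vnd.ollama.image.model")
		if err != nil {
			return nil, err
		}

		// Commit renames the temp file into its digest-addressed location;
		// the bool reports whether the blob was newly written.
		if _, err := layer.Commit(); err != nil {
			return nil, err
		}

		return layer, nil
	}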
34	server/manifests.go	Normal file
@@ -0,0 +1,34 @@
+package server
+
+import (
+	"bytes"
+	"encoding/json"
+	"os"
+	"path/filepath"
+)
+
+func WriteManifest(name string, config *Layer, layers []*Layer) error {
+	manifest := ManifestV2{
+		SchemaVersion: 2,
+		MediaType:     "application/vnd.docker.distribution.manifest.v2+json",
+		Config:        config,
+		Layers:        layers,
+	}
+
+	var b bytes.Buffer
+	if err := json.NewEncoder(&b).Encode(manifest); err != nil {
+		return err
+	}
+
+	modelpath := ParseModelPath(name)
+	manifestPath, err := modelpath.GetManifestPath()
+	if err != nil {
+		return err
+	}
+
+	if err := os.MkdirAll(filepath.Dir(manifestPath), 0755); err != nil {
+		return err
+	}
+
+	return os.WriteFile(manifestPath, b.Bytes(), 0644)
+}
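A sketch (hypothetical variables, same package, not part of the diff) of how the Layers collection and WriteManifest fit together when assembling a model:

	var layers Layers
	layers.Add(modelLayer)      // silently skipped when Size == 0
	layers.Replace(systemLayer) // drops any earlier layer with the same media type

	// write the manifest pointing at the config layer and the content layers
	if err := WriteManifest("mymodel:latest", configLayer, layers.items); err != nil {
		return err
	}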
@@ -67,6 +67,20 @@ func ParseModelPath(name string) ModelPath {
 	return mp
 }
 
+var errModelPathInvalid = errors.New("invalid model path")
+
+func (mp ModelPath) Validate() error {
+	if mp.Repository == "" {
+		return fmt.Errorf("%w: model repository name is required", errModelPathInvalid)
+	}
+
+	if strings.Contains(mp.Tag, ":") {
+		return fmt.Errorf("%w: ':' (colon) is not allowed in tag names", errModelPathInvalid)
+	}
+
+	return nil
+}
+
 func (mp ModelPath) GetNamespaceRepository() string {
 	return fmt.Sprintf("%s/%s", mp.Namespace, mp.Repository)
 }
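Because Validate wraps the errModelPathInvalid sentinel with %w, callers can classify failures with errors.Is. A small sketch (illustrative values, not part of the diff):

	// a well-formed path passes
	if err := ParseModelPath("library/mymodel:latest").Validate(); err != nil {
		panic(err)
	}

	// a colon inside the tag fails, and the error wraps the sentinel
	err := (ModelPath{Repository: "mymodel", Tag: "v1:bad"}).Validate()
	fmt.Println(errors.Is(err, errModelPathInvalid)) // true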
519	server/routes.go
@@ -2,7 +2,6 @@ package server
 
 import (
 	"context"
-	"crypto/sha256"
 	"encoding/json"
 	"errors"
 	"fmt"
@@ -33,6 +32,10 @@ import (
 
 var mode string = gin.DebugMode
 
+type Server struct {
+	WorkDir string
+}
+
 func init() {
 	switch mode {
 	case gin.DebugMode:
@@ -60,17 +63,26 @@ var loaded struct {
 var defaultSessionDuration = 5 * time.Minute
 
 // load a model into memory if it is not already loaded, it is up to the caller to lock loaded.mu before calling this function
-func load(ctx context.Context, workDir string, model *Model, reqOpts map[string]interface{}, sessionDuration time.Duration) error {
+func load(c *gin.Context, modelName string, reqOpts map[string]interface{}, sessionDuration time.Duration) (*Model, error) {
+	model, err := GetModel(modelName)
+	if err != nil {
+		return nil, err
+	}
+
+	workDir := c.GetString("workDir")
+
 	opts := api.DefaultOptions()
 	if err := opts.FromMap(model.Options); err != nil {
 		log.Printf("could not load model options: %v", err)
-		return err
+		return nil, err
 	}
 
 	if err := opts.FromMap(reqOpts); err != nil {
-		return err
+		return nil, err
 	}
 
+	ctx := c.Request.Context()
+
 	// check if the loaded model is still running in a subprocess, in case something unexpected happened
 	if loaded.runner != nil {
 		if err := loaded.runner.Ping(ctx); err != nil {
@@ -97,7 +109,7 @@ func load(ctx context.Context, workDir string, model *Model, reqOpts map[string]
 		loaded.Options = nil
 	}
 
-	llmRunner, err := llm.New(workDir, model.ModelPath, model.AdapterPaths, opts)
+	llmRunner, err := llm.New(workDir, model.ModelPath, model.AdapterPaths, model.ProjectorPaths, opts)
 	if err != nil {
 		// some older models are not compatible with newer versions of llama.cpp
 		// show a generalized compatibility error until there is a better way to
@@ -106,7 +118,7 @@ func load(ctx context.Context, workDir string, model *Model, reqOpts map[string]
 			err = fmt.Errorf("%v: this model may be incompatible with your version of Ollama. If you previously pulled this model, try updating it by running `ollama pull %s`", err, model.ShortName)
 		}
 
-		return err
+		return nil, err
 	}
 
 	loaded.Model = model
@@ -140,7 +152,7 @@ func load(ctx context.Context, workDir string, model *Model, reqOpts map[string]
 	}
 
 	loaded.expireTimer.Reset(sessionDuration)
-	return nil
+	return model, nil
 }
 
 func GenerateHandler(c *gin.Context) {
@@ -148,9 +160,9 @@ func GenerateHandler(c *gin.Context) {
 	defer loaded.mu.Unlock()
 
 	checkpointStart := time.Now()
 
 	var req api.GenerateRequest
 	err := c.ShouldBindJSON(&req)
 
 	switch {
 	case errors.Is(err, io.EOF):
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
@@ -173,88 +185,150 @@ func GenerateHandler(c *gin.Context) {
 		return
 	}
 
-	model, err := GetModel(req.Model)
+	sessionDuration := defaultSessionDuration
+	model, err := load(c, req.Model, req.Options, sessionDuration)
 	if err != nil {
 		var pErr *fs.PathError
-		if errors.As(err, &pErr) {
+		switch {
+		case errors.As(err, &pErr):
 			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found, try pulling it first", req.Model)})
-			return
-		}
-		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
-		return
-	}
-
-	workDir := c.GetString("workDir")
-
-	// TODO: set this duration from the request if specified
-	sessionDuration := defaultSessionDuration
-	if err := load(c.Request.Context(), workDir, model, req.Options, sessionDuration); err != nil {
-		if errors.Is(err, api.ErrInvalidOpts) {
-			c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
-			return
-		}
-		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+		case errors.Is(err, api.ErrInvalidOpts):
+			c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
+		default:
+			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+		}
+		return
+	}
+
+	// an empty request loads the model
+	if req.Prompt == "" && req.Template == "" && req.System == "" {
+		c.JSON(http.StatusOK, api.GenerateResponse{
+			CreatedAt: time.Now().UTC(),
+			Model:     req.Model,
+			Done:      true})
 		return
 	}
 
 	checkpointLoaded := time.Now()
 
-	prompt := req.Prompt
-	if !req.Raw {
-		prompt, err = model.Prompt(req)
+	var prompt string
+	switch {
+	case req.Raw:
+		prompt = req.Prompt
+	case req.Prompt != "":
+		if req.Template != "" {
+			// override the default model template
+			model.Template = req.Template
+		}
+
+		var rebuild strings.Builder
+		if req.Context != nil {
+			// TODO: context is deprecated, at some point the context logic within this conditional should be removed
+			prevCtx, err := loaded.runner.Decode(c.Request.Context(), req.Context)
 			if err != nil {
 				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 				return
 			}
+
+			// Remove leading spaces from prevCtx if present
+			prevCtx = strings.TrimPrefix(prevCtx, " ")
+			rebuild.WriteString(prevCtx)
+		}
+		p, err := model.Prompt(PromptVars{
+			System: req.System,
+			Prompt: req.Prompt,
+			First:  len(req.Context) == 0,
+		})
+		if err != nil {
+			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+			return
+		}
+		rebuild.WriteString(p)
+		prompt = rebuild.String()
 	}
 
 	ch := make(chan any)
+	var generated strings.Builder
 	go func() {
 		defer close(ch)
-		// an empty request loads the model
-		if req.Prompt == "" && req.Template == "" && req.System == "" {
-			ch <- api.GenerateResponse{CreatedAt: time.Now().UTC(), Model: req.Model, Done: true}
-			return
-		}
 
-		fn := func(r api.GenerateResponse) {
+		fn := func(r llm.PredictResult) {
+			// Update model expiration
 			loaded.expireAt = time.Now().Add(sessionDuration)
 			loaded.expireTimer.Reset(sessionDuration)
 
-			r.Model = req.Model
-			r.CreatedAt = time.Now().UTC()
+			// Build up the full response
+			if _, err := generated.WriteString(r.Content); err != nil {
+				ch <- gin.H{"error": err.Error()}
+				return
+			}
+
+			resp := api.GenerateResponse{
+				Model:     req.Model,
+				CreatedAt: time.Now().UTC(),
+				Done:      r.Done,
+				Response:  r.Content,
+				Metrics: api.Metrics{
+					PromptEvalCount:    r.PromptEvalCount,
+					PromptEvalDuration: r.PromptEvalDuration,
+					EvalCount:          r.EvalCount,
+					EvalDuration:       r.EvalDuration,
+				},
+			}
+
 			if r.Done {
-				r.TotalDuration = time.Since(checkpointStart)
-				r.LoadDuration = checkpointLoaded.Sub(checkpointStart)
+				resp.TotalDuration = time.Since(checkpointStart)
+				resp.LoadDuration = checkpointLoaded.Sub(checkpointStart)
+
+				if !req.Raw {
+					embd, err := loaded.runner.Encode(c.Request.Context(), prompt+generated.String())
+					if err != nil {
+						ch <- gin.H{"error": err.Error()}
+						return
+					}
+					resp.Context = embd
+				}
 			}
 
-			if req.Raw {
-				// in raw mode the client must manage history on their own
-				r.Context = nil
-			}
-
-			ch <- r
+			ch <- resp
 		}
 
-		if err := loaded.runner.Predict(c.Request.Context(), req.Context, prompt, req.Format, fn); err != nil {
+		// Start prediction
+		predictReq := llm.PredictOpts{
+			Prompt: prompt,
+			Format: req.Format,
+			Images: req.Images,
+		}
+		if err := loaded.runner.Predict(c.Request.Context(), predictReq, fn); err != nil {
 			ch <- gin.H{"error": err.Error()}
 		}
 	}()
 
 	if req.Stream != nil && !*req.Stream {
-		var response api.GenerateResponse
-		generated := ""
+		// Accumulate responses into the final response
+		var final api.GenerateResponse
+		var sb strings.Builder
 		for resp := range ch {
-			if r, ok := resp.(api.GenerateResponse); ok {
-				generated += r.Response
-				response = r
-			} else {
-				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
-				return
+			switch r := resp.(type) {
+			case api.GenerateResponse:
+				sb.WriteString(r.Response)
+				final = r
+			case gin.H:
+				if errorMsg, ok := r["error"].(string); ok {
+					c.JSON(http.StatusInternalServerError, gin.H{"error": errorMsg})
+					return
+				} else {
+					c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected error format in response"})
+					return
+				}
+			default:
+				c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected error"})
+				return
 			}
 		}
-		response.Response = generated
-		c.JSON(http.StatusOK, response)
+
+		final.Response = sb.String()
+		c.JSON(http.StatusOK, final)
 		return
 	}
 
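When stream is false, GenerateHandler now drains the channel itself and replies with one accumulated GenerateResponse. A self-contained client sketch against a locally running server (model name and prompt are placeholders; not part of the diff):

	package main

	import (
		"bytes"
		"encoding/json"
		"fmt"
		"net/http"
	)

	func main() {
		body, _ := json.Marshal(map[string]any{
			"model":  "llama2", // placeholder model name
			"prompt": "Why is the sky blue?",
			"stream": false,
		})

		resp, err := http.Post("http://127.0.0.1:11434/api/generate", "application/json", bytes.NewReader(body))
		if err != nil {
			panic(err)
		}
		defer resp.Body.Close()

		// a single JSON object carrying the full response and final metrics
		var out struct {
			Response string `json:"response"`
			Done     bool   `json:"done"`
		}
		if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
			panic(err)
		}
		fmt.Println(out.Response)
	}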
@@ -281,15 +355,18 @@ func EmbeddingHandler(c *gin.Context) {
 		return
 	}
 
-	model, err := GetModel(req.Model)
+	sessionDuration := defaultSessionDuration
+	_, err = load(c, req.Model, req.Options, sessionDuration)
 	if err != nil {
-		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
-		return
-	}
-
-	workDir := c.GetString("workDir")
-	if err := load(c.Request.Context(), workDir, model, req.Options, 5*time.Minute); err != nil {
-		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
+		var pErr *fs.PathError
+		switch {
+		case errors.As(err, &pErr):
+			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found, try pulling it first", req.Model)})
+		case errors.Is(err, api.ErrInvalidOpts):
+			c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
+		default:
+			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+		}
 		return
 	}
@@ -416,6 +493,11 @@ func CreateModelHandler(c *gin.Context) {
 		return
 	}
 
+	if err := ParseModelPath(req.Name).Validate(); err != nil {
+		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
+		return
+	}
+
 	if req.Path == "" && req.Modelfile == "" {
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "path or modelfile are required"})
 		return
@@ -538,10 +620,19 @@ func GetModelInfo(name string) (*api.ShowResponse, error) {
 		return nil, err
 	}
 
+	modelDetails := api.ModelDetails{
+		Format:            model.Config.ModelFormat,
+		Family:            model.Config.ModelFamily,
+		Families:          model.Config.ModelFamilies,
+		ParameterSize:     model.Config.ModelType,
+		QuantizationLevel: model.Config.FileType,
+	}
+
 	resp := &api.ShowResponse{
 		License:  strings.Join(model.License, "\n"),
 		System:   model.System,
 		Template: model.Template,
+		Details:  modelDetails,
 	}
 
 	mf, err := ShowModelfile(model)
@@ -591,25 +682,42 @@ func ListModelsHandler(c *gin.Context) {
 		return
 	}
 
+	modelResponse := func(modelName string) (api.ModelResponse, error) {
+		model, err := GetModel(modelName)
+		if err != nil {
+			return api.ModelResponse{}, err
+		}
+
+		modelDetails := api.ModelDetails{
+			Format:            model.Config.ModelFormat,
+			Family:            model.Config.ModelFamily,
+			Families:          model.Config.ModelFamilies,
+			ParameterSize:     model.Config.ModelType,
+			QuantizationLevel: model.Config.FileType,
+		}
+
+		return api.ModelResponse{
+			Name:    model.ShortName,
+			Size:    model.Size,
+			Digest:  model.Digest,
+			Details: modelDetails,
+		}, nil
+	}
+
 	walkFunc := func(path string, info os.FileInfo, _ error) error {
 		if !info.IsDir() {
 			dir, file := filepath.Split(path)
 			dir = strings.Trim(strings.TrimPrefix(dir, fp), string(os.PathSeparator))
 			tag := strings.Join([]string{dir, file}, ":")
 
-			mp := ParseModelPath(tag)
-			manifest, digest, err := GetManifest(mp)
+			resp, err := modelResponse(tag)
 			if err != nil {
 				log.Printf("skipping file: %s", fp)
 				return nil
 			}
 
-			models = append(models, api.ModelResponse{
-				Name:       mp.GetShortTagname(),
-				Size:       manifest.GetTotalSize(),
-				Digest:     digest,
-				ModifiedAt: info.ModTime(),
-			})
+			resp.ModifiedAt = info.ModTime()
+			models = append(models, resp)
 		}
 
 		return nil
@@ -640,6 +748,11 @@ func CopyModelHandler(c *gin.Context) {
 		return
 	}
 
+	if err := ParseModelPath(req.Destination).Validate(); err != nil {
+		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
+		return
+	}
+
 	if err := CopyModel(req.Source, req.Destination); err != nil {
 		if os.IsNotExist(err) {
 			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Source)})
@@ -666,37 +779,18 @@ func HeadBlobHandler(c *gin.Context) {
 }
 
 func CreateBlobHandler(c *gin.Context) {
-	targetPath, err := GetBlobsPath(c.Param("digest"))
+	layer, err := NewLayer(c.Request.Body, "")
 	if err != nil {
 		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 		return
 	}
 
-	hash := sha256.New()
-	temp, err := os.CreateTemp(filepath.Dir(targetPath), c.Param("digest")+"-")
-	if err != nil {
-		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
-		return
-	}
-	defer temp.Close()
-	defer os.Remove(temp.Name())
-
-	if _, err := io.Copy(temp, io.TeeReader(c.Request.Body, hash)); err != nil {
-		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+	if layer.Digest != c.Param("digest") {
+		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("digest mismatch, expected %q, got %q", c.Param("digest"), layer.Digest)})
 		return
 	}
 
-	if fmt.Sprintf("sha256:%x", hash.Sum(nil)) != c.Param("digest") {
-		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "digest does not match body"})
-		return
-	}
-
-	if err := temp.Close(); err != nil {
-		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
-		return
-	}
-
-	if err := os.Rename(temp.Name(), targetPath); err != nil {
+	if _, err := layer.Commit(); err != nil {
 		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 		return
 	}
@@ -710,7 +804,72 @@ var defaultAllowOrigins = []string{
 	"0.0.0.0",
 }
 
-func Serve(ln net.Listener, allowOrigins []string) error {
+func NewServer() (*Server, error) {
+	workDir, err := os.MkdirTemp("", "ollama")
+	if err != nil {
+		return nil, err
+	}
+
+	return &Server{
+		WorkDir: workDir,
+	}, nil
+}
+
+func (s *Server) GenerateRoutes() http.Handler {
+	var origins []string
+	if o := os.Getenv("OLLAMA_ORIGINS"); o != "" {
+		origins = strings.Split(o, ",")
+	}
+
+	config := cors.DefaultConfig()
+	config.AllowWildcard = true
+
+	config.AllowOrigins = origins
+	for _, allowOrigin := range defaultAllowOrigins {
+		config.AllowOrigins = append(config.AllowOrigins,
+			fmt.Sprintf("http://%s", allowOrigin),
+			fmt.Sprintf("https://%s", allowOrigin),
+			fmt.Sprintf("http://%s:*", allowOrigin),
+			fmt.Sprintf("https://%s:*", allowOrigin),
+		)
+	}
+
+	r := gin.Default()
+	r.Use(
+		cors.New(config),
+		func(c *gin.Context) {
+			c.Set("workDir", s.WorkDir)
+			c.Next()
+		},
+	)
+
+	r.POST("/api/pull", PullModelHandler)
+	r.POST("/api/generate", GenerateHandler)
+	r.POST("/api/chat", ChatHandler)
+	r.POST("/api/embeddings", EmbeddingHandler)
+	r.POST("/api/create", CreateModelHandler)
+	r.POST("/api/push", PushModelHandler)
+	r.POST("/api/copy", CopyModelHandler)
+	r.DELETE("/api/delete", DeleteModelHandler)
+	r.POST("/api/show", ShowModelHandler)
+	r.POST("/api/blobs/:digest", CreateBlobHandler)
+	r.HEAD("/api/blobs/:digest", HeadBlobHandler)
+
+	for _, method := range []string{http.MethodGet, http.MethodHead} {
+		r.Handle(method, "/", func(c *gin.Context) {
+			c.String(http.StatusOK, "Ollama is running")
+		})
+
+		r.Handle(method, "/api/tags", ListModelsHandler)
+		r.Handle(method, "/api/version", func(c *gin.Context) {
+			c.JSON(http.StatusOK, gin.H{"version": version.Version})
+		})
+	}
+
+	return r
+}
+
+func Serve(ln net.Listener) error {
 	if noprune := os.Getenv("OLLAMA_NOPRUNE"); noprune == "" {
 		// clean up unused layers and manifests
 		if err := PruneLayers(); err != nil {
@@ -727,55 +886,14 @@ func Serve(ln net.Listener, allowOrigins []string) error {
 		}
 	}
 
-	config := cors.DefaultConfig()
-	config.AllowWildcard = true
-
-	config.AllowOrigins = allowOrigins
-	for _, allowOrigin := range defaultAllowOrigins {
-		config.AllowOrigins = append(config.AllowOrigins,
-			fmt.Sprintf("http://%s", allowOrigin),
-			fmt.Sprintf("https://%s", allowOrigin),
-			fmt.Sprintf("http://%s:*", allowOrigin),
-			fmt.Sprintf("https://%s:*", allowOrigin),
-		)
-	}
-
-	workDir, err := os.MkdirTemp("", "ollama")
+	s, err := NewServer()
 	if err != nil {
 		return err
 	}
-	defer os.RemoveAll(workDir)
-
-	r := gin.Default()
-	r.Use(
-		cors.New(config),
-		func(c *gin.Context) {
-			c.Set("workDir", workDir)
-			c.Next()
-		},
-	)
-
-	r.POST("/api/pull", PullModelHandler)
-	r.POST("/api/generate", GenerateHandler)
-	r.POST("/api/embeddings", EmbeddingHandler)
-	r.POST("/api/create", CreateModelHandler)
-	r.POST("/api/push", PushModelHandler)
-	r.POST("/api/copy", CopyModelHandler)
-	r.DELETE("/api/delete", DeleteModelHandler)
-	r.POST("/api/show", ShowModelHandler)
-	r.POST("/api/blobs/:digest", CreateBlobHandler)
-	r.HEAD("/api/blobs/:digest", HeadBlobHandler)
-
-	for _, method := range []string{http.MethodGet, http.MethodHead} {
-		r.Handle(method, "/", func(c *gin.Context) {
-			c.String(http.StatusOK, "Ollama is running")
-		})
-
-		r.Handle(method, "/api/tags", ListModelsHandler)
-	}
+	r := s.GenerateRoutes()
 
 	log.Printf("Listening on %s (version %s)", ln.Addr(), version.Version)
-	s := &http.Server{
+	srvr := &http.Server{
 		Handler: r,
 	}
@@ -787,18 +905,18 @@ func Serve(ln net.Listener, allowOrigins []string) error {
 		if loaded.runner != nil {
 			loaded.runner.Close()
 		}
-		os.RemoveAll(workDir)
+		os.RemoveAll(s.WorkDir)
 		os.Exit(0)
 	}()
 
 	if runtime.GOOS == "linux" {
 		// check compatibility to log warnings
 		if _, err := llm.CheckVRAM(); err != nil {
-			log.Printf(err.Error())
+			log.Print(err.Error())
 		}
 	}
 
-	return s.Serve(ln)
+	return srvr.Serve(ln)
 }
 
 func waitForStream(c *gin.Context, ch chan interface{}) {
@@ -850,3 +968,132 @@ func streamResponse(c *gin.Context, ch chan any) {
 		return true
 	})
 }
+
+func ChatHandler(c *gin.Context) {
+	loaded.mu.Lock()
+	defer loaded.mu.Unlock()
+
+	checkpointStart := time.Now()
+
+	var req api.ChatRequest
+	err := c.ShouldBindJSON(&req)
+	switch {
+	case errors.Is(err, io.EOF):
+		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
+		return
+	case err != nil:
+		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
+		return
+	}
+
+	// validate the request
+	switch {
+	case req.Model == "":
+		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
+		return
+	case len(req.Format) > 0 && req.Format != "json":
+		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "format must be json"})
+		return
+	}
+
+	sessionDuration := defaultSessionDuration
+	model, err := load(c, req.Model, req.Options, sessionDuration)
+	if err != nil {
+		var pErr *fs.PathError
+		switch {
+		case errors.As(err, &pErr):
+			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found, try pulling it first", req.Model)})
+		case errors.Is(err, api.ErrInvalidOpts):
+			c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
+		default:
+			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+		}
+		return
+	}
+
+	// an empty request loads the model
+	if len(req.Messages) == 0 {
+		c.JSON(http.StatusOK, api.ChatResponse{CreatedAt: time.Now().UTC(), Model: req.Model, Done: true, Message: api.Message{Role: "assistant"}})
+		return
+	}
+
+	checkpointLoaded := time.Now()
+
+	prompt, images, err := model.ChatPrompt(req.Messages)
+	if err != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
+		return
+	}
+
+	ch := make(chan any)
+
+	go func() {
+		defer close(ch)
+
+		fn := func(r llm.PredictResult) {
+			// Update model expiration
+			loaded.expireAt = time.Now().Add(sessionDuration)
+			loaded.expireTimer.Reset(sessionDuration)
+
+			resp := api.ChatResponse{
+				Model:     req.Model,
+				CreatedAt: time.Now().UTC(),
+				Message:   api.Message{Role: "assistant", Content: r.Content},
+				Done:      r.Done,
+				Metrics: api.Metrics{
+					PromptEvalCount:    r.PromptEvalCount,
+					PromptEvalDuration: r.PromptEvalDuration,
+					EvalCount:          r.EvalCount,
+					EvalDuration:       r.EvalDuration,
+				},
+			}
+
+			if r.Done {
+				resp.TotalDuration = time.Since(checkpointStart)
+				resp.LoadDuration = checkpointLoaded.Sub(checkpointStart)
+			}
+
+			ch <- resp
+		}
+
+		// Start prediction
+		predictReq := llm.PredictOpts{
+			Prompt: prompt,
+			Format: req.Format,
+			Images: images,
+		}
+		if err := loaded.runner.Predict(c.Request.Context(), predictReq, fn); err != nil {
+			ch <- gin.H{"error": err.Error()}
+		}
+	}()
+
+	if req.Stream != nil && !*req.Stream {
+		// Accumulate responses into the final response
+		var final api.ChatResponse
+		var sb strings.Builder
+		for resp := range ch {
+			switch r := resp.(type) {
+			case api.ChatResponse:
+				sb.WriteString(r.Message.Content)
+				final = r
+			case gin.H:
+				if errorMsg, ok := r["error"].(string); ok {
+					c.JSON(http.StatusInternalServerError, gin.H{"error": errorMsg})
+					return
+				} else {
+					c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected error format in response"})
+					return
+				}
+			default:
+				c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected error"})
+				return
+			}
+		}
+
+		final.Message = api.Message{Role: "assistant", Content: sb.String()}
+		c.JSON(http.StatusOK, final)
+		return
+	}
+
+	streamResponse(c, ch)
+}
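Taken together with the /api/chat route registered in GenerateRoutes above, this adds a conversational endpoint that streams newline-delimited JSON. A self-contained client sketch (the model name is a placeholder; not part of the diff):

	package main

	import (
		"bufio"
		"bytes"
		"encoding/json"
		"fmt"
		"net/http"
	)

	func main() {
		body, _ := json.Marshal(map[string]any{
			"model": "llama2", // placeholder model name
			"messages": []map[string]string{
				{"role": "system", "content": "You are a Wizard."},
				{"role": "user", "content": "What are the potion ingredients?"},
			},
		})

		resp, err := http.Post("http://127.0.0.1:11434/api/chat", "application/json", bytes.NewReader(body))
		if err != nil {
			panic(err)
		}
		defer resp.Body.Close()

		// one JSON object per line until "done": true
		scanner := bufio.NewScanner(resp.Body)
		for scanner.Scan() {
			var chunk struct {
				Message struct {
					Content string `json:"content"`
				} `json:"message"`
				Done bool `json:"done"`
			}
			if err := json.Unmarshal(scanner.Bytes(), &chunk); err != nil {
				panic(err)
			}
			fmt.Print(chunk.Message.Content)
			if chunk.Done {
				break
			}
		}
	}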
204	server/routes_test.go	Normal file
@@ -0,0 +1,204 @@
+package server
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"os"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+
+	"github.com/jmorganca/ollama/api"
+	"github.com/jmorganca/ollama/parser"
+)
+
+func setupServer(t *testing.T) (*Server, error) {
+	t.Helper()
+
+	return NewServer()
+}
+
+func Test_Routes(t *testing.T) {
+	type testCase struct {
+		Name     string
+		Method   string
+		Path     string
+		Setup    func(t *testing.T, req *http.Request)
+		Expected func(t *testing.T, resp *http.Response)
+	}
+
+	createTestFile := func(t *testing.T, name string) string {
+		f, err := os.CreateTemp(t.TempDir(), name)
+		assert.Nil(t, err)
+		defer f.Close()
+
+		_, err = f.Write([]byte("GGUF"))
+		assert.Nil(t, err)
+		_, err = f.Write([]byte{0x2, 0})
+		assert.Nil(t, err)
+
+		return f.Name()
+	}
+
+	createTestModel := func(t *testing.T, name string) {
+		fname := createTestFile(t, "ollama-model")
+
+		modelfile := strings.NewReader(fmt.Sprintf("FROM %s", fname))
+		commands, err := parser.Parse(modelfile)
+		assert.Nil(t, err)
+		fn := func(resp api.ProgressResponse) {
+			t.Logf("Status: %s", resp.Status)
+		}
+		err = CreateModel(context.TODO(), name, "", commands, fn)
+		assert.Nil(t, err)
+	}
+
+	testCases := []testCase{
+		{
+			Name:   "Version Handler",
+			Method: http.MethodGet,
+			Path:   "/api/version",
+			Setup: func(t *testing.T, req *http.Request) {
+			},
+			Expected: func(t *testing.T, resp *http.Response) {
+				contentType := resp.Header.Get("Content-Type")
+				assert.Equal(t, contentType, "application/json; charset=utf-8")
+				body, err := io.ReadAll(resp.Body)
+				assert.Nil(t, err)
+				assert.Equal(t, `{"version":"0.0.0"}`, string(body))
+			},
+		},
+		{
+			Name:   "Tags Handler (no tags)",
+			Method: http.MethodGet,
+			Path:   "/api/tags",
+			Expected: func(t *testing.T, resp *http.Response) {
+				contentType := resp.Header.Get("Content-Type")
+				assert.Equal(t, contentType, "application/json; charset=utf-8")
+				body, err := io.ReadAll(resp.Body)
+				assert.Nil(t, err)
+
+				var modelList api.ListResponse
+
+				err = json.Unmarshal(body, &modelList)
+				assert.Nil(t, err)
+
+				assert.Equal(t, 0, len(modelList.Models))
+			},
+		},
+		{
+			Name:   "Tags Handler (yes tags)",
+			Method: http.MethodGet,
+			Path:   "/api/tags",
+			Setup: func(t *testing.T, req *http.Request) {
+				createTestModel(t, "test-model")
+			},
+			Expected: func(t *testing.T, resp *http.Response) {
+				contentType := resp.Header.Get("Content-Type")
+				assert.Equal(t, contentType, "application/json; charset=utf-8")
+				body, err := io.ReadAll(resp.Body)
+				assert.Nil(t, err)
+
+				var modelList api.ListResponse
+				err = json.Unmarshal(body, &modelList)
+				assert.Nil(t, err)
+
+				assert.Equal(t, 1, len(modelList.Models))
+				assert.Equal(t, modelList.Models[0].Name, "test-model:latest")
+			},
+		},
+		{
+			Name:   "Create Model Handler",
+			Method: http.MethodPost,
+			Path:   "/api/create",
+			Setup: func(t *testing.T, req *http.Request) {
+				f, err := os.CreateTemp(t.TempDir(), "ollama-model")
+				assert.Nil(t, err)
+				defer f.Close()
+
+				stream := false
+				createReq := api.CreateRequest{
+					Name:      "t-bone",
+					Modelfile: fmt.Sprintf("FROM %s", f.Name()),
+					Stream:    &stream,
+				}
+				jsonData, err := json.Marshal(createReq)
+				assert.Nil(t, err)
+
+				req.Body = io.NopCloser(bytes.NewReader(jsonData))
+			},
+			Expected: func(t *testing.T, resp *http.Response) {
+				contentType := resp.Header.Get("Content-Type")
+				assert.Equal(t, "application/json", contentType)
+				_, err := io.ReadAll(resp.Body)
+				assert.Nil(t, err)
+				assert.Equal(t, resp.StatusCode, 200)
+
+				model, err := GetModel("t-bone")
+				assert.Nil(t, err)
+				assert.Equal(t, "t-bone:latest", model.ShortName)
+			},
+		},
+		{
+			Name:   "Copy Model Handler",
+			Method: http.MethodPost,
+			Path:   "/api/copy",
+			Setup: func(t *testing.T, req *http.Request) {
+				createTestModel(t, "hamshank")
+				copyReq := api.CopyRequest{
+					Source:      "hamshank",
+					Destination: "beefsteak",
+				}
+				jsonData, err := json.Marshal(copyReq)
+				assert.Nil(t, err)
+
+				req.Body = io.NopCloser(bytes.NewReader(jsonData))
+			},
+			Expected: func(t *testing.T, resp *http.Response) {
+				model, err := GetModel("beefsteak")
+				assert.Nil(t, err)
+				assert.Equal(t, "beefsteak:latest", model.ShortName)
+			},
+		},
+	}
+
+	s, err := setupServer(t)
+	assert.Nil(t, err)
+
+	router := s.GenerateRoutes()
+
+	httpSrv := httptest.NewServer(router)
+	t.Cleanup(httpSrv.Close)
+
+	workDir, err := os.MkdirTemp("", "ollama-test")
+	assert.Nil(t, err)
+	defer os.RemoveAll(workDir)
+	os.Setenv("OLLAMA_MODELS", workDir)
+
+	for _, tc := range testCases {
+		t.Logf("Running Test: [%s]", tc.Name)
+		u := httpSrv.URL + tc.Path
+		req, err := http.NewRequestWithContext(context.TODO(), tc.Method, u, nil)
+		assert.Nil(t, err)
+
+		if tc.Setup != nil {
+			tc.Setup(t, req)
+		}
+
+		resp, err := httpSrv.Client().Do(req)
+		defer resp.Body.Close()
+		assert.Nil(t, err)
+
+		if tc.Expected != nil {
+			tc.Expected(t, resp)
+		}
+
+	}
+
+}
@@ -5,6 +5,7 @@ import (
 	"crypto/md5"
 	"errors"
 	"fmt"
+	"hash"
 	"io"
 	"log"
 	"math"
@@ -102,7 +103,7 @@ func (b *blobUpload) Prepare(ctx context.Context, requestURL *url.URL, opts *Reg
 	}
 
 	// set part.N to the current number of parts
-	b.Parts = append(b.Parts, blobUploadPart{blobUpload: b, N: len(b.Parts), Offset: offset, Size: size})
+	b.Parts = append(b.Parts, blobUploadPart{N: len(b.Parts), Offset: offset, Size: size})
 	offset += size
 }
 
@@ -147,14 +148,13 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
 		g.Go(func() error {
 			var err error
 			for try := 0; try < maxRetries; try++ {
-				err = b.uploadChunk(inner, http.MethodPatch, requestURL, part, opts)
+				err = b.uploadPart(inner, http.MethodPatch, requestURL, part, opts)
 				switch {
 				case errors.Is(err, context.Canceled):
 					return err
 				case errors.Is(err, errMaxRetriesExceeded):
 					return err
 				case err != nil:
-					part.Reset()
 					sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
 					log.Printf("%s part %d attempt %d failed: %v, retrying in %s", b.Digest[7:19], part.N, try, err, sleep)
 					time.Sleep(sleep)
@@ -176,17 +176,10 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
 
 	requestURL := <-b.nextURL
 
-	var sb strings.Builder
-
 	// calculate md5 checksum and add it to the commit request
+	var sb strings.Builder
 	for _, part := range b.Parts {
-		hash := md5.New()
-		if _, err := io.Copy(hash, io.NewSectionReader(b.file, part.Offset, part.Size)); err != nil {
-			b.err = err
-			return
-		}
-
-		sb.Write(hash.Sum(nil))
+		sb.Write(part.Sum(nil))
 	}
 
 	md5sum := md5.Sum([]byte(sb.String()))
@@ -201,27 +194,25 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
 	headers.Set("Content-Length", "0")
 
 	for try := 0; try < maxRetries; try++ {
-		resp, err := makeRequestWithRetry(ctx, http.MethodPut, requestURL, headers, nil, opts)
-		if err != nil {
-			b.err = err
-			if errors.Is(err, context.Canceled) {
-				return
-			}
-
+		var resp *http.Response
+		resp, err = makeRequestWithRetry(ctx, http.MethodPut, requestURL, headers, nil, opts)
+		if errors.Is(err, context.Canceled) {
+			break
+		} else if err != nil {
 			sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
 			log.Printf("%s complete upload attempt %d failed: %v, retrying in %s", b.Digest[7:19], try, err, sleep)
 			time.Sleep(sleep)
 			continue
 		}
 		defer resp.Body.Close()
-
-		b.err = nil
-		b.done = true
-		return
-	}
+		break
+	}
+
+	b.err = err
+	b.done = true
 }
 
-func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL *url.URL, part *blobUploadPart, opts *RegistryOptions) error {
+func (b *blobUpload) uploadPart(ctx context.Context, method string, requestURL *url.URL, part *blobUploadPart, opts *RegistryOptions) error {
 	headers := make(http.Header)
 	headers.Set("Content-Type", "application/octet-stream")
 	headers.Set("Content-Length", fmt.Sprintf("%d", part.Size))
@@ -232,8 +223,13 @@ func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL
 	}
 
 	sr := io.NewSectionReader(b.file, part.Offset, part.Size)
-	resp, err := makeRequest(ctx, method, requestURL, headers, io.TeeReader(sr, part), opts)
+
+	md5sum := md5.New()
+	w := &progressWriter{blobUpload: b}
+
+	resp, err := makeRequest(ctx, method, requestURL, headers, io.TeeReader(sr, io.MultiWriter(w, md5sum)), opts)
 	if err != nil {
+		w.Rollback()
 		return err
 	}
 	defer resp.Body.Close()
@@ -245,11 +241,13 @@ func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL
 
 	nextURL, err := url.Parse(location)
 	if err != nil {
+		w.Rollback()
 		return err
 	}
 
 	switch {
 	case resp.StatusCode == http.StatusTemporaryRedirect:
+		w.Rollback()
 		b.nextURL <- nextURL
 
 		redirectURL, err := resp.Location()
@@ -259,14 +257,13 @@ func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL
 
 		// retry uploading to the redirect URL
 		for try := 0; try < maxRetries; try++ {
-			err = b.uploadChunk(ctx, http.MethodPut, redirectURL, part, nil)
+			err = b.uploadPart(ctx, http.MethodPut, redirectURL, part, nil)
 			switch {
 			case errors.Is(err, context.Canceled):
 				return err
 			case errors.Is(err, errMaxRetriesExceeded):
 				return err
 			case err != nil:
-				part.Reset()
 				sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
 				log.Printf("%s part %d attempt %d failed: %v, retrying in %s", b.Digest[7:19], part.N, try, err, sleep)
 				time.Sleep(sleep)
@@ -279,6 +276,7 @@ func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL
 		return fmt.Errorf("%w: %w", errMaxRetriesExceeded, err)
 
 	case resp.StatusCode == http.StatusUnauthorized:
+		w.Rollback()
 		auth := resp.Header.Get("www-authenticate")
 		authRedir := ParseAuthRedirectString(auth)
 		token, err := getAuthToken(ctx, authRedir)
@@ -289,6 +287,7 @@ func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL
 		opts.Token = token
 		fallthrough
 	case resp.StatusCode >= http.StatusBadRequest:
+		w.Rollback()
 		body, err := io.ReadAll(resp.Body)
 		if err != nil {
 			return err
@@ -301,6 +300,7 @@ func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL
 		b.nextURL <- nextURL
 	}
 
+	part.Hash = md5sum
 	return nil
 }
 
@@ -344,19 +344,23 @@ type blobUploadPart struct {
 	N      int
 	Offset int64
 	Size   int64
+	hash.Hash
+}
+
+type progressWriter struct {
 	written int64
 	*blobUpload
 }
 
-func (p *blobUploadPart) Write(b []byte) (n int, err error) {
+func (p *progressWriter) Write(b []byte) (n int, err error) {
 	n = len(b)
 	p.written += int64(n)
 	p.Completed.Add(int64(n))
 	return n, nil
 }
 
-func (p *blobUploadPart) Reset() {
-	p.Completed.Add(-int64(p.written))
+func (p *progressWriter) Rollback() {
+	p.Completed.Add(-p.written)
 	p.written = 0
 }
|
|||||||
Reference in New Issue
Block a user