mirror of
https://github.com/ollama/ollama.git
synced 2026-04-25 18:25:42 +02:00
Compare commits
7 Commits
parth/fix-
...
mxyng/func
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5bcad12cd4 | ||
|
|
aac17d5f15 | ||
|
|
2ddc32d5c5 | ||
|
|
2cde4b8817 | ||
|
|
87f0a49fe6 | ||
|
|
0f06a6daa7 | ||
|
|
8f805dd74b |
@@ -8,8 +8,6 @@ linters:
|
||||
- containedctx
|
||||
- contextcheck
|
||||
- errcheck
|
||||
- exportloopref
|
||||
- gci
|
||||
- gocheckcompilerdirectives
|
||||
- gofmt
|
||||
- gofumpt
|
||||
@@ -30,8 +28,6 @@ linters:
|
||||
- wastedassign
|
||||
- whitespace
|
||||
linters-settings:
|
||||
gci:
|
||||
sections: [standard, default, localmodule]
|
||||
staticcheck:
|
||||
checks:
|
||||
- all
|
||||
|
||||
2
Makefile
2
Makefile
@@ -8,11 +8,9 @@ include make/cuda-v12-defs.make
|
||||
include make/rocm-defs.make
|
||||
|
||||
ifeq ($(CUSTOM_CPU_FLAGS),)
|
||||
ifneq ($(OS),darwin)
|
||||
ifeq ($(ARCH),amd64)
|
||||
RUNNER_TARGETS=cpu
|
||||
endif
|
||||
endif
|
||||
# Without CUSTOM_CPU_FLAGS we default to build both v11 and v12 if present
|
||||
ifeq ($(OLLAMA_SKIP_CUDA_GENERATE),)
|
||||
ifneq ($(CUDA_11_COMPILER),)
|
||||
|
||||
@@ -407,8 +407,8 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
||||
|
||||
### Database
|
||||
|
||||
- [PostgreSQL extension pgai](https://github.com/timescale/pgai) (Create and search embeddings from Ollama models using pgvector)
|
||||
- [Get started guide](https://github.com/timescale/pgai/blob/main/docs/ollama.md)
|
||||
- [pgai](https://github.com/timescale/pgai) - PostgreSQL as a vector database (Create and search embeddings from Ollama models using pgvector)
|
||||
- [Get started guide](https://github.com/timescale/pgai/blob/main/docs/vectorizer-quick-start.md)
|
||||
- [MindsDB](https://github.com/mindsdb/mindsdb/blob/staging/mindsdb/integrations/handlers/ollama_handler/README.md) (Connects Ollama models with nearly 200 data platforms and apps)
|
||||
- [chromem-go](https://github.com/philippgille/chromem-go/blob/v0.5.0/embed_ollama.go) with [example](https://github.com/philippgille/chromem-go/tree/v0.5.0/examples/rag-wikipedia-ollama)
|
||||
- [Kangaroo](https://github.com/dbkangaroo/kangaroo) (AI-powered SQL client and admin tool for popular databases)
|
||||
|
||||
@@ -674,21 +674,6 @@ type CompletionResponse struct {
|
||||
}
|
||||
|
||||
func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn func(CompletionResponse)) error {
|
||||
if err := s.sem.Acquire(ctx, 1); err != nil {
|
||||
if errors.Is(err, context.Canceled) {
|
||||
slog.Info("aborting completion request due to client closing the connection")
|
||||
} else {
|
||||
slog.Error("Failed to acquire semaphore", "error", err)
|
||||
}
|
||||
return err
|
||||
}
|
||||
defer s.sem.Release(1)
|
||||
|
||||
// put an upper limit on num_predict to avoid the model running on forever
|
||||
if req.Options.NumPredict < 0 || req.Options.NumPredict > 10*s.options.NumCtx {
|
||||
req.Options.NumPredict = 10 * s.options.NumCtx
|
||||
}
|
||||
|
||||
request := map[string]any{
|
||||
"prompt": req.Prompt,
|
||||
"stream": true,
|
||||
@@ -714,30 +699,51 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
|
||||
"cache_prompt": true,
|
||||
}
|
||||
|
||||
// Make sure the server is ready
|
||||
status, err := s.getServerStatusRetry(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
} else if status != ServerStatusReady {
|
||||
return fmt.Errorf("unexpected server status: %s", status.ToString())
|
||||
}
|
||||
|
||||
if len(req.Format) > 0 {
|
||||
switch {
|
||||
case bytes.Equal(req.Format, []byte(`"json"`)):
|
||||
switch string(req.Format) {
|
||||
case `null`, `""`:
|
||||
// Field was set, but "missing" a value. We accept
|
||||
// these as "not set".
|
||||
break
|
||||
case `"json"`:
|
||||
request["grammar"] = grammarJSON
|
||||
case bytes.HasPrefix(req.Format, []byte("{")):
|
||||
default:
|
||||
if req.Format[0] != '{' {
|
||||
return fmt.Errorf("invalid format: %q; expected \"json\" or a valid JSON Schema object", req.Format)
|
||||
}
|
||||
|
||||
// User provided a JSON schema
|
||||
g := llama.SchemaToGrammar(req.Format)
|
||||
if g == nil {
|
||||
return fmt.Errorf("invalid JSON schema in format")
|
||||
}
|
||||
request["grammar"] = string(g)
|
||||
default:
|
||||
return errors.New(`invalid format: expected "json" or a JSON schema`)
|
||||
}
|
||||
}
|
||||
|
||||
if err := s.sem.Acquire(ctx, 1); err != nil {
|
||||
if errors.Is(err, context.Canceled) {
|
||||
slog.Info("aborting completion request due to client closing the connection")
|
||||
} else {
|
||||
slog.Error("Failed to acquire semaphore", "error", err)
|
||||
}
|
||||
return err
|
||||
}
|
||||
defer s.sem.Release(1)
|
||||
|
||||
// put an upper limit on num_predict to avoid the model running on forever
|
||||
if req.Options.NumPredict < 0 || req.Options.NumPredict > 10*s.options.NumCtx {
|
||||
req.Options.NumPredict = 10 * s.options.NumCtx
|
||||
}
|
||||
|
||||
// Make sure the server is ready
|
||||
status, err := s.getServerStatusRetry(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
} else if status != ServerStatusReady {
|
||||
return fmt.Errorf("unexpected server status: %s", status.ToString())
|
||||
}
|
||||
|
||||
// Handling JSON marshaling with special characters unescaped.
|
||||
buffer := &bytes.Buffer{}
|
||||
enc := json.NewEncoder(buffer)
|
||||
|
||||
72
llm/server_test.go
Normal file
72
llm/server_test.go
Normal file
@@ -0,0 +1,72 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
"golang.org/x/sync/semaphore"
|
||||
)
|
||||
|
||||
func TestLLMServerCompletionFormat(t *testing.T) {
|
||||
// This test was written to fix an already deployed issue. It is a bit
|
||||
// of a mess, but it's good enough until we can refactor the
|
||||
// Completion method to be more testable.
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
s := &llmServer{
|
||||
sem: semaphore.NewWeighted(1), // required to prevent nil panic
|
||||
}
|
||||
|
||||
checkInvalid := func(format string) {
|
||||
t.Helper()
|
||||
err := s.Completion(ctx, CompletionRequest{
|
||||
Options: new(api.Options),
|
||||
Format: []byte(format),
|
||||
}, nil)
|
||||
|
||||
want := fmt.Sprintf("invalid format: %q; expected \"json\" or a valid JSON Schema", format)
|
||||
if err == nil || !strings.Contains(err.Error(), want) {
|
||||
t.Fatalf("err = %v; want %q", err, want)
|
||||
}
|
||||
}
|
||||
|
||||
checkInvalid("X") // invalid format
|
||||
checkInvalid(`"X"`) // invalid JSON Schema
|
||||
|
||||
cancel() // prevent further processing if request makes it past the format check
|
||||
|
||||
checkValid := func(err error) {
|
||||
t.Helper()
|
||||
if !errors.Is(err, context.Canceled) {
|
||||
t.Fatalf("Completion: err = %v; expected context.Canceled", err)
|
||||
}
|
||||
}
|
||||
|
||||
valids := []string{
|
||||
// "missing"
|
||||
``,
|
||||
`""`,
|
||||
`null`,
|
||||
|
||||
// JSON
|
||||
`"json"`,
|
||||
`{"type":"object"}`,
|
||||
}
|
||||
for _, valid := range valids {
|
||||
err := s.Completion(ctx, CompletionRequest{
|
||||
Options: new(api.Options),
|
||||
Format: []byte(valid),
|
||||
}, nil)
|
||||
checkValid(err)
|
||||
}
|
||||
|
||||
err := s.Completion(ctx, CompletionRequest{
|
||||
Options: new(api.Options),
|
||||
Format: nil, // missing format
|
||||
}, nil)
|
||||
checkValid(err)
|
||||
}
|
||||
@@ -19,6 +19,7 @@ const config: ForgeConfig = {
|
||||
icon: './assets/icon.icns',
|
||||
extraResource: [
|
||||
'../dist/ollama',
|
||||
'../dist/darwin-amd64/lib',
|
||||
path.join(__dirname, './assets/iconTemplate.png'),
|
||||
path.join(__dirname, './assets/iconTemplate@2x.png'),
|
||||
path.join(__dirname, './assets/iconUpdateTemplate.png'),
|
||||
@@ -42,7 +43,7 @@ const config: ForgeConfig = {
|
||||
}
|
||||
: {}),
|
||||
osxUniversal: {
|
||||
x64ArchFiles: '**/ollama',
|
||||
x64ArchFiles: '**/ollama*',
|
||||
},
|
||||
},
|
||||
rebuildConfig: {},
|
||||
|
||||
@@ -72,6 +72,7 @@ func locateRunnersOnce() {
|
||||
paths := []string{
|
||||
filepath.Join(filepath.Dir(exe), "llama", "build", runtime.GOOS+"-"+runtime.GOARCH, "runners"),
|
||||
filepath.Join(filepath.Dir(exe), envconfig.LibRelativeToExe(), "lib", "ollama", "runners"),
|
||||
filepath.Join(filepath.Dir(exe), "lib", "ollama", "runners"),
|
||||
}
|
||||
for _, path := range paths {
|
||||
if _, err := os.Stat(path); err == nil {
|
||||
|
||||
@@ -18,10 +18,18 @@ rm -rf llama/build dist/darwin-*
|
||||
echo "Building darwin arm64"
|
||||
GOOS=darwin ARCH=arm64 GOARCH=arm64 make -j 8 dist
|
||||
echo "Building darwin amd64 with AVX enabled"
|
||||
GOOS=darwin ARCH=amd64 GOARCH=amd64 CUSTOM_CPU_FLAGS="avx" make -j 8 dist
|
||||
GOOS=darwin ARCH=amd64 GOARCH=amd64 CUSTOM_CPU_FLAGS="avx" make -j 8 dist_exe
|
||||
|
||||
# Generate the universal ollama binary for stand-alone usage: metal + avx
|
||||
lipo -create -output dist/ollama-darwin dist/darwin-arm64/bin/ollama dist/darwin-amd64/bin/ollama
|
||||
|
||||
echo "Building darwin amd64 with runners"
|
||||
rm dist/darwin-amd64/bin/ollama
|
||||
GOOS=darwin ARCH=amd64 GOARCH=amd64 make -j 8 dist
|
||||
# Generate the universal ollama binary for the app bundle: metal + no-avx
|
||||
lipo -create -output dist/ollama dist/darwin-arm64/bin/ollama dist/darwin-amd64/bin/ollama
|
||||
|
||||
|
||||
if [ -n "$APPLE_IDENTITY" ]; then
|
||||
codesign --deep --force --options=runtime --sign "$APPLE_IDENTITY" --timestamp dist/ollama
|
||||
else
|
||||
@@ -48,5 +56,4 @@ ditto -c -k --keepParent dist/ollama dist/temp.zip
|
||||
if [ -n "$APPLE_IDENTITY" ]; then
|
||||
xcrun notarytool submit dist/temp.zip --wait --timeout 10m --apple-id $APPLE_ID --password $APPLE_PASSWORD --team-id $APPLE_TEAM_ID
|
||||
fi
|
||||
mv dist/ollama dist/ollama-darwin
|
||||
rm -f dist/temp.zip
|
||||
|
||||
@@ -302,7 +302,7 @@ func parseObjects(s string) []map[string]any {
|
||||
// mxyng: this only really works if the input contains tool calls in some JSON format
|
||||
func (m *Model) parseToolCalls(s string) ([]api.ToolCall, bool) {
|
||||
// create a subtree from the node that ranges over .ToolCalls
|
||||
tmpl := m.Template.Subtree(func(n parse.Node) bool {
|
||||
tmpl := m.Template.Sub(func(n parse.Node) bool {
|
||||
if t, ok := n.(*parse.RangeNode); ok {
|
||||
return slices.Contains(template.Identifiers(t.Pipe), "ToolCalls")
|
||||
}
|
||||
@@ -315,7 +315,7 @@ func (m *Model) parseToolCalls(s string) ([]api.ToolCall, bool) {
|
||||
}
|
||||
|
||||
var b bytes.Buffer
|
||||
if err := tmpl.Execute(&b, map[string][]api.ToolCall{
|
||||
if err := tmpl.Template().Execute(&b, map[string][]api.ToolCall{
|
||||
"ToolCalls": {
|
||||
{
|
||||
Function: api.ToolCallFunction{
|
||||
|
||||
@@ -518,8 +518,8 @@ func TestCreateTemplateSystem(t *testing.T) {
|
||||
Stream: &stream,
|
||||
})
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Fatalf("expected status code 400, actual %d", w.Code)
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected status code 200, actual %d", w.Code)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
@@ -93,8 +93,8 @@ func Named(s string) (*named, error) {
|
||||
var DefaultTemplate, _ = Parse("{{ .Prompt }}")
|
||||
|
||||
type Template struct {
|
||||
*template.Template
|
||||
raw string
|
||||
tree *parse.Tree
|
||||
raw string
|
||||
}
|
||||
|
||||
// response is a template node that can be added to templates that don't already have one
|
||||
@@ -124,17 +124,18 @@ var funcs = template.FuncMap{
|
||||
}
|
||||
|
||||
func Parse(s string) (*Template, error) {
|
||||
tmpl := template.New("").Option("missingkey=zero").Funcs(funcs)
|
||||
tree := parse.New("")
|
||||
tree.Mode = tree.Mode | parse.SkipFuncCheck
|
||||
|
||||
tmpl, err := tmpl.Parse(s)
|
||||
tree, err := tree.Parse(s, "", "", map[string]*parse.Tree{})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
t := Template{Template: tmpl, raw: s}
|
||||
t := Template{tree, s}
|
||||
if vars := t.Vars(); !slices.Contains(vars, "messages") && !slices.Contains(vars, "response") {
|
||||
// touch up the template and append {{ .Response }}
|
||||
tmpl.Tree.Root.Nodes = append(tmpl.Tree.Root.Nodes, &response)
|
||||
t.tree.Root.Nodes = append(t.tree.Root.Nodes, &response)
|
||||
}
|
||||
|
||||
return &t, nil
|
||||
@@ -146,10 +147,8 @@ func (t *Template) String() string {
|
||||
|
||||
func (t *Template) Vars() []string {
|
||||
var vars []string
|
||||
for _, tt := range t.Templates() {
|
||||
for _, n := range tt.Root.Nodes {
|
||||
vars = append(vars, Identifiers(n)...)
|
||||
}
|
||||
for _, n := range t.tree.Root.Nodes {
|
||||
vars = append(vars, Identifiers(n)...)
|
||||
}
|
||||
|
||||
set := make(map[string]struct{})
|
||||
@@ -172,7 +171,8 @@ type Values struct {
|
||||
forceLegacy bool
|
||||
}
|
||||
|
||||
func (t *Template) Subtree(fn func(parse.Node) bool) *template.Template {
|
||||
// Sub returns a new template with the subtree that matches the predicate
|
||||
func (t *Template) Sub(fn func(parse.Node) bool) *Template {
|
||||
var walk func(parse.Node) parse.Node
|
||||
walk = func(n parse.Node) parse.Node {
|
||||
if fn(n) {
|
||||
@@ -205,29 +205,34 @@ func (t *Template) Subtree(fn func(parse.Node) bool) *template.Template {
|
||||
return nil
|
||||
}
|
||||
|
||||
if n := walk(t.Tree.Root); n != nil {
|
||||
return (&template.Template{
|
||||
Tree: &parse.Tree{
|
||||
if n := walk(t.tree.Root); n != nil {
|
||||
return &Template{
|
||||
tree: &parse.Tree{
|
||||
Root: &parse.ListNode{
|
||||
Nodes: []parse.Node{n},
|
||||
},
|
||||
},
|
||||
}).Funcs(funcs)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *Template) Template() *template.Template {
|
||||
return template.Must(template.New("").Option("missingkey=zero").Funcs(funcs).AddParseTree("", t.tree))
|
||||
}
|
||||
|
||||
func (t *Template) Execute(w io.Writer, v Values) error {
|
||||
tmpl := t.Template()
|
||||
system, messages := collate(v.Messages)
|
||||
if v.Prompt != "" && v.Suffix != "" {
|
||||
return t.Template.Execute(w, map[string]any{
|
||||
return tmpl.Execute(w, map[string]any{
|
||||
"Prompt": v.Prompt,
|
||||
"Suffix": v.Suffix,
|
||||
"Response": "",
|
||||
})
|
||||
} else if !v.forceLegacy && slices.Contains(t.Vars(), "messages") {
|
||||
return t.Template.Execute(w, map[string]any{
|
||||
return tmpl.Execute(w, map[string]any{
|
||||
"System": system,
|
||||
"Messages": messages,
|
||||
"Tools": v.Tools,
|
||||
@@ -240,7 +245,7 @@ func (t *Template) Execute(w io.Writer, v Values) error {
|
||||
var prompt, response string
|
||||
for _, m := range messages {
|
||||
execute := func() error {
|
||||
if err := t.Template.Execute(&b, map[string]any{
|
||||
if err := tmpl.Execute(&b, map[string]any{
|
||||
"System": system,
|
||||
"Prompt": prompt,
|
||||
"Response": response,
|
||||
@@ -275,7 +280,7 @@ func (t *Template) Execute(w io.Writer, v Values) error {
|
||||
}
|
||||
|
||||
var cut bool
|
||||
nodes := deleteNode(t.Template.Root.Copy(), func(n parse.Node) bool {
|
||||
nodes := deleteNode(t.tree.Root.Copy(), func(n parse.Node) bool {
|
||||
if field, ok := n.(*parse.FieldNode); ok && slices.Contains(field.Ident, "Response") {
|
||||
cut = true
|
||||
return false
|
||||
@@ -285,7 +290,7 @@ func (t *Template) Execute(w io.Writer, v Values) error {
|
||||
})
|
||||
|
||||
tree := parse.Tree{Root: nodes.(*parse.ListNode)}
|
||||
if err := template.Must(template.New("").AddParseTree("", &tree)).Execute(&b, map[string]any{
|
||||
if err := template.Must(tmpl.AddParseTree("", &tree)).Execute(&b, map[string]any{
|
||||
"System": system,
|
||||
"Prompt": prompt,
|
||||
"Response": response,
|
||||
|
||||
@@ -54,7 +54,7 @@ func TestNamed(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if tmpl.Tree.Root.String() == "" {
|
||||
if tmpl.tree.Root.String() == "" {
|
||||
t.Errorf("empty %s template", k)
|
||||
}
|
||||
})
|
||||
@@ -153,7 +153,7 @@ func TestTemplate(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestParse(t *testing.T) {
|
||||
func TestParseVars(t *testing.T) {
|
||||
cases := []struct {
|
||||
template string
|
||||
vars []string
|
||||
@@ -181,6 +181,9 @@ func TestParse(t *testing.T) {
|
||||
{{ end }}<|im_start|>assistant
|
||||
{{ .Response }}<|im_end|>
|
||||
{{- end -}}`, []string{"content", "messages", "prompt", "response", "role", "system"}},
|
||||
{"{{ json .Messages }}", []string{"messages"}},
|
||||
// undefined functions should not error
|
||||
{"{{ undefined }}", []string{"response"}},
|
||||
}
|
||||
|
||||
for _, tt := range cases {
|
||||
@@ -197,6 +200,30 @@ func TestParse(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseExecute(t *testing.T) {
|
||||
t.Run("undefined function", func(t *testing.T) {
|
||||
tmpl, err := Parse(`{{- if .Suffix }}{{ .Prompt }} {{ .Suffix }}{{- else }}{{ undefined }}{{- end }}`)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
var b bytes.Buffer
|
||||
if err := tmpl.Execute(&b, Values{Prompt: "def add(", Suffix: " return c"}); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if diff := cmp.Diff(b.String(), "def add( return c"); diff != "" {
|
||||
t.Errorf("mismatch (-got +want):\n%s", diff)
|
||||
}
|
||||
|
||||
if err := tmpl.Execute(io.Discard, Values{}); err == nil {
|
||||
t.Fatal("expected error")
|
||||
} else if !strings.Contains(err.Error(), "\"undefined\" is not a defined function") {
|
||||
t.Fatal(err)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestExecuteWithMessages(t *testing.T) {
|
||||
type template struct {
|
||||
name string
|
||||
|
||||
Reference in New Issue
Block a user