Compare commits

...

28 Commits

Author SHA1 Message Date
Jeffrey Morgan
23481167a7 add model_format config var 2023-09-07 13:54:53 -04:00
Alexander Pepper
07b4074e7b [docs] Improve build instructions (#482)
Go is required and not installed by default.
2023-09-07 06:43:26 -04:00
Jeffrey Morgan
61dda6a5e0 set minimum CMAKE_OSX_DEPLOYMENT_TARGET to 11.0 2023-09-06 19:56:50 -04:00
Michael Yang
e1f9ced568 Merge pull request #479 from jmorganca/mxyng/dockerfile
update dockerfile
2023-09-06 15:44:24 -07:00
Michael Yang
9795b43d93 update dockerfile 2023-09-06 15:31:25 -07:00
Michael Yang
0980d5c7e3 Merge pull request #478 from jmorganca/mxyng/cleanup
remove unused openssh key types
2023-09-06 15:18:54 -07:00
Michael Yang
0dae34b6a7 remove unused openssh key types 2023-09-06 14:34:09 -07:00
Michael Yang
83c6be1666 fix model manifests (#477) 2023-09-06 17:30:08 -04:00
Patrick Devine
1adfa67589 tighten up the error string for ollama show flags (#476) 2023-09-06 13:38:49 -07:00
Patrick Devine
790d24eb7b add show command (#474) 2023-09-06 11:04:17 -07:00
Jeffrey Morgan
7de300856b use osPath in gpu check 2023-09-05 21:52:21 -04:00
Jeffrey Morgan
213ffdb548 macos amd64 compatibility fixes 2023-09-05 21:33:31 -04:00
Michael Yang
d42d88386a Merge pull request #473 from jmorganca/mxyng/fix-manifest-path
create manifests directory
2023-09-05 17:37:41 -07:00
Ackermann Yuriy
154f24af91 Added missing options params to the embeddings docs (#472) 2023-09-05 20:18:49 -04:00
Michael Yang
a1ecdd36d5 create manifests directory 2023-09-05 17:10:40 -07:00
Bruce MacDonald
d18282bfda metal: add missing barriers for mul-mat (#469) 2023-09-05 19:37:13 -04:00
Michael Yang
9ae76ba8c9 Merge pull request #471 from jmorganca/mxyng/fix-empty-response
fix empty response
2023-09-05 15:23:05 -07:00
Michael Yang
2bc06565c7 fix empty response 2023-09-05 15:03:24 -07:00
Michael Yang
d1c2558f7e Merge pull request #461 from jmorganca/mxyng/fix-inherit-params
fix inherit params
2023-09-05 12:30:23 -07:00
Michael Yang
7b5aefb427 Merge pull request #462 from jmorganca/mxyng/rm-marshal-prompt
remove marshalPrompt which is no longer needed
2023-09-05 11:48:41 -07:00
Michael Yang
06ef90c051 fix parameter inheritence
parameters are not inherited because they are processed differently from
other layer. fix this by explicitly merging the inherited params into
the new params. parameter values defined in the new modelfile will
override those defined in the inherited modelfile. array lists are
replaced instead of appended
2023-09-05 11:40:20 -07:00
Michael Yang
7efbc84320 Merge pull request #464 from jmorganca/mxyng/fix-num-keep
fix num_keep
2023-09-05 11:30:45 -07:00
Michael Yang
e9f6df7dca use slices.DeleteFunc 2023-09-05 09:56:59 -07:00
Jeffrey Morgan
7fa6e51686 generate binary dependencies based on GOARCH on macos (#459) 2023-09-05 12:53:57 -04:00
Michael Yang
8dc68417e7 Merge pull request #463 from jmorganca/mxyng/fix-last-token
fix not forwarding last token
2023-09-05 09:01:32 -07:00
Michael Yang
681f3c4c42 fix num_keep 2023-09-03 17:47:49 -04:00
Michael Yang
59a705525c fix not forwarding last token 2023-09-03 17:46:50 -04:00
Michael Yang
5d3f314b0b remove marshalPrompt which is no longer needed 2023-09-03 17:01:05 -04:00
22 changed files with 539 additions and 250 deletions

View File

@@ -1,8 +1,4 @@
build
llama/build
.venv
.vscode .vscode
ollama ollama
app app
web llm/llama.cpp/ggml
.env

View File

@@ -1,15 +1,21 @@
FROM golang:1.20 FROM golang:alpine
WORKDIR /go/src/github.com/jmorganca/ollama WORKDIR /go/src/github.com/jmorganca/ollama
RUN apk add --no-cache git build-base cmake
COPY . . COPY . .
RUN CGO_ENABLED=1 go build -ldflags '-linkmode external -extldflags "-static"' . RUN go generate ./... && go build -ldflags '-linkmode external -extldflags "-static"' .
FROM alpine FROM alpine
COPY --from=0 /go/src/github.com/jmorganca/ollama/ollama /bin/ollama ENV OLLAMA_HOST 0.0.0.0
EXPOSE 11434 RUN apk add --no-cache libstdc++
ARG USER=ollama ARG USER=ollama
ARG GROUP=ollama ARG GROUP=ollama
RUN addgroup -g 1000 $GROUP && adduser -u 1000 -DG $GROUP $USER RUN addgroup $GROUP && adduser -D -G $GROUP $USER
COPY --from=0 /go/src/github.com/jmorganca/ollama/ollama /bin/ollama
USER $USER:$GROUP USER $USER:$GROUP
ENTRYPOINT ["/bin/ollama"] ENTRYPOINT ["/bin/ollama"]
ENV OLLAMA_HOST 0.0.0.0
CMD ["serve"] CMD ["serve"]

View File

@@ -165,10 +165,11 @@ Ollama bundles model weights, configurations, and data into a single package, de
## Building ## Building
Install `cmake`: Install `cmake` and `go`:
``` ```
brew install cmake brew install cmake
brew install go
``` ```
Then generate dependencies and build: Then generate dependencies and build:

View File

@@ -255,6 +255,14 @@ func (c *Client) Delete(ctx context.Context, req *DeleteRequest) error {
return nil return nil
} }
func (c *Client) Show(ctx context.Context, req *ShowRequest) (*ShowResponse, error) {
var resp ShowResponse
if err := c.do(ctx, http.MethodPost, "/api/show", req, &resp); err != nil {
return nil, err
}
return &resp, nil
}
func (c *Client) Heartbeat(ctx context.Context) error { func (c *Client) Heartbeat(ctx context.Context) error {
if err := c.do(ctx, http.MethodHead, "/", nil, nil); err != nil { if err := c.do(ctx, http.MethodHead, "/", nil, nil); err != nil {
return err return err

View File

@@ -61,6 +61,18 @@ type DeleteRequest struct {
Name string `json:"name"` Name string `json:"name"`
} }
type ShowRequest struct {
Name string `json:"name"`
}
type ShowResponse struct {
License string `json:"license,omitempty"`
Modelfile string `json:"modelfile,omitempty"`
Parameters string `json:"parameters,omitempty"`
Template string `json:"template,omitempty"`
System string `json:"system,omitempty"`
}
type CopyRequest struct { type CopyRequest struct {
Source string `json:"source"` Source string `json:"source"`
Destination string `json:"destination"` Destination string `json:"destination"`

View File

@@ -230,6 +230,84 @@ func DeleteHandler(cmd *cobra.Command, args []string) error {
return nil return nil
} }
func ShowHandler(cmd *cobra.Command, args []string) error {
client, err := api.FromEnv()
if err != nil {
return err
}
if len(args) != 1 {
return errors.New("missing model name")
}
license, errLicense := cmd.Flags().GetBool("license")
modelfile, errModelfile := cmd.Flags().GetBool("modelfile")
parameters, errParams := cmd.Flags().GetBool("parameters")
system, errSystem := cmd.Flags().GetBool("system")
template, errTemplate := cmd.Flags().GetBool("template")
for _, boolErr := range []error{errLicense, errModelfile, errParams, errSystem, errTemplate} {
if boolErr != nil {
return errors.New("error retrieving flags")
}
}
flagsSet := 0
showType := ""
if license {
flagsSet++
showType = "license"
}
if modelfile {
flagsSet++
showType = "modelfile"
}
if parameters {
flagsSet++
showType = "parameters"
}
if system {
flagsSet++
showType = "system"
}
if template {
flagsSet++
showType = "template"
}
if flagsSet > 1 {
return errors.New("only one of '--license', '--modelfile', '--parameters', '--system', or '--template' can be specified")
} else if flagsSet == 0 {
return errors.New("one of '--license', '--modelfile', '--parameters', '--system', or '--template' must be specified")
}
req := api.ShowRequest{Name: args[0]}
resp, err := client.Show(context.Background(), &req)
if err != nil {
return err
}
switch showType {
case "license":
fmt.Println(resp.License)
case "modelfile":
fmt.Println(resp.Modelfile)
case "parameters":
fmt.Println(resp.Parameters)
case "system":
fmt.Println(resp.System)
case "template":
fmt.Println(resp.Template)
}
return nil
}
func CopyHandler(cmd *cobra.Command, args []string) error { func CopyHandler(cmd *cobra.Command, args []string) error {
client, err := api.FromEnv() client, err := api.FromEnv()
if err != nil { if err != nil {
@@ -377,20 +455,6 @@ func generate(cmd *cobra.Command, model, prompt string) error {
return nil return nil
} }
func showLayer(l *server.Layer) {
filename, err := server.GetBlobsPath(l.Digest)
if err != nil {
fmt.Println("Couldn't get layer's path")
return
}
bts, err := os.ReadFile(filename)
if err != nil {
fmt.Println("Couldn't read layer")
return
}
fmt.Println(string(bts))
}
func generateInteractive(cmd *cobra.Command, model string) error { func generateInteractive(cmd *cobra.Command, model string) error {
home, err := os.UserHomeDir() home, err := os.UserHomeDir()
if err != nil { if err != nil {
@@ -413,6 +477,8 @@ func generateInteractive(cmd *cobra.Command, model string) error {
), ),
readline.PcItem("/show", readline.PcItem("/show",
readline.PcItem("license"), readline.PcItem("license"),
readline.PcItem("modelfile"),
readline.PcItem("parameters"),
readline.PcItem("system"), readline.PcItem("system"),
readline.PcItem("template"), readline.PcItem("template"),
), ),
@@ -522,42 +588,28 @@ func generateInteractive(cmd *cobra.Command, model string) error {
case strings.HasPrefix(line, "/show"): case strings.HasPrefix(line, "/show"):
args := strings.Fields(line) args := strings.Fields(line)
if len(args) > 1 { if len(args) > 1 {
mp := server.ParseModelPath(model) resp, err := server.GetModelInfo(model)
if err != nil { if err != nil {
return err fmt.Println("error: couldn't get model")
continue
} }
manifest, _, err := server.GetManifest(mp)
if err != nil {
fmt.Println("error: couldn't get a manifest for this model")
continue
}
switch args[1] { switch args[1] {
case "license": case "license":
for _, l := range manifest.Layers { fmt.Println(resp.License)
if l.MediaType == "application/vnd.ollama.image.license" { case "modelfile":
showLayer(l) fmt.Println(resp.Modelfile)
} case "parameters":
} fmt.Println(resp.Parameters)
continue
case "system": case "system":
for _, l := range manifest.Layers { fmt.Println(resp.System)
if l.MediaType == "application/vnd.ollama.image.system" {
showLayer(l)
}
}
continue
case "template": case "template":
for _, l := range manifest.Layers { fmt.Println(resp.Template)
if l.MediaType == "application/vnd.ollama.image.template" {
showLayer(l)
}
}
continue
default: default:
usage() fmt.Println("error: unknown command")
continue
} }
continue
} else { } else {
usage() usage()
continue continue
@@ -749,6 +801,20 @@ func NewCLI() *cobra.Command {
createCmd.Flags().StringP("file", "f", "Modelfile", "Name of the Modelfile (default \"Modelfile\")") createCmd.Flags().StringP("file", "f", "Modelfile", "Name of the Modelfile (default \"Modelfile\")")
showCmd := &cobra.Command{
Use: "show MODEL",
Short: "Show information for a model",
Args: cobra.MinimumNArgs(1),
PreRunE: checkServerHeartbeat,
RunE: ShowHandler,
}
showCmd.Flags().Bool("license", false, "Show license of a model")
showCmd.Flags().Bool("modelfile", false, "Show Modelfile of a model")
showCmd.Flags().Bool("parameters", false, "Show parameters of a model")
showCmd.Flags().Bool("template", false, "Show template of a model")
showCmd.Flags().Bool("system", false, "Show system prompt of a model")
runCmd := &cobra.Command{ runCmd := &cobra.Command{
Use: "run MODEL [PROMPT]", Use: "run MODEL [PROMPT]",
Short: "Run a model", Short: "Run a model",
@@ -814,6 +880,7 @@ func NewCLI() *cobra.Command {
rootCmd.AddCommand( rootCmd.AddCommand(
serveCmd, serveCmd,
createCmd, createCmd,
showCmd,
runCmd, runCmd,
pullCmd, pullCmd,
pushCmd, pushCmd,

View File

@@ -238,6 +238,10 @@ Generate embeddings from a model
- `model`: name of model to generate embeddings from - `model`: name of model to generate embeddings from
- `prompt`: text to generate embeddings for - `prompt`: text to generate embeddings for
Advanced parameters:
- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
### Request ### Request
``` ```

View File

@@ -10,15 +10,11 @@ package format
import ( import (
"crypto" "crypto"
"crypto/ecdsa"
"crypto/ed25519" "crypto/ed25519"
"crypto/elliptic"
"crypto/rand" "crypto/rand"
"crypto/rsa"
"encoding/binary" "encoding/binary"
"encoding/pem" "encoding/pem"
"fmt" "fmt"
"math/big"
"golang.org/x/crypto/ssh" "golang.org/x/crypto/ssh"
) )
@@ -41,25 +37,6 @@ type openSSHPrivateKey struct {
Rest []byte `ssh:"rest"` Rest []byte `ssh:"rest"`
} }
type openSSHRSAPrivateKey struct {
N *big.Int
E *big.Int
D *big.Int
Iqmp *big.Int
P *big.Int
Q *big.Int
Comment string
Pad []byte `ssh:"rest"`
}
type openSSHECDSAPrivateKey struct {
Curve string
Pub []byte
D *big.Int
Comment string
Pad []byte `ssh:"rest"`
}
type openSSHEd25519PrivateKey struct { type openSSHEd25519PrivateKey struct {
Pub []byte Pub []byte
Priv []byte Priv []byte
@@ -85,64 +62,6 @@ func OpenSSHPrivateKey(key crypto.PrivateKey, comment string) (*pem.Block, error
} }
switch k := key.(type) { switch k := key.(type) {
case *rsa.PrivateKey:
e := new(big.Int).SetInt64(int64(k.E))
key := openSSHRSAPrivateKey{
N: k.N,
E: e,
D: k.D,
Iqmp: k.Precomputed.Qinv,
P: k.Primes[0],
Q: k.Primes[1],
Comment: comment,
}
pk1.Keytype = ssh.KeyAlgoRSA
pk1.Rest = ssh.Marshal(key)
w.PubKey = ssh.Marshal(struct {
KeyType string
E *big.Int
N *big.Int
}{
ssh.KeyAlgoRSA, e, k.N,
})
case *ecdsa.PrivateKey:
var curve, keytype string
switch name := k.Curve.Params().Name; name {
case "P-256":
curve = "nistp256"
keytype = ssh.KeyAlgoECDSA256
case "P-384":
curve = "nistp384"
keytype = ssh.KeyAlgoECDSA384
case "P-521":
curve = "nistp521"
keytype = ssh.KeyAlgoECDSA521
default:
return nil, fmt.Errorf("ssh: unknown curve %q", name)
}
pub := elliptic.Marshal(k.Curve, k.X, k.Y)
key := openSSHECDSAPrivateKey{
Curve: curve,
Pub: pub,
D: k.D,
Comment: comment,
}
pk1.Keytype = keytype
pk1.Rest = ssh.Marshal(key)
w.PubKey = ssh.Marshal(struct {
KeyType string
Curve string
Pub []byte
}{
keytype, curve, pub,
})
case ed25519.PrivateKey: case ed25519.PrivateKey:
pub, priv := k[32:], k pub, priv := k[32:], k
key := openSSHEd25519PrivateKey{ key := openSSHEd25519PrivateKey{

1
go.mod
View File

@@ -39,6 +39,7 @@ require (
github.com/ugorji/go/codec v1.2.11 // indirect github.com/ugorji/go/codec v1.2.11 // indirect
golang.org/x/arch v0.3.0 // indirect golang.org/x/arch v0.3.0 // indirect
golang.org/x/crypto v0.10.0 golang.org/x/crypto v0.10.0
golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63
golang.org/x/net v0.10.0 // indirect golang.org/x/net v0.10.0 // indirect
golang.org/x/sys v0.11.0 // indirect golang.org/x/sys v0.11.0 // indirect
golang.org/x/term v0.10.0 golang.org/x/term v0.10.0

2
go.sum
View File

@@ -121,6 +121,8 @@ golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5y
golang.org/x/crypto v0.10.0 h1:LKqV2xt9+kDzSTfOhx4FrkEBcMrAgHSYgzywV9zcGmM= golang.org/x/crypto v0.10.0 h1:LKqV2xt9+kDzSTfOhx4FrkEBcMrAgHSYgzywV9zcGmM=
golang.org/x/crypto v0.10.0/go.mod h1:o4eNf7Ede1fv+hwOwZsTHl9EsPFO6q6ZvYR8vYfY45I= golang.org/x/crypto v0.10.0/go.mod h1:o4eNf7Ede1fv+hwOwZsTHl9EsPFO6q6ZvYR8vYfY45I=
golang.org/x/exp v0.0.0-20230321023759-10a507213a29 h1:ooxPy7fPvB4kwsA2h+iBNHkAbp/4JxTSwCmvdjEYmug= golang.org/x/exp v0.0.0-20230321023759-10a507213a29 h1:ooxPy7fPvB4kwsA2h+iBNHkAbp/4JxTSwCmvdjEYmug=
golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63 h1:m64FZMko/V45gv0bNmrNYoDEq8U5YUhetc9cBWKS1TQ=
golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63/go.mod h1:0v4NqG35kSWCMzLaMeX+IQrlSnVE/bqGSyC2cz/9Le8=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.10.0 h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M= golang.org/x/net v0.10.0 h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M=
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=

View File

@@ -45,6 +45,7 @@ func osPath(llamaPath string) string {
if runtime.GOOS == "windows" { if runtime.GOOS == "windows" {
return path.Join(llamaPath, "Release") return path.Join(llamaPath, "Release")
} }
return llamaPath return llamaPath
} }
@@ -68,7 +69,9 @@ func initGGML() {
case "windows": case "windows":
files = []string{"server.exe"} files = []string{"server.exe"}
case "darwin": case "darwin":
files = append(files, "ggml-metal.metal") if llamaPath == osPath(ggmlGPU) {
files = append(files, "ggml-metal.metal")
}
} }
for _, f := range files { for _, f := range files {
@@ -286,8 +289,8 @@ func newLlama(model string, adapters []string, runner ModelRunner, opts api.Opti
runner.Path, runner.Path,
append(params, "--port", strconv.Itoa(port))..., append(params, "--port", strconv.Itoa(port))...,
) )
var stderr bytes.Buffer cmd.Stdout = os.Stderr
cmd.Stderr = &stderr cmd.Stderr = os.Stderr
llm := &llama{Options: opts, Running: Running{Port: port, Cmd: cmd, Cancel: cancel}} llm := &llama{Options: opts, Running: Running{Port: port, Cmd: cmd, Cancel: cancel}}
@@ -353,11 +356,6 @@ func (llm *llama) SetOptions(opts api.Options) {
llm.Options = opts llm.Options = opts
} }
type Prediction struct {
Content string `json:"content"`
Stop bool `json:"stop"`
}
type GenerationSettings struct { type GenerationSettings struct {
FrequencyPenalty float64 `json:"frequency_penalty"` FrequencyPenalty float64 `json:"frequency_penalty"`
IgnoreEOS bool `json:"ignore_eos"` IgnoreEOS bool `json:"ignore_eos"`
@@ -385,31 +383,19 @@ type GenerationSettings struct {
} }
type Timings struct { type Timings struct {
PredictedMS float64 `json:"predicted_ms"` PredictedN int `json:"predicted_n"`
PredictedN int `json:"predicted_n"` PredictedMS float64 `json:"predicted_ms"`
PredictedPerSecond float64 `json:"predicted_per_second"` PromptN int `json:"prompt_n"`
PredictedPerTokenMS float64 `json:"predicted_per_token_ms"` PromptMS float64 `json:"prompt_ms"`
PromptMS float64 `json:"prompt_ms"`
PromptN int `json:"prompt_n"`
PromptPerSecond float64 `json:"prompt_per_second"`
PromptPerTokenMS float64 `json:"prompt_per_token_ms"`
} }
type PredictComplete struct { type Prediction struct {
Content string `json:"content"` Content string `json:"content"`
GenerationSettings GenerationSettings `json:"generation_settings"` Model string `json:"model"`
Model string `json:"model"` Prompt string `json:"prompt"`
Prompt string `json:"prompt"` Stop bool `json:"stop"`
Stop bool `json:"stop"`
StoppedEOS bool `json:"stopped_eos"` Timings `json:"timings"`
StoppedLimit bool `json:"stopped_limit"`
StoppedWord bool `json:"stopped_word"`
StoppingWord string `json:"stopping_word"`
Timings Timings `json:"timings"`
TokensCached int `json:"tokens_cached"`
TokensEvaluated int `json:"tokens_evaluated"`
TokensPredicted int `json:"tokens_predicted"`
Truncated bool `json:"truncated"`
} }
type PredictRequest struct { type PredictRequest struct {
@@ -437,15 +423,19 @@ type PredictRequest struct {
Stop []string `json:"stop,omitempty"` Stop []string `json:"stop,omitempty"`
} }
func (llm *llama) Predict(ctx context.Context, predictCtx []int, prompt string, fn func(api.GenerateResponse)) error { func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string, fn func(api.GenerateResponse)) error {
// we need to find the trimmed prompt context before predicting so that we can return it to the client prevConvo, err := llm.Decode(ctx, prevContext)
trimmedPrompt, err := llm.marshalPrompt(ctx, predictCtx, prompt)
if err != nil { if err != nil {
return fmt.Errorf("marshaling prompt: %v", err) return err
} }
var nextContext strings.Builder
nextContext.WriteString(prevConvo)
nextContext.WriteString(prompt)
endpoint := fmt.Sprintf("http://127.0.0.1:%d/completion", llm.Port) endpoint := fmt.Sprintf("http://127.0.0.1:%d/completion", llm.Port)
predReq := PredictRequest{ predReq := PredictRequest{
Prompt: trimmedPrompt, Prompt: nextContext.String(),
Stream: true, Stream: true,
NPredict: llm.NumPredict, NPredict: llm.NumPredict,
NKeep: llm.NumKeep, NKeep: llm.NumKeep,
@@ -491,7 +481,6 @@ func (llm *llama) Predict(ctx context.Context, predictCtx []int, prompt string,
} }
scanner := bufio.NewScanner(resp.Body) scanner := bufio.NewScanner(resp.Body)
genCtx := trimmedPrompt // start with the trimmed prompt
for scanner.Scan() { for scanner.Scan() {
select { select {
case <-ctx.Done(): case <-ctx.Done():
@@ -506,34 +495,33 @@ func (llm *llama) Predict(ctx context.Context, predictCtx []int, prompt string,
// Read data from the server-side event stream // Read data from the server-side event stream
if strings.HasPrefix(line, "data: ") { if strings.HasPrefix(line, "data: ") {
evt := line[6:] evt := line[6:]
var complete PredictComplete var p Prediction
if err := json.Unmarshal([]byte(evt), &complete); err != nil { if err := json.Unmarshal([]byte(evt), &p); err != nil {
return fmt.Errorf("error unmarshaling llm complete response: %v", err) return fmt.Errorf("error unmarshaling llm prediction response: %v", err)
} }
if complete.Timings.PredictedMS > 0 { if p.Content != "" {
genCtx += complete.Content fn(api.GenerateResponse{Response: p.Content})
embd, err := llm.Encode(ctx, genCtx) nextContext.WriteString(p.Content)
}
if p.Stop {
embd, err := llm.Encode(ctx, nextContext.String())
if err != nil { if err != nil {
return fmt.Errorf("encoding context: %v", err) return fmt.Errorf("encoding context: %v", err)
} }
fn(api.GenerateResponse{ fn(api.GenerateResponse{
Done: true, Done: true,
Context: embd, Context: embd,
PromptEvalCount: int(complete.Timings.PromptN), PromptEvalCount: p.PromptN,
PromptEvalDuration: parseDurationMs(float64(complete.Timings.PromptMS)), PromptEvalDuration: parseDurationMs(p.PromptMS),
EvalCount: int(complete.Timings.PredictedN), EvalCount: p.PredictedN,
EvalDuration: parseDurationMs(float64(complete.Timings.PredictedMS)), EvalDuration: parseDurationMs(p.PredictedMS),
}) })
return nil return nil
} }
var pred Prediction
if err := json.Unmarshal([]byte(evt), &pred); err != nil {
return fmt.Errorf("error unmarshaling llm prediction response: %v", err)
}
genCtx += pred.Content
fn(api.GenerateResponse{Response: pred.Content})
} }
} }
} }
@@ -545,34 +533,6 @@ func (llm *llama) Predict(ctx context.Context, predictCtx []int, prompt string,
return nil return nil
} }
func (llm *llama) marshalPrompt(ctx context.Context, pCtx []int, prompt string) (string, error) {
pEncode, err := llm.Encode(ctx, prompt)
if err != nil {
return "", fmt.Errorf("encoding prompt context: %w", err)
}
tokens := append(pCtx, pEncode...)
if llm.NumKeep < 0 {
llm.NumKeep = len(tokens)
}
// min(llm.NumCtx - 4, llm.NumKeep)
if llm.NumCtx-4 < llm.NumKeep {
llm.NumKeep = llm.NumCtx - 4
}
if len(tokens) >= llm.NumCtx {
// truncate input
numLeft := (llm.NumCtx - llm.NumKeep) / 2
truncated := tokens[:llm.NumKeep]
erasedBlocks := (len(tokens) - llm.NumKeep - numLeft - 1) / numLeft
truncated = append(truncated, tokens[llm.NumKeep+erasedBlocks*numLeft:]...)
tokens = truncated
log.Printf("input truncated: num_ctx=%d num_keep=%d num_left=%d num_tokens=%d", llm.NumCtx, llm.NumKeep, numLeft, len(truncated))
}
return llm.Decode(ctx, tokens)
}
type TokenizeRequest struct { type TokenizeRequest struct {
Content string `json:"content"` Content string `json:"content"`
} }

View File

@@ -1,8 +1,13 @@
//go:build !darwin
// +build !darwin
package llm package llm
//go:generate git submodule init //go:generate git submodule init
//go:generate git submodule update --force ggml //go:generate git submodule update --force ggml
//go:generate git -C ggml apply ../ggml_patch/0001-add-detokenize-endpoint.patch //go:generate git -C ggml apply ../ggml_patch/0001-add-detokenize-endpoint.patch
//go:generate git -C ggml apply ../ggml_patch/0002-34B-model-support.patch //go:generate git -C ggml apply ../ggml_patch/0002-34B-model-support.patch
//go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_K_QUANTS=on //go:generate git -C ggml apply ../ggml_patch/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch
//go:generate git -C ggml apply ../ggml_patch/0004-metal-add-missing-barriers-for-mul-mat-2699.patch
//go:generate cmake --fresh -S ggml -B ggml/build/cpu -DLLAMA_K_QUANTS=on
//go:generate cmake --build ggml/build/cpu --target server --config Release //go:generate cmake --build ggml/build/cpu --target server --config Release

View File

@@ -1,11 +0,0 @@
//go:build darwin
// +build darwin
package llm
//go:generate git submodule init
//go:generate git submodule update --force ggml
//go:generate git -C ggml apply ../ggml_patch/0001-add-detokenize-endpoint.patch
//go:generate git -C ggml apply ../ggml_patch/0002-34B-model-support.patch
//go:generate cmake -S ggml -B ggml/build/gpu -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on
//go:generate cmake --build ggml/build/gpu --target server --config Release

View File

@@ -0,0 +1,10 @@
package llm
//go:generate git submodule init
//go:generate git submodule update --force ggml
//go:generate git -C ggml apply ../ggml_patch/0001-add-detokenize-endpoint.patch
//go:generate git -C ggml apply ../ggml_patch/0002-34B-model-support.patch
//go:generate git -C ggml apply ../ggml_patch/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch
//go:generate git -C ggml apply ../ggml_patch/0004-metal-add-missing-barriers-for-mul-mat-2699.patch
//go:generate cmake --fresh -S ggml -B ggml/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
//go:generate cmake --build ggml/build/cpu --target server --config Release

View File

@@ -0,0 +1,10 @@
package llm
//go:generate git submodule init
//go:generate git submodule update --force ggml
//go:generate git -C ggml apply ../ggml_patch/0001-add-detokenize-endpoint.patch
//go:generate git -C ggml apply ../ggml_patch/0002-34B-model-support.patch
//go:generate git -C ggml apply ../ggml_patch/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch
//go:generate git -C ggml apply ../ggml_patch/0004-metal-add-missing-barriers-for-mul-mat-2699.patch
//go:generate cmake --fresh -S ggml -B ggml/build/gpu -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
//go:generate cmake --build ggml/build/gpu --target server --config Release

View File

@@ -0,0 +1,32 @@
From 8c0ea847ac1460bca534d92266e3471cb31471be Mon Sep 17 00:00:00 2001
From: Bruce MacDonald <brucewmacdonald@gmail.com>
Date: Tue, 5 Sep 2023 16:05:08 -0400
Subject: [PATCH] metal: add missing barriers for mul-mat #2699
---
ggml-metal.metal | 2 ++
1 file changed, 2 insertions(+)
diff --git a/ggml-metal.metal b/ggml-metal.metal
index 3f31252..ce3541f 100644
--- a/ggml-metal.metal
+++ b/ggml-metal.metal
@@ -1850,6 +1850,7 @@ kernel void kernel_mul_mm(device const uchar * src0,
//load data and store to threadgroup memory
half4x4 temp_a;
dequantize_func(x, il, temp_a);
+ threadgroup_barrier(mem_flags::mem_threadgroup);
#pragma unroll(16)
for (int i = 0; i < 16; i++) {
*(sa + SG_MAT_SIZE * ((tiitg / THREAD_PER_ROW / 8) \
@@ -1895,6 +1896,7 @@ kernel void kernel_mul_mm(device const uchar * src0,
}
} else {
// block is smaller than 64x32, we should avoid writing data outside of the matrix
+ threadgroup_barrier(mem_flags::mem_threadgroup);
threadgroup float *temp_str = ((threadgroup float *)shared_memory) \
+ 32 * (sgitg&1) + (16 * (sgitg>>1)) * BLOCK_SIZE_M;
for (int i = 0; i < 8; i++) {
--
2.39.2 (Apple Git-143)

View File

@@ -0,0 +1,30 @@
From dadbed99e65252d79f81101a392d0d6497b86caa Mon Sep 17 00:00:00 2001
From: Shouzheng Liu <lshzh.hi@gmail.com>
Date: Mon, 21 Aug 2023 06:59:29 -0400
Subject: [PATCH] metal : fix synchronization in new matrix multiplication
kernel (#2686)
---
ggml-metal.metal | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/ggml-metal.metal b/ggml-metal.metal
index 3f31252..88d48f6 100644
--- a/ggml-metal.metal
+++ b/ggml-metal.metal
@@ -1898,10 +1898,11 @@ kernel void kernel_mul_mm(device const uchar * src0,
threadgroup float *temp_str = ((threadgroup float *)shared_memory) \
+ 32 * (sgitg&1) + (16 * (sgitg>>1)) * BLOCK_SIZE_M;
for (int i = 0; i < 8; i++) {
+ threadgroup_barrier(mem_flags::mem_device);
simdgroup_store(c_res[i], temp_str + 8 * (i%4) + 8 * BLOCK_SIZE_M * (i/4), BLOCK_SIZE_M);
}
- threadgroup_barrier(mem_flags::mem_threadgroup);
+ threadgroup_barrier(mem_flags::mem_device);
device float *C = dst + BLOCK_SIZE_M * r0 + (BLOCK_SIZE_N * r1) * ne0 + im*ne1*ne0;
if (sgitg==0) {
for (int i = 0; i < n_rows; i++) {
--
2.41.0

View File

@@ -0,0 +1,41 @@
From 14b1d7e6f720dee41ce5a826376df738096d9033 Mon Sep 17 00:00:00 2001
From: Shouzheng Liu <lshzh.hi@gmail.com>
Date: Tue, 22 Aug 2023 02:18:40 -0400
Subject: [PATCH] metal : add missing barriers for mul-mat (#2699)
---
ggml-metal.metal | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/ggml-metal.metal b/ggml-metal.metal
index 88d48f6..ce3541f 100644
--- a/ggml-metal.metal
+++ b/ggml-metal.metal
@@ -1850,6 +1850,7 @@ kernel void kernel_mul_mm(device const uchar * src0,
//load data and store to threadgroup memory
half4x4 temp_a;
dequantize_func(x, il, temp_a);
+ threadgroup_barrier(mem_flags::mem_threadgroup);
#pragma unroll(16)
for (int i = 0; i < 16; i++) {
*(sa + SG_MAT_SIZE * ((tiitg / THREAD_PER_ROW / 8) \
@@ -1895,14 +1896,14 @@ kernel void kernel_mul_mm(device const uchar * src0,
}
} else {
// block is smaller than 64x32, we should avoid writing data outside of the matrix
+ threadgroup_barrier(mem_flags::mem_threadgroup);
threadgroup float *temp_str = ((threadgroup float *)shared_memory) \
+ 32 * (sgitg&1) + (16 * (sgitg>>1)) * BLOCK_SIZE_M;
for (int i = 0; i < 8; i++) {
- threadgroup_barrier(mem_flags::mem_device);
simdgroup_store(c_res[i], temp_str + 8 * (i%4) + 8 * BLOCK_SIZE_M * (i/4), BLOCK_SIZE_M);
}
- threadgroup_barrier(mem_flags::mem_device);
+ threadgroup_barrier(mem_flags::mem_threadgroup);
device float *C = dst + BLOCK_SIZE_M * r0 + (BLOCK_SIZE_N * r1) * ne0 + im*ne1*ne0;
if (sgitg==0) {
for (int i = 0; i < n_rows; i++) {
--
2.41.0

View File

@@ -6,8 +6,11 @@ GO_LDFLAGS="-X github.com/jmorganca/ollama/version.Version=$VERSION"
GO_LDFLAGS="$GO_LDFLAGS -X github.com/jmorganca/ollama/server.mode=release" GO_LDFLAGS="$GO_LDFLAGS -X github.com/jmorganca/ollama/server.mode=release"
# build universal binary # build universal binary
CGO_ENABLED=1 GOARCH=arm64 go build -ldflags "$GO_LDFLAGS" -o dist/ollama-darwin-arm64 GOARCH=arm64 go generate ./...
CGO_ENABLED=1 GOARCH=amd64 go build -ldflags "$GO_LDFLAGS" -o dist/ollama-darwin-amd64 GOARCH=arm64 go build -ldflags "$GO_LDFLAGS" -o dist/ollama-darwin-arm64
rm -rf llm/llama.cpp/ggml/build/*/bin
GOARCH=amd64 go generate ./...
GOARCH=amd64 go build -ldflags "$GO_LDFLAGS" -o dist/ollama-darwin-amd64
lipo -create -output dist/ollama dist/ollama-darwin-arm64 dist/ollama-darwin-amd64 lipo -create -output dist/ollama dist/ollama-darwin-arm64 dist/ollama-darwin-amd64
rm dist/ollama-darwin-amd64 dist/ollama-darwin-arm64 rm dist/ollama-darwin-amd64 dist/ollama-darwin-arm64
codesign --deep --force --options=runtime --sign "$APPLE_IDENTITY" --timestamp dist/ollama codesign --deep --force --options=runtime --sign "$APPLE_IDENTITY" --timestamp dist/ollama

View File

@@ -22,6 +22,8 @@ import (
"strings" "strings"
"text/template" "text/template"
"golang.org/x/exp/slices"
"github.com/jmorganca/ollama/api" "github.com/jmorganca/ollama/api"
"github.com/jmorganca/ollama/llm" "github.com/jmorganca/ollama/llm"
"github.com/jmorganca/ollama/parser" "github.com/jmorganca/ollama/parser"
@@ -39,15 +41,18 @@ type RegistryOptions struct {
} }
type Model struct { type Model struct {
Name string `json:"name"` Name string `json:"name"`
ModelPath string ShortName string
AdapterPaths []string ModelPath string
Template string OriginalModel string
System string AdapterPaths []string
Digest string Template string
ConfigDigest string System string
Options map[string]interface{} License []string
Embeddings []vector.Embedding Digest string
ConfigDigest string
Options map[string]interface{}
Embeddings []vector.Embedding
} }
func (m *Model) Prompt(request api.GenerateRequest, embedding string) (string, error) { func (m *Model) Prompt(request api.GenerateRequest, embedding string) (string, error) {
@@ -111,6 +116,7 @@ type LayerReader struct {
type ConfigV2 struct { type ConfigV2 struct {
ModelFamily llm.ModelFamily `json:"model_family"` ModelFamily llm.ModelFamily `json:"model_family"`
ModelType string `json:"model_type"` ModelType string `json:"model_type"`
ModelFormat string `json:"model_format"`
FileType string `json:"file_type"` FileType string `json:"file_type"`
RootFS RootFS `json:"rootfs"` RootFS RootFS `json:"rootfs"`
@@ -169,9 +175,11 @@ func GetModel(name string) (*Model, error) {
model := &Model{ model := &Model{
Name: mp.GetFullTagname(), Name: mp.GetFullTagname(),
ShortName: mp.GetShortTagname(),
Digest: digest, Digest: digest,
ConfigDigest: manifest.Config.Digest, ConfigDigest: manifest.Config.Digest,
Template: "{{ .Prompt }}", Template: "{{ .Prompt }}",
License: []string{},
} }
for _, layer := range manifest.Layers { for _, layer := range manifest.Layers {
@@ -183,6 +191,7 @@ func GetModel(name string) (*Model, error) {
switch layer.MediaType { switch layer.MediaType {
case "application/vnd.ollama.image.model": case "application/vnd.ollama.image.model":
model.ModelPath = filename model.ModelPath = filename
model.OriginalModel = layer.From
case "application/vnd.ollama.image.embed": case "application/vnd.ollama.image.embed":
file, err := os.Open(filename) file, err := os.Open(filename)
if err != nil { if err != nil {
@@ -227,6 +236,12 @@ func GetModel(name string) (*Model, error) {
if err = json.NewDecoder(params).Decode(&model.Options); err != nil { if err = json.NewDecoder(params).Decode(&model.Options); err != nil {
return nil, err return nil, err
} }
case "application/vnd.ollama.image.license":
bts, err := os.ReadFile(filename)
if err != nil {
return nil, err
}
model.License = append(model.License, string(bts))
} }
} }
@@ -274,6 +289,7 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
var layers []*LayerReader var layers []*LayerReader
params := make(map[string][]string) params := make(map[string][]string)
var sourceParams map[string]any
embed := EmbeddingParams{fn: fn} embed := EmbeddingParams{fn: fn}
for _, c := range commands { for _, c := range commands {
log.Printf("[%s] - %s\n", c.Name, c.Args) log.Printf("[%s] - %s\n", c.Name, c.Args)
@@ -320,6 +336,7 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
config.ModelFamily = ggml.ModelFamily() config.ModelFamily = ggml.ModelFamily()
config.ModelType = ggml.ModelType().String() config.ModelType = ggml.ModelType().String()
config.ModelFormat = ggml.Name()
config.FileType = ggml.FileType().String() config.FileType = ggml.FileType().String()
// reset the file // reset the file
@@ -351,12 +368,30 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
return err return err
} }
// copie the model metadata // copy the model metadata
config.ModelFamily = source.ModelFamily config.ModelFamily = source.ModelFamily
config.ModelType = source.ModelType config.ModelType = source.ModelType
config.ModelFormat = source.ModelFormat
config.FileType = source.FileType config.FileType = source.FileType
for _, l := range mf.Layers { for _, l := range mf.Layers {
if l.MediaType == "application/vnd.ollama.image.params" {
sourceParamsBlobPath, err := GetBlobsPath(l.Digest)
if err != nil {
return err
}
sourceParamsBlob, err := os.Open(sourceParamsBlobPath)
if err != nil {
return err
}
defer sourceParamsBlob.Close()
if err := json.NewDecoder(sourceParamsBlob).Decode(&sourceParams); err != nil {
return err
}
}
newLayer, err := GetLayerWithBufferFromLayer(l) newLayer, err := GetLayerWithBufferFromLayer(l)
if err != nil { if err != nil {
return err return err
@@ -427,12 +462,19 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
// Create a single layer for the parameters // Create a single layer for the parameters
if len(params) > 0 { if len(params) > 0 {
fn(api.ProgressResponse{Status: "creating parameter layer"}) fn(api.ProgressResponse{Status: "creating parameter layer"})
layers = removeLayerFromLayers(layers, "application/vnd.ollama.image.params") layers = removeLayerFromLayers(layers, "application/vnd.ollama.image.params")
formattedParams, err := formatParams(params) formattedParams, err := formatParams(params)
if err != nil { if err != nil {
return fmt.Errorf("couldn't create params json: %v", err) return fmt.Errorf("couldn't create params json: %v", err)
} }
for k, v := range sourceParams {
if _, ok := formattedParams[k]; !ok {
formattedParams[k] = v
}
}
bts, err := json.Marshal(formattedParams) bts, err := json.Marshal(formattedParams)
if err != nil { if err != nil {
return err return err
@@ -630,14 +672,9 @@ func existingFileEmbeddings(digest string) (map[string][]float64, error) {
} }
func removeLayerFromLayers(layers []*LayerReader, mediaType string) []*LayerReader { func removeLayerFromLayers(layers []*LayerReader, mediaType string) []*LayerReader {
j := 0 return slices.DeleteFunc(layers, func(layer *LayerReader) bool {
for _, l := range layers { return layer.MediaType == mediaType
if l.MediaType != mediaType { })
layers[j] = l
j++
}
}
return layers[:j]
} }
func SaveLayers(layers []*LayerReader, fn func(resp api.ProgressResponse), force bool) error { func SaveLayers(layers []*LayerReader, fn func(resp api.ProgressResponse), force bool) error {
@@ -911,6 +948,83 @@ func DeleteModel(name string) error {
return nil return nil
} }
// ShowModelfile renders a Modelfile that recreates the given model, as used
// by "ollama show". It emits FROM, TEMPLATE, SYSTEM, one PARAMETER line per
// option (list-valued options expand to one line per element), and an
// ADAPTER line per adapter path. The FROM line uses the original upstream
// model name when known, falling back to the local model path.
func ShowModelfile(model *Model) (string, error) {
	type modelTemplate struct {
		*Model
		From   string
		Params string
	}

	// formatValue renders one scalar option value; ok is false for
	// unsupported types, which the caller logs and skips.
	formatValue := func(v interface{}) (string, bool) {
		switch val := v.(type) {
		case string:
			return val, true
		case int:
			return strconv.Itoa(val), true
		case float64:
			// precision -1 gives the shortest exact representation;
			// the previous fixed precision of 0 rounded fractional
			// options (e.g. temperature 0.7) to whole numbers
			return strconv.FormatFloat(val, 'f', -1, 64), true
		case bool:
			return strconv.FormatBool(val), true
		default:
			return "", false
		}
	}

	var params []string
	for k, v := range model.Options {
		if vals, ok := v.([]interface{}); ok {
			// options decoded from JSON store lists as []interface{}
			for _, nv := range vals {
				s, ok := formatValue(nv)
				if !ok {
					log.Printf("unknown type: %s", reflect.TypeOf(nv).String())
					continue
				}
				params = append(params, fmt.Sprintf("PARAMETER %s %s", k, s))
			}
			continue
		}

		s, ok := formatValue(v)
		if !ok {
			log.Printf("unknown type: %s", reflect.TypeOf(v).String())
			continue
		}
		params = append(params, fmt.Sprintf("PARAMETER %s %s", k, s))
	}

	mt := modelTemplate{
		Model:  model,
		From:   model.OriginalModel,
		Params: strings.Join(params, "\n"),
	}

	// models created directly from a weights file have no upstream name
	if mt.From == "" {
		mt.From = model.ModelPath
	}

	modelFile := `# Modelfile generated by "ollama show"
# To build a new Modelfile based on this one, replace the FROM line with:
# FROM {{ .ShortName }}
FROM {{ .From }}
TEMPLATE """{{ .Template }}"""
SYSTEM """{{ .System }}"""
{{ .Params }}
`

	for _, l := range mt.Model.AdapterPaths {
		modelFile += fmt.Sprintf("ADAPTER %s\n", l)
	}

	tmpl, err := template.New("").Parse(modelFile)
	if err != nil {
		log.Printf("error parsing template: %q", err)
		return "", err
	}

	var buf bytes.Buffer
	if err = tmpl.Execute(&buf, mt); err != nil {
		log.Printf("error executing template: %q", err)
		return "", err
	}

	return buf.String(), nil
}
func PushModel(ctx context.Context, name string, regOpts *RegistryOptions, fn func(api.ProgressResponse)) error { func PushModel(ctx context.Context, name string, regOpts *RegistryOptions, fn func(api.ProgressResponse)) error {
mp := ParseModelPath(name) mp := ParseModelPath(name)
fn(api.ProgressResponse{Status: "retrieving manifest"}) fn(api.ProgressResponse{Status: "retrieving manifest"})

View File

@@ -114,7 +114,12 @@ func GetManifestPath() (string, error) {
return "", err return "", err
} }
return filepath.Join(home, ".ollama", "models", "manifests"), nil path := filepath.Join(home, ".ollama", "models", "manifests")
if err := os.MkdirAll(path, 0o755); err != nil {
return "", err
}
return path, nil
} }
func GetBlobsPath(digest string) (string, error) { func GetBlobsPath(digest string) (string, error) {

View File

@@ -12,6 +12,7 @@ import (
"os/signal" "os/signal"
"path/filepath" "path/filepath"
"reflect" "reflect"
"strconv"
"strings" "strings"
"sync" "sync"
"syscall" "syscall"
@@ -117,12 +118,13 @@ func load(ctx context.Context, model *Model, reqOpts map[string]interface{}, ses
if err != nil { if err != nil {
return err return err
} }
tokensNoSystem, err := llmModel.Encode(ctx, promptNoSystem) tokensNoSystem, err := llmModel.Encode(ctx, promptNoSystem)
if err != nil { if err != nil {
return err return err
} }
opts.NumKeep = len(tokensWithSystem) - len(tokensNoSystem) + 1 opts.NumKeep = len(tokensWithSystem) - len(tokensNoSystem)
llmModel.SetOptions(opts) llmModel.SetOptions(opts)
} }
@@ -363,6 +365,77 @@ func DeleteModelHandler(c *gin.Context) {
} }
} }
// ShowModelHandler serves POST /api/show: it binds the JSON request body,
// looks up the named model's details, and writes them back as JSON.
// A missing model yields 404; any other lookup failure yields 500.
func ShowModelHandler(c *gin.Context) {
	var req api.ShowRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	resp, err := GetModelInfo(req.Name)
	switch {
	case err == nil:
		c.JSON(http.StatusOK, resp)
	case os.IsNotExist(err):
		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Name)})
	default:
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
	}
}
// GetModelInfo loads the named model and assembles the response for the
// show API: its license text, system prompt, template, a generated
// Modelfile, and a column-aligned listing of its parameters.
func GetModelInfo(name string) (*api.ShowResponse, error) {
	model, err := GetModel(name)
	if err != nil {
		return nil, err
	}

	resp := &api.ShowResponse{
		License:  strings.Join(model.License, "\n"),
		System:   model.System,
		Template: model.Template,
	}

	mf, err := ShowModelfile(model)
	if err != nil {
		return nil, err
	}
	resp.Modelfile = mf

	// formatValue renders one scalar option value; ok is false for
	// unsupported types, which are skipped (unchanged behavior: this
	// endpoint never logged unknown types).
	formatValue := func(v interface{}) (string, bool) {
		switch val := v.(type) {
		case string:
			return val, true
		case int:
			return strconv.Itoa(val), true
		case float64:
			// precision -1 gives the shortest exact representation;
			// the previous fixed precision of 0 rounded fractional
			// options (e.g. temperature 0.7) to whole numbers
			return strconv.FormatFloat(val, 'f', -1, 64), true
		case bool:
			return strconv.FormatBool(val), true
		default:
			return "", false
		}
	}

	// column width for left-aligned parameter names
	const cs = 30

	var params []string
	for k, v := range model.Options {
		if vals, ok := v.([]interface{}); ok {
			// options decoded from JSON store lists as []interface{}
			for _, nv := range vals {
				if s, ok := formatValue(nv); ok {
					params = append(params, fmt.Sprintf("%-*s %s", cs, k, s))
				}
			}
			continue
		}
		if s, ok := formatValue(v); ok {
			params = append(params, fmt.Sprintf("%-*s %s", cs, k, s))
		}
	}
	resp.Parameters = strings.Join(params, "\n")

	return resp, nil
}
func ListModelsHandler(c *gin.Context) { func ListModelsHandler(c *gin.Context) {
var models []api.ModelResponse var models []api.ModelResponse
fp, err := GetManifestPath() fp, err := GetManifestPath()
@@ -456,6 +529,7 @@ func Serve(ln net.Listener, origins []string) error {
r.POST("/api/copy", CopyModelHandler) r.POST("/api/copy", CopyModelHandler)
r.GET("/api/tags", ListModelsHandler) r.GET("/api/tags", ListModelsHandler)
r.DELETE("/api/delete", DeleteModelHandler) r.DELETE("/api/delete", DeleteModelHandler)
r.POST("/api/show", ShowModelHandler)
log.Printf("Listening on %s", ln.Addr()) log.Printf("Listening on %s", ln.Addr())
s := &http.Server{ s := &http.Server{