mirror of
https://github.com/ollama/ollama.git
synced 2026-04-22 00:36:11 +02:00
Compare commits
28 Commits
mxyng/extr
...
format-con
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
23481167a7 | ||
|
|
07b4074e7b | ||
|
|
61dda6a5e0 | ||
|
|
e1f9ced568 | ||
|
|
9795b43d93 | ||
|
|
0980d5c7e3 | ||
|
|
0dae34b6a7 | ||
|
|
83c6be1666 | ||
|
|
1adfa67589 | ||
|
|
790d24eb7b | ||
|
|
7de300856b | ||
|
|
213ffdb548 | ||
|
|
d42d88386a | ||
|
|
154f24af91 | ||
|
|
a1ecdd36d5 | ||
|
|
d18282bfda | ||
|
|
9ae76ba8c9 | ||
|
|
2bc06565c7 | ||
|
|
d1c2558f7e | ||
|
|
7b5aefb427 | ||
|
|
06ef90c051 | ||
|
|
7efbc84320 | ||
|
|
e9f6df7dca | ||
|
|
7fa6e51686 | ||
|
|
8dc68417e7 | ||
|
|
681f3c4c42 | ||
|
|
59a705525c | ||
|
|
5d3f314b0b |
@@ -1,8 +1,4 @@
|
|||||||
build
|
|
||||||
llama/build
|
|
||||||
.venv
|
|
||||||
.vscode
|
.vscode
|
||||||
ollama
|
ollama
|
||||||
app
|
app
|
||||||
web
|
llm/llama.cpp/ggml
|
||||||
.env
|
|
||||||
|
|||||||
18
Dockerfile
18
Dockerfile
@@ -1,15 +1,21 @@
|
|||||||
FROM golang:1.20
|
FROM golang:alpine
|
||||||
|
|
||||||
WORKDIR /go/src/github.com/jmorganca/ollama
|
WORKDIR /go/src/github.com/jmorganca/ollama
|
||||||
|
RUN apk add --no-cache git build-base cmake
|
||||||
|
|
||||||
COPY . .
|
COPY . .
|
||||||
RUN CGO_ENABLED=1 go build -ldflags '-linkmode external -extldflags "-static"' .
|
RUN go generate ./... && go build -ldflags '-linkmode external -extldflags "-static"' .
|
||||||
|
|
||||||
FROM alpine
|
FROM alpine
|
||||||
COPY --from=0 /go/src/github.com/jmorganca/ollama/ollama /bin/ollama
|
ENV OLLAMA_HOST 0.0.0.0
|
||||||
EXPOSE 11434
|
RUN apk add --no-cache libstdc++
|
||||||
|
|
||||||
ARG USER=ollama
|
ARG USER=ollama
|
||||||
ARG GROUP=ollama
|
ARG GROUP=ollama
|
||||||
RUN addgroup -g 1000 $GROUP && adduser -u 1000 -DG $GROUP $USER
|
RUN addgroup $GROUP && adduser -D -G $GROUP $USER
|
||||||
|
|
||||||
|
COPY --from=0 /go/src/github.com/jmorganca/ollama/ollama /bin/ollama
|
||||||
|
|
||||||
USER $USER:$GROUP
|
USER $USER:$GROUP
|
||||||
ENTRYPOINT ["/bin/ollama"]
|
ENTRYPOINT ["/bin/ollama"]
|
||||||
ENV OLLAMA_HOST 0.0.0.0
|
|
||||||
CMD ["serve"]
|
CMD ["serve"]
|
||||||
|
|||||||
@@ -165,10 +165,11 @@ Ollama bundles model weights, configurations, and data into a single package, de
|
|||||||
|
|
||||||
## Building
|
## Building
|
||||||
|
|
||||||
Install `cmake`:
|
Install `cmake` and `go`:
|
||||||
|
|
||||||
```
|
```
|
||||||
brew install cmake
|
brew install cmake
|
||||||
|
brew install go
|
||||||
```
|
```
|
||||||
|
|
||||||
Then generate dependencies and build:
|
Then generate dependencies and build:
|
||||||
|
|||||||
@@ -255,6 +255,14 @@ func (c *Client) Delete(ctx context.Context, req *DeleteRequest) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *Client) Show(ctx context.Context, req *ShowRequest) (*ShowResponse, error) {
|
||||||
|
var resp ShowResponse
|
||||||
|
if err := c.do(ctx, http.MethodPost, "/api/show", req, &resp); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return &resp, nil
|
||||||
|
}
|
||||||
|
|
||||||
func (c *Client) Heartbeat(ctx context.Context) error {
|
func (c *Client) Heartbeat(ctx context.Context) error {
|
||||||
if err := c.do(ctx, http.MethodHead, "/", nil, nil); err != nil {
|
if err := c.do(ctx, http.MethodHead, "/", nil, nil); err != nil {
|
||||||
return err
|
return err
|
||||||
|
|||||||
12
api/types.go
12
api/types.go
@@ -61,6 +61,18 @@ type DeleteRequest struct {
|
|||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type ShowRequest struct {
|
||||||
|
Name string `json:"name"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type ShowResponse struct {
|
||||||
|
License string `json:"license,omitempty"`
|
||||||
|
Modelfile string `json:"modelfile,omitempty"`
|
||||||
|
Parameters string `json:"parameters,omitempty"`
|
||||||
|
Template string `json:"template,omitempty"`
|
||||||
|
System string `json:"system,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
type CopyRequest struct {
|
type CopyRequest struct {
|
||||||
Source string `json:"source"`
|
Source string `json:"source"`
|
||||||
Destination string `json:"destination"`
|
Destination string `json:"destination"`
|
||||||
|
|||||||
149
cmd/cmd.go
149
cmd/cmd.go
@@ -230,6 +230,84 @@ func DeleteHandler(cmd *cobra.Command, args []string) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func ShowHandler(cmd *cobra.Command, args []string) error {
|
||||||
|
client, err := api.FromEnv()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(args) != 1 {
|
||||||
|
return errors.New("missing model name")
|
||||||
|
}
|
||||||
|
|
||||||
|
license, errLicense := cmd.Flags().GetBool("license")
|
||||||
|
modelfile, errModelfile := cmd.Flags().GetBool("modelfile")
|
||||||
|
parameters, errParams := cmd.Flags().GetBool("parameters")
|
||||||
|
system, errSystem := cmd.Flags().GetBool("system")
|
||||||
|
template, errTemplate := cmd.Flags().GetBool("template")
|
||||||
|
|
||||||
|
for _, boolErr := range []error{errLicense, errModelfile, errParams, errSystem, errTemplate} {
|
||||||
|
if boolErr != nil {
|
||||||
|
return errors.New("error retrieving flags")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
flagsSet := 0
|
||||||
|
showType := ""
|
||||||
|
|
||||||
|
if license {
|
||||||
|
flagsSet++
|
||||||
|
showType = "license"
|
||||||
|
}
|
||||||
|
|
||||||
|
if modelfile {
|
||||||
|
flagsSet++
|
||||||
|
showType = "modelfile"
|
||||||
|
}
|
||||||
|
|
||||||
|
if parameters {
|
||||||
|
flagsSet++
|
||||||
|
showType = "parameters"
|
||||||
|
}
|
||||||
|
|
||||||
|
if system {
|
||||||
|
flagsSet++
|
||||||
|
showType = "system"
|
||||||
|
}
|
||||||
|
|
||||||
|
if template {
|
||||||
|
flagsSet++
|
||||||
|
showType = "template"
|
||||||
|
}
|
||||||
|
|
||||||
|
if flagsSet > 1 {
|
||||||
|
return errors.New("only one of '--license', '--modelfile', '--parameters', '--system', or '--template' can be specified")
|
||||||
|
} else if flagsSet == 0 {
|
||||||
|
return errors.New("one of '--license', '--modelfile', '--parameters', '--system', or '--template' must be specified")
|
||||||
|
}
|
||||||
|
|
||||||
|
req := api.ShowRequest{Name: args[0]}
|
||||||
|
resp, err := client.Show(context.Background(), &req)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
switch showType {
|
||||||
|
case "license":
|
||||||
|
fmt.Println(resp.License)
|
||||||
|
case "modelfile":
|
||||||
|
fmt.Println(resp.Modelfile)
|
||||||
|
case "parameters":
|
||||||
|
fmt.Println(resp.Parameters)
|
||||||
|
case "system":
|
||||||
|
fmt.Println(resp.System)
|
||||||
|
case "template":
|
||||||
|
fmt.Println(resp.Template)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func CopyHandler(cmd *cobra.Command, args []string) error {
|
func CopyHandler(cmd *cobra.Command, args []string) error {
|
||||||
client, err := api.FromEnv()
|
client, err := api.FromEnv()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -377,20 +455,6 @@ func generate(cmd *cobra.Command, model, prompt string) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func showLayer(l *server.Layer) {
|
|
||||||
filename, err := server.GetBlobsPath(l.Digest)
|
|
||||||
if err != nil {
|
|
||||||
fmt.Println("Couldn't get layer's path")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
bts, err := os.ReadFile(filename)
|
|
||||||
if err != nil {
|
|
||||||
fmt.Println("Couldn't read layer")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
fmt.Println(string(bts))
|
|
||||||
}
|
|
||||||
|
|
||||||
func generateInteractive(cmd *cobra.Command, model string) error {
|
func generateInteractive(cmd *cobra.Command, model string) error {
|
||||||
home, err := os.UserHomeDir()
|
home, err := os.UserHomeDir()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -413,6 +477,8 @@ func generateInteractive(cmd *cobra.Command, model string) error {
|
|||||||
),
|
),
|
||||||
readline.PcItem("/show",
|
readline.PcItem("/show",
|
||||||
readline.PcItem("license"),
|
readline.PcItem("license"),
|
||||||
|
readline.PcItem("modelfile"),
|
||||||
|
readline.PcItem("parameters"),
|
||||||
readline.PcItem("system"),
|
readline.PcItem("system"),
|
||||||
readline.PcItem("template"),
|
readline.PcItem("template"),
|
||||||
),
|
),
|
||||||
@@ -522,42 +588,28 @@ func generateInteractive(cmd *cobra.Command, model string) error {
|
|||||||
case strings.HasPrefix(line, "/show"):
|
case strings.HasPrefix(line, "/show"):
|
||||||
args := strings.Fields(line)
|
args := strings.Fields(line)
|
||||||
if len(args) > 1 {
|
if len(args) > 1 {
|
||||||
mp := server.ParseModelPath(model)
|
resp, err := server.GetModelInfo(model)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
fmt.Println("error: couldn't get model")
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
manifest, _, err := server.GetManifest(mp)
|
|
||||||
if err != nil {
|
|
||||||
fmt.Println("error: couldn't get a manifest for this model")
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
switch args[1] {
|
switch args[1] {
|
||||||
case "license":
|
case "license":
|
||||||
for _, l := range manifest.Layers {
|
fmt.Println(resp.License)
|
||||||
if l.MediaType == "application/vnd.ollama.image.license" {
|
case "modelfile":
|
||||||
showLayer(l)
|
fmt.Println(resp.Modelfile)
|
||||||
}
|
case "parameters":
|
||||||
}
|
fmt.Println(resp.Parameters)
|
||||||
continue
|
|
||||||
case "system":
|
case "system":
|
||||||
for _, l := range manifest.Layers {
|
fmt.Println(resp.System)
|
||||||
if l.MediaType == "application/vnd.ollama.image.system" {
|
|
||||||
showLayer(l)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
continue
|
|
||||||
case "template":
|
case "template":
|
||||||
for _, l := range manifest.Layers {
|
fmt.Println(resp.Template)
|
||||||
if l.MediaType == "application/vnd.ollama.image.template" {
|
|
||||||
showLayer(l)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
continue
|
|
||||||
default:
|
default:
|
||||||
usage()
|
fmt.Println("error: unknown command")
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
continue
|
||||||
} else {
|
} else {
|
||||||
usage()
|
usage()
|
||||||
continue
|
continue
|
||||||
@@ -749,6 +801,20 @@ func NewCLI() *cobra.Command {
|
|||||||
|
|
||||||
createCmd.Flags().StringP("file", "f", "Modelfile", "Name of the Modelfile (default \"Modelfile\")")
|
createCmd.Flags().StringP("file", "f", "Modelfile", "Name of the Modelfile (default \"Modelfile\")")
|
||||||
|
|
||||||
|
showCmd := &cobra.Command{
|
||||||
|
Use: "show MODEL",
|
||||||
|
Short: "Show information for a model",
|
||||||
|
Args: cobra.MinimumNArgs(1),
|
||||||
|
PreRunE: checkServerHeartbeat,
|
||||||
|
RunE: ShowHandler,
|
||||||
|
}
|
||||||
|
|
||||||
|
showCmd.Flags().Bool("license", false, "Show license of a model")
|
||||||
|
showCmd.Flags().Bool("modelfile", false, "Show Modelfile of a model")
|
||||||
|
showCmd.Flags().Bool("parameters", false, "Show parameters of a model")
|
||||||
|
showCmd.Flags().Bool("template", false, "Show template of a model")
|
||||||
|
showCmd.Flags().Bool("system", false, "Show system prompt of a model")
|
||||||
|
|
||||||
runCmd := &cobra.Command{
|
runCmd := &cobra.Command{
|
||||||
Use: "run MODEL [PROMPT]",
|
Use: "run MODEL [PROMPT]",
|
||||||
Short: "Run a model",
|
Short: "Run a model",
|
||||||
@@ -814,6 +880,7 @@ func NewCLI() *cobra.Command {
|
|||||||
rootCmd.AddCommand(
|
rootCmd.AddCommand(
|
||||||
serveCmd,
|
serveCmd,
|
||||||
createCmd,
|
createCmd,
|
||||||
|
showCmd,
|
||||||
runCmd,
|
runCmd,
|
||||||
pullCmd,
|
pullCmd,
|
||||||
pushCmd,
|
pushCmd,
|
||||||
|
|||||||
@@ -238,6 +238,10 @@ Generate embeddings from a model
|
|||||||
- `model`: name of model to generate embeddings from
|
- `model`: name of model to generate embeddings from
|
||||||
- `prompt`: text to generate embeddings for
|
- `prompt`: text to generate embeddings for
|
||||||
|
|
||||||
|
Advanced parameters:
|
||||||
|
|
||||||
|
- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
|
||||||
|
|
||||||
### Request
|
### Request
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -10,15 +10,11 @@ package format
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"crypto"
|
"crypto"
|
||||||
"crypto/ecdsa"
|
|
||||||
"crypto/ed25519"
|
"crypto/ed25519"
|
||||||
"crypto/elliptic"
|
|
||||||
"crypto/rand"
|
"crypto/rand"
|
||||||
"crypto/rsa"
|
|
||||||
"encoding/binary"
|
"encoding/binary"
|
||||||
"encoding/pem"
|
"encoding/pem"
|
||||||
"fmt"
|
"fmt"
|
||||||
"math/big"
|
|
||||||
|
|
||||||
"golang.org/x/crypto/ssh"
|
"golang.org/x/crypto/ssh"
|
||||||
)
|
)
|
||||||
@@ -41,25 +37,6 @@ type openSSHPrivateKey struct {
|
|||||||
Rest []byte `ssh:"rest"`
|
Rest []byte `ssh:"rest"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type openSSHRSAPrivateKey struct {
|
|
||||||
N *big.Int
|
|
||||||
E *big.Int
|
|
||||||
D *big.Int
|
|
||||||
Iqmp *big.Int
|
|
||||||
P *big.Int
|
|
||||||
Q *big.Int
|
|
||||||
Comment string
|
|
||||||
Pad []byte `ssh:"rest"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type openSSHECDSAPrivateKey struct {
|
|
||||||
Curve string
|
|
||||||
Pub []byte
|
|
||||||
D *big.Int
|
|
||||||
Comment string
|
|
||||||
Pad []byte `ssh:"rest"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type openSSHEd25519PrivateKey struct {
|
type openSSHEd25519PrivateKey struct {
|
||||||
Pub []byte
|
Pub []byte
|
||||||
Priv []byte
|
Priv []byte
|
||||||
@@ -85,64 +62,6 @@ func OpenSSHPrivateKey(key crypto.PrivateKey, comment string) (*pem.Block, error
|
|||||||
}
|
}
|
||||||
|
|
||||||
switch k := key.(type) {
|
switch k := key.(type) {
|
||||||
case *rsa.PrivateKey:
|
|
||||||
e := new(big.Int).SetInt64(int64(k.E))
|
|
||||||
|
|
||||||
key := openSSHRSAPrivateKey{
|
|
||||||
N: k.N,
|
|
||||||
E: e,
|
|
||||||
D: k.D,
|
|
||||||
Iqmp: k.Precomputed.Qinv,
|
|
||||||
P: k.Primes[0],
|
|
||||||
Q: k.Primes[1],
|
|
||||||
Comment: comment,
|
|
||||||
}
|
|
||||||
|
|
||||||
pk1.Keytype = ssh.KeyAlgoRSA
|
|
||||||
pk1.Rest = ssh.Marshal(key)
|
|
||||||
|
|
||||||
w.PubKey = ssh.Marshal(struct {
|
|
||||||
KeyType string
|
|
||||||
E *big.Int
|
|
||||||
N *big.Int
|
|
||||||
}{
|
|
||||||
ssh.KeyAlgoRSA, e, k.N,
|
|
||||||
})
|
|
||||||
case *ecdsa.PrivateKey:
|
|
||||||
var curve, keytype string
|
|
||||||
switch name := k.Curve.Params().Name; name {
|
|
||||||
case "P-256":
|
|
||||||
curve = "nistp256"
|
|
||||||
keytype = ssh.KeyAlgoECDSA256
|
|
||||||
case "P-384":
|
|
||||||
curve = "nistp384"
|
|
||||||
keytype = ssh.KeyAlgoECDSA384
|
|
||||||
case "P-521":
|
|
||||||
curve = "nistp521"
|
|
||||||
keytype = ssh.KeyAlgoECDSA521
|
|
||||||
default:
|
|
||||||
return nil, fmt.Errorf("ssh: unknown curve %q", name)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub := elliptic.Marshal(k.Curve, k.X, k.Y)
|
|
||||||
|
|
||||||
key := openSSHECDSAPrivateKey{
|
|
||||||
Curve: curve,
|
|
||||||
Pub: pub,
|
|
||||||
D: k.D,
|
|
||||||
Comment: comment,
|
|
||||||
}
|
|
||||||
|
|
||||||
pk1.Keytype = keytype
|
|
||||||
pk1.Rest = ssh.Marshal(key)
|
|
||||||
|
|
||||||
w.PubKey = ssh.Marshal(struct {
|
|
||||||
KeyType string
|
|
||||||
Curve string
|
|
||||||
Pub []byte
|
|
||||||
}{
|
|
||||||
keytype, curve, pub,
|
|
||||||
})
|
|
||||||
case ed25519.PrivateKey:
|
case ed25519.PrivateKey:
|
||||||
pub, priv := k[32:], k
|
pub, priv := k[32:], k
|
||||||
key := openSSHEd25519PrivateKey{
|
key := openSSHEd25519PrivateKey{
|
||||||
|
|||||||
1
go.mod
1
go.mod
@@ -39,6 +39,7 @@ require (
|
|||||||
github.com/ugorji/go/codec v1.2.11 // indirect
|
github.com/ugorji/go/codec v1.2.11 // indirect
|
||||||
golang.org/x/arch v0.3.0 // indirect
|
golang.org/x/arch v0.3.0 // indirect
|
||||||
golang.org/x/crypto v0.10.0
|
golang.org/x/crypto v0.10.0
|
||||||
|
golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63
|
||||||
golang.org/x/net v0.10.0 // indirect
|
golang.org/x/net v0.10.0 // indirect
|
||||||
golang.org/x/sys v0.11.0 // indirect
|
golang.org/x/sys v0.11.0 // indirect
|
||||||
golang.org/x/term v0.10.0
|
golang.org/x/term v0.10.0
|
||||||
|
|||||||
2
go.sum
2
go.sum
@@ -121,6 +121,8 @@ golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5y
|
|||||||
golang.org/x/crypto v0.10.0 h1:LKqV2xt9+kDzSTfOhx4FrkEBcMrAgHSYgzywV9zcGmM=
|
golang.org/x/crypto v0.10.0 h1:LKqV2xt9+kDzSTfOhx4FrkEBcMrAgHSYgzywV9zcGmM=
|
||||||
golang.org/x/crypto v0.10.0/go.mod h1:o4eNf7Ede1fv+hwOwZsTHl9EsPFO6q6ZvYR8vYfY45I=
|
golang.org/x/crypto v0.10.0/go.mod h1:o4eNf7Ede1fv+hwOwZsTHl9EsPFO6q6ZvYR8vYfY45I=
|
||||||
golang.org/x/exp v0.0.0-20230321023759-10a507213a29 h1:ooxPy7fPvB4kwsA2h+iBNHkAbp/4JxTSwCmvdjEYmug=
|
golang.org/x/exp v0.0.0-20230321023759-10a507213a29 h1:ooxPy7fPvB4kwsA2h+iBNHkAbp/4JxTSwCmvdjEYmug=
|
||||||
|
golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63 h1:m64FZMko/V45gv0bNmrNYoDEq8U5YUhetc9cBWKS1TQ=
|
||||||
|
golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63/go.mod h1:0v4NqG35kSWCMzLaMeX+IQrlSnVE/bqGSyC2cz/9Le8=
|
||||||
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
|
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
|
||||||
golang.org/x/net v0.10.0 h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M=
|
golang.org/x/net v0.10.0 h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M=
|
||||||
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
|
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
|
||||||
|
|||||||
@@ -45,6 +45,7 @@ func osPath(llamaPath string) string {
|
|||||||
if runtime.GOOS == "windows" {
|
if runtime.GOOS == "windows" {
|
||||||
return path.Join(llamaPath, "Release")
|
return path.Join(llamaPath, "Release")
|
||||||
}
|
}
|
||||||
|
|
||||||
return llamaPath
|
return llamaPath
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -68,7 +69,9 @@ func initGGML() {
|
|||||||
case "windows":
|
case "windows":
|
||||||
files = []string{"server.exe"}
|
files = []string{"server.exe"}
|
||||||
case "darwin":
|
case "darwin":
|
||||||
files = append(files, "ggml-metal.metal")
|
if llamaPath == osPath(ggmlGPU) {
|
||||||
|
files = append(files, "ggml-metal.metal")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, f := range files {
|
for _, f := range files {
|
||||||
@@ -286,8 +289,8 @@ func newLlama(model string, adapters []string, runner ModelRunner, opts api.Opti
|
|||||||
runner.Path,
|
runner.Path,
|
||||||
append(params, "--port", strconv.Itoa(port))...,
|
append(params, "--port", strconv.Itoa(port))...,
|
||||||
)
|
)
|
||||||
var stderr bytes.Buffer
|
cmd.Stdout = os.Stderr
|
||||||
cmd.Stderr = &stderr
|
cmd.Stderr = os.Stderr
|
||||||
|
|
||||||
llm := &llama{Options: opts, Running: Running{Port: port, Cmd: cmd, Cancel: cancel}}
|
llm := &llama{Options: opts, Running: Running{Port: port, Cmd: cmd, Cancel: cancel}}
|
||||||
|
|
||||||
@@ -353,11 +356,6 @@ func (llm *llama) SetOptions(opts api.Options) {
|
|||||||
llm.Options = opts
|
llm.Options = opts
|
||||||
}
|
}
|
||||||
|
|
||||||
type Prediction struct {
|
|
||||||
Content string `json:"content"`
|
|
||||||
Stop bool `json:"stop"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type GenerationSettings struct {
|
type GenerationSettings struct {
|
||||||
FrequencyPenalty float64 `json:"frequency_penalty"`
|
FrequencyPenalty float64 `json:"frequency_penalty"`
|
||||||
IgnoreEOS bool `json:"ignore_eos"`
|
IgnoreEOS bool `json:"ignore_eos"`
|
||||||
@@ -385,31 +383,19 @@ type GenerationSettings struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type Timings struct {
|
type Timings struct {
|
||||||
PredictedMS float64 `json:"predicted_ms"`
|
PredictedN int `json:"predicted_n"`
|
||||||
PredictedN int `json:"predicted_n"`
|
PredictedMS float64 `json:"predicted_ms"`
|
||||||
PredictedPerSecond float64 `json:"predicted_per_second"`
|
PromptN int `json:"prompt_n"`
|
||||||
PredictedPerTokenMS float64 `json:"predicted_per_token_ms"`
|
PromptMS float64 `json:"prompt_ms"`
|
||||||
PromptMS float64 `json:"prompt_ms"`
|
|
||||||
PromptN int `json:"prompt_n"`
|
|
||||||
PromptPerSecond float64 `json:"prompt_per_second"`
|
|
||||||
PromptPerTokenMS float64 `json:"prompt_per_token_ms"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type PredictComplete struct {
|
type Prediction struct {
|
||||||
Content string `json:"content"`
|
Content string `json:"content"`
|
||||||
GenerationSettings GenerationSettings `json:"generation_settings"`
|
Model string `json:"model"`
|
||||||
Model string `json:"model"`
|
Prompt string `json:"prompt"`
|
||||||
Prompt string `json:"prompt"`
|
Stop bool `json:"stop"`
|
||||||
Stop bool `json:"stop"`
|
|
||||||
StoppedEOS bool `json:"stopped_eos"`
|
Timings `json:"timings"`
|
||||||
StoppedLimit bool `json:"stopped_limit"`
|
|
||||||
StoppedWord bool `json:"stopped_word"`
|
|
||||||
StoppingWord string `json:"stopping_word"`
|
|
||||||
Timings Timings `json:"timings"`
|
|
||||||
TokensCached int `json:"tokens_cached"`
|
|
||||||
TokensEvaluated int `json:"tokens_evaluated"`
|
|
||||||
TokensPredicted int `json:"tokens_predicted"`
|
|
||||||
Truncated bool `json:"truncated"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type PredictRequest struct {
|
type PredictRequest struct {
|
||||||
@@ -437,15 +423,19 @@ type PredictRequest struct {
|
|||||||
Stop []string `json:"stop,omitempty"`
|
Stop []string `json:"stop,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (llm *llama) Predict(ctx context.Context, predictCtx []int, prompt string, fn func(api.GenerateResponse)) error {
|
func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string, fn func(api.GenerateResponse)) error {
|
||||||
// we need to find the trimmed prompt context before predicting so that we can return it to the client
|
prevConvo, err := llm.Decode(ctx, prevContext)
|
||||||
trimmedPrompt, err := llm.marshalPrompt(ctx, predictCtx, prompt)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("marshaling prompt: %v", err)
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var nextContext strings.Builder
|
||||||
|
nextContext.WriteString(prevConvo)
|
||||||
|
nextContext.WriteString(prompt)
|
||||||
|
|
||||||
endpoint := fmt.Sprintf("http://127.0.0.1:%d/completion", llm.Port)
|
endpoint := fmt.Sprintf("http://127.0.0.1:%d/completion", llm.Port)
|
||||||
predReq := PredictRequest{
|
predReq := PredictRequest{
|
||||||
Prompt: trimmedPrompt,
|
Prompt: nextContext.String(),
|
||||||
Stream: true,
|
Stream: true,
|
||||||
NPredict: llm.NumPredict,
|
NPredict: llm.NumPredict,
|
||||||
NKeep: llm.NumKeep,
|
NKeep: llm.NumKeep,
|
||||||
@@ -491,7 +481,6 @@ func (llm *llama) Predict(ctx context.Context, predictCtx []int, prompt string,
|
|||||||
}
|
}
|
||||||
|
|
||||||
scanner := bufio.NewScanner(resp.Body)
|
scanner := bufio.NewScanner(resp.Body)
|
||||||
genCtx := trimmedPrompt // start with the trimmed prompt
|
|
||||||
for scanner.Scan() {
|
for scanner.Scan() {
|
||||||
select {
|
select {
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
@@ -506,34 +495,33 @@ func (llm *llama) Predict(ctx context.Context, predictCtx []int, prompt string,
|
|||||||
// Read data from the server-side event stream
|
// Read data from the server-side event stream
|
||||||
if strings.HasPrefix(line, "data: ") {
|
if strings.HasPrefix(line, "data: ") {
|
||||||
evt := line[6:]
|
evt := line[6:]
|
||||||
var complete PredictComplete
|
var p Prediction
|
||||||
if err := json.Unmarshal([]byte(evt), &complete); err != nil {
|
if err := json.Unmarshal([]byte(evt), &p); err != nil {
|
||||||
return fmt.Errorf("error unmarshaling llm complete response: %v", err)
|
return fmt.Errorf("error unmarshaling llm prediction response: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if complete.Timings.PredictedMS > 0 {
|
if p.Content != "" {
|
||||||
genCtx += complete.Content
|
fn(api.GenerateResponse{Response: p.Content})
|
||||||
embd, err := llm.Encode(ctx, genCtx)
|
nextContext.WriteString(p.Content)
|
||||||
|
}
|
||||||
|
|
||||||
|
if p.Stop {
|
||||||
|
embd, err := llm.Encode(ctx, nextContext.String())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("encoding context: %v", err)
|
return fmt.Errorf("encoding context: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn(api.GenerateResponse{
|
fn(api.GenerateResponse{
|
||||||
Done: true,
|
Done: true,
|
||||||
Context: embd,
|
Context: embd,
|
||||||
PromptEvalCount: int(complete.Timings.PromptN),
|
PromptEvalCount: p.PromptN,
|
||||||
PromptEvalDuration: parseDurationMs(float64(complete.Timings.PromptMS)),
|
PromptEvalDuration: parseDurationMs(p.PromptMS),
|
||||||
EvalCount: int(complete.Timings.PredictedN),
|
EvalCount: p.PredictedN,
|
||||||
EvalDuration: parseDurationMs(float64(complete.Timings.PredictedMS)),
|
EvalDuration: parseDurationMs(p.PredictedMS),
|
||||||
})
|
})
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
var pred Prediction
|
|
||||||
if err := json.Unmarshal([]byte(evt), &pred); err != nil {
|
|
||||||
return fmt.Errorf("error unmarshaling llm prediction response: %v", err)
|
|
||||||
}
|
|
||||||
genCtx += pred.Content
|
|
||||||
fn(api.GenerateResponse{Response: pred.Content})
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -545,34 +533,6 @@ func (llm *llama) Predict(ctx context.Context, predictCtx []int, prompt string,
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (llm *llama) marshalPrompt(ctx context.Context, pCtx []int, prompt string) (string, error) {
|
|
||||||
pEncode, err := llm.Encode(ctx, prompt)
|
|
||||||
if err != nil {
|
|
||||||
return "", fmt.Errorf("encoding prompt context: %w", err)
|
|
||||||
}
|
|
||||||
tokens := append(pCtx, pEncode...)
|
|
||||||
if llm.NumKeep < 0 {
|
|
||||||
llm.NumKeep = len(tokens)
|
|
||||||
}
|
|
||||||
|
|
||||||
// min(llm.NumCtx - 4, llm.NumKeep)
|
|
||||||
if llm.NumCtx-4 < llm.NumKeep {
|
|
||||||
llm.NumKeep = llm.NumCtx - 4
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(tokens) >= llm.NumCtx {
|
|
||||||
// truncate input
|
|
||||||
numLeft := (llm.NumCtx - llm.NumKeep) / 2
|
|
||||||
truncated := tokens[:llm.NumKeep]
|
|
||||||
erasedBlocks := (len(tokens) - llm.NumKeep - numLeft - 1) / numLeft
|
|
||||||
truncated = append(truncated, tokens[llm.NumKeep+erasedBlocks*numLeft:]...)
|
|
||||||
tokens = truncated
|
|
||||||
log.Printf("input truncated: num_ctx=%d num_keep=%d num_left=%d num_tokens=%d", llm.NumCtx, llm.NumKeep, numLeft, len(truncated))
|
|
||||||
}
|
|
||||||
|
|
||||||
return llm.Decode(ctx, tokens)
|
|
||||||
}
|
|
||||||
|
|
||||||
type TokenizeRequest struct {
|
type TokenizeRequest struct {
|
||||||
Content string `json:"content"`
|
Content string `json:"content"`
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,8 +1,13 @@
|
|||||||
|
//go:build !darwin
|
||||||
|
// +build !darwin
|
||||||
|
|
||||||
package llm
|
package llm
|
||||||
|
|
||||||
//go:generate git submodule init
|
//go:generate git submodule init
|
||||||
//go:generate git submodule update --force ggml
|
//go:generate git submodule update --force ggml
|
||||||
//go:generate git -C ggml apply ../ggml_patch/0001-add-detokenize-endpoint.patch
|
//go:generate git -C ggml apply ../ggml_patch/0001-add-detokenize-endpoint.patch
|
||||||
//go:generate git -C ggml apply ../ggml_patch/0002-34B-model-support.patch
|
//go:generate git -C ggml apply ../ggml_patch/0002-34B-model-support.patch
|
||||||
//go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_K_QUANTS=on
|
//go:generate git -C ggml apply ../ggml_patch/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch
|
||||||
|
//go:generate git -C ggml apply ../ggml_patch/0004-metal-add-missing-barriers-for-mul-mat-2699.patch
|
||||||
|
//go:generate cmake --fresh -S ggml -B ggml/build/cpu -DLLAMA_K_QUANTS=on
|
||||||
//go:generate cmake --build ggml/build/cpu --target server --config Release
|
//go:generate cmake --build ggml/build/cpu --target server --config Release
|
||||||
|
|||||||
@@ -1,11 +0,0 @@
|
|||||||
//go:build darwin
|
|
||||||
// +build darwin
|
|
||||||
|
|
||||||
package llm
|
|
||||||
|
|
||||||
//go:generate git submodule init
|
|
||||||
//go:generate git submodule update --force ggml
|
|
||||||
//go:generate git -C ggml apply ../ggml_patch/0001-add-detokenize-endpoint.patch
|
|
||||||
//go:generate git -C ggml apply ../ggml_patch/0002-34B-model-support.patch
|
|
||||||
//go:generate cmake -S ggml -B ggml/build/gpu -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on
|
|
||||||
//go:generate cmake --build ggml/build/gpu --target server --config Release
|
|
||||||
10
llm/llama.cpp/generate_darwin_amd64.go
Normal file
10
llm/llama.cpp/generate_darwin_amd64.go
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
package llm
|
||||||
|
|
||||||
|
//go:generate git submodule init
|
||||||
|
//go:generate git submodule update --force ggml
|
||||||
|
//go:generate git -C ggml apply ../ggml_patch/0001-add-detokenize-endpoint.patch
|
||||||
|
//go:generate git -C ggml apply ../ggml_patch/0002-34B-model-support.patch
|
||||||
|
//go:generate git -C ggml apply ../ggml_patch/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch
|
||||||
|
//go:generate git -C ggml apply ../ggml_patch/0004-metal-add-missing-barriers-for-mul-mat-2699.patch
|
||||||
|
//go:generate cmake --fresh -S ggml -B ggml/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
|
||||||
|
//go:generate cmake --build ggml/build/cpu --target server --config Release
|
||||||
10
llm/llama.cpp/generate_darwin_arm64.go
Normal file
10
llm/llama.cpp/generate_darwin_arm64.go
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
package llm
|
||||||
|
|
||||||
|
//go:generate git submodule init
|
||||||
|
//go:generate git submodule update --force ggml
|
||||||
|
//go:generate git -C ggml apply ../ggml_patch/0001-add-detokenize-endpoint.patch
|
||||||
|
//go:generate git -C ggml apply ../ggml_patch/0002-34B-model-support.patch
|
||||||
|
//go:generate git -C ggml apply ../ggml_patch/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch
|
||||||
|
//go:generate git -C ggml apply ../ggml_patch/0004-metal-add-missing-barriers-for-mul-mat-2699.patch
|
||||||
|
//go:generate cmake --fresh -S ggml -B ggml/build/gpu -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
|
||||||
|
//go:generate cmake --build ggml/build/gpu --target server --config Release
|
||||||
@@ -0,0 +1,32 @@
|
|||||||
|
From 8c0ea847ac1460bca534d92266e3471cb31471be Mon Sep 17 00:00:00 2001
|
||||||
|
From: Bruce MacDonald <brucewmacdonald@gmail.com>
|
||||||
|
Date: Tue, 5 Sep 2023 16:05:08 -0400
|
||||||
|
Subject: [PATCH] metal: add missing barriers for mul-mat #2699
|
||||||
|
|
||||||
|
---
|
||||||
|
ggml-metal.metal | 2 ++
|
||||||
|
1 file changed, 2 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/ggml-metal.metal b/ggml-metal.metal
|
||||||
|
index 3f31252..ce3541f 100644
|
||||||
|
--- a/ggml-metal.metal
|
||||||
|
+++ b/ggml-metal.metal
|
||||||
|
@@ -1850,6 +1850,7 @@ kernel void kernel_mul_mm(device const uchar * src0,
|
||||||
|
//load data and store to threadgroup memory
|
||||||
|
half4x4 temp_a;
|
||||||
|
dequantize_func(x, il, temp_a);
|
||||||
|
+ threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
|
#pragma unroll(16)
|
||||||
|
for (int i = 0; i < 16; i++) {
|
||||||
|
*(sa + SG_MAT_SIZE * ((tiitg / THREAD_PER_ROW / 8) \
|
||||||
|
@@ -1895,6 +1896,7 @@ kernel void kernel_mul_mm(device const uchar * src0,
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// block is smaller than 64x32, we should avoid writing data outside of the matrix
|
||||||
|
+ threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
|
threadgroup float *temp_str = ((threadgroup float *)shared_memory) \
|
||||||
|
+ 32 * (sgitg&1) + (16 * (sgitg>>1)) * BLOCK_SIZE_M;
|
||||||
|
for (int i = 0; i < 8; i++) {
|
||||||
|
--
|
||||||
|
2.39.2 (Apple Git-143)
|
||||||
|
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
From dadbed99e65252d79f81101a392d0d6497b86caa Mon Sep 17 00:00:00 2001
|
||||||
|
From: Shouzheng Liu <lshzh.hi@gmail.com>
|
||||||
|
Date: Mon, 21 Aug 2023 06:59:29 -0400
|
||||||
|
Subject: [PATCH] metal : fix synchronization in new matrix multiplication
|
||||||
|
kernel (#2686)
|
||||||
|
|
||||||
|
---
|
||||||
|
ggml-metal.metal | 3 ++-
|
||||||
|
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/ggml-metal.metal b/ggml-metal.metal
|
||||||
|
index 3f31252..88d48f6 100644
|
||||||
|
--- a/ggml-metal.metal
|
||||||
|
+++ b/ggml-metal.metal
|
||||||
|
@@ -1898,10 +1898,11 @@ kernel void kernel_mul_mm(device const uchar * src0,
|
||||||
|
threadgroup float *temp_str = ((threadgroup float *)shared_memory) \
|
||||||
|
+ 32 * (sgitg&1) + (16 * (sgitg>>1)) * BLOCK_SIZE_M;
|
||||||
|
for (int i = 0; i < 8; i++) {
|
||||||
|
+ threadgroup_barrier(mem_flags::mem_device);
|
||||||
|
simdgroup_store(c_res[i], temp_str + 8 * (i%4) + 8 * BLOCK_SIZE_M * (i/4), BLOCK_SIZE_M);
|
||||||
|
}
|
||||||
|
|
||||||
|
- threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
|
+ threadgroup_barrier(mem_flags::mem_device);
|
||||||
|
device float *C = dst + BLOCK_SIZE_M * r0 + (BLOCK_SIZE_N * r1) * ne0 + im*ne1*ne0;
|
||||||
|
if (sgitg==0) {
|
||||||
|
for (int i = 0; i < n_rows; i++) {
|
||||||
|
--
|
||||||
|
2.41.0
|
||||||
|
|
||||||
@@ -0,0 +1,41 @@
|
|||||||
|
From 14b1d7e6f720dee41ce5a826376df738096d9033 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Shouzheng Liu <lshzh.hi@gmail.com>
|
||||||
|
Date: Tue, 22 Aug 2023 02:18:40 -0400
|
||||||
|
Subject: [PATCH] metal : add missing barriers for mul-mat (#2699)
|
||||||
|
|
||||||
|
---
|
||||||
|
ggml-metal.metal | 5 +++--
|
||||||
|
1 file changed, 3 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/ggml-metal.metal b/ggml-metal.metal
|
||||||
|
index 88d48f6..ce3541f 100644
|
||||||
|
--- a/ggml-metal.metal
|
||||||
|
+++ b/ggml-metal.metal
|
||||||
|
@@ -1850,6 +1850,7 @@ kernel void kernel_mul_mm(device const uchar * src0,
|
||||||
|
//load data and store to threadgroup memory
|
||||||
|
half4x4 temp_a;
|
||||||
|
dequantize_func(x, il, temp_a);
|
||||||
|
+ threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
|
#pragma unroll(16)
|
||||||
|
for (int i = 0; i < 16; i++) {
|
||||||
|
*(sa + SG_MAT_SIZE * ((tiitg / THREAD_PER_ROW / 8) \
|
||||||
|
@@ -1895,14 +1896,14 @@ kernel void kernel_mul_mm(device const uchar * src0,
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// block is smaller than 64x32, we should avoid writing data outside of the matrix
|
||||||
|
+ threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
|
threadgroup float *temp_str = ((threadgroup float *)shared_memory) \
|
||||||
|
+ 32 * (sgitg&1) + (16 * (sgitg>>1)) * BLOCK_SIZE_M;
|
||||||
|
for (int i = 0; i < 8; i++) {
|
||||||
|
- threadgroup_barrier(mem_flags::mem_device);
|
||||||
|
simdgroup_store(c_res[i], temp_str + 8 * (i%4) + 8 * BLOCK_SIZE_M * (i/4), BLOCK_SIZE_M);
|
||||||
|
}
|
||||||
|
|
||||||
|
- threadgroup_barrier(mem_flags::mem_device);
|
||||||
|
+ threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
|
device float *C = dst + BLOCK_SIZE_M * r0 + (BLOCK_SIZE_N * r1) * ne0 + im*ne1*ne0;
|
||||||
|
if (sgitg==0) {
|
||||||
|
for (int i = 0; i < n_rows; i++) {
|
||||||
|
--
|
||||||
|
2.41.0
|
||||||
|
|
||||||
@@ -6,8 +6,11 @@ GO_LDFLAGS="-X github.com/jmorganca/ollama/version.Version=$VERSION"
|
|||||||
GO_LDFLAGS="$GO_LDFLAGS -X github.com/jmorganca/ollama/server.mode=release"
|
GO_LDFLAGS="$GO_LDFLAGS -X github.com/jmorganca/ollama/server.mode=release"
|
||||||
|
|
||||||
# build universal binary
|
# build universal binary
|
||||||
CGO_ENABLED=1 GOARCH=arm64 go build -ldflags "$GO_LDFLAGS" -o dist/ollama-darwin-arm64
|
GOARCH=arm64 go generate ./...
|
||||||
CGO_ENABLED=1 GOARCH=amd64 go build -ldflags "$GO_LDFLAGS" -o dist/ollama-darwin-amd64
|
GOARCH=arm64 go build -ldflags "$GO_LDFLAGS" -o dist/ollama-darwin-arm64
|
||||||
|
rm -rf llm/llama.cpp/ggml/build/*/bin
|
||||||
|
GOARCH=amd64 go generate ./...
|
||||||
|
GOARCH=amd64 go build -ldflags "$GO_LDFLAGS" -o dist/ollama-darwin-amd64
|
||||||
lipo -create -output dist/ollama dist/ollama-darwin-arm64 dist/ollama-darwin-amd64
|
lipo -create -output dist/ollama dist/ollama-darwin-arm64 dist/ollama-darwin-amd64
|
||||||
rm dist/ollama-darwin-amd64 dist/ollama-darwin-arm64
|
rm dist/ollama-darwin-amd64 dist/ollama-darwin-arm64
|
||||||
codesign --deep --force --options=runtime --sign "$APPLE_IDENTITY" --timestamp dist/ollama
|
codesign --deep --force --options=runtime --sign "$APPLE_IDENTITY" --timestamp dist/ollama
|
||||||
|
|||||||
150
server/images.go
150
server/images.go
@@ -22,6 +22,8 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"text/template"
|
"text/template"
|
||||||
|
|
||||||
|
"golang.org/x/exp/slices"
|
||||||
|
|
||||||
"github.com/jmorganca/ollama/api"
|
"github.com/jmorganca/ollama/api"
|
||||||
"github.com/jmorganca/ollama/llm"
|
"github.com/jmorganca/ollama/llm"
|
||||||
"github.com/jmorganca/ollama/parser"
|
"github.com/jmorganca/ollama/parser"
|
||||||
@@ -39,15 +41,18 @@ type RegistryOptions struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type Model struct {
|
type Model struct {
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
ModelPath string
|
ShortName string
|
||||||
AdapterPaths []string
|
ModelPath string
|
||||||
Template string
|
OriginalModel string
|
||||||
System string
|
AdapterPaths []string
|
||||||
Digest string
|
Template string
|
||||||
ConfigDigest string
|
System string
|
||||||
Options map[string]interface{}
|
License []string
|
||||||
Embeddings []vector.Embedding
|
Digest string
|
||||||
|
ConfigDigest string
|
||||||
|
Options map[string]interface{}
|
||||||
|
Embeddings []vector.Embedding
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *Model) Prompt(request api.GenerateRequest, embedding string) (string, error) {
|
func (m *Model) Prompt(request api.GenerateRequest, embedding string) (string, error) {
|
||||||
@@ -111,6 +116,7 @@ type LayerReader struct {
|
|||||||
type ConfigV2 struct {
|
type ConfigV2 struct {
|
||||||
ModelFamily llm.ModelFamily `json:"model_family"`
|
ModelFamily llm.ModelFamily `json:"model_family"`
|
||||||
ModelType string `json:"model_type"`
|
ModelType string `json:"model_type"`
|
||||||
|
ModelFormat string `json:"model_format"`
|
||||||
FileType string `json:"file_type"`
|
FileType string `json:"file_type"`
|
||||||
RootFS RootFS `json:"rootfs"`
|
RootFS RootFS `json:"rootfs"`
|
||||||
|
|
||||||
@@ -169,9 +175,11 @@ func GetModel(name string) (*Model, error) {
|
|||||||
|
|
||||||
model := &Model{
|
model := &Model{
|
||||||
Name: mp.GetFullTagname(),
|
Name: mp.GetFullTagname(),
|
||||||
|
ShortName: mp.GetShortTagname(),
|
||||||
Digest: digest,
|
Digest: digest,
|
||||||
ConfigDigest: manifest.Config.Digest,
|
ConfigDigest: manifest.Config.Digest,
|
||||||
Template: "{{ .Prompt }}",
|
Template: "{{ .Prompt }}",
|
||||||
|
License: []string{},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, layer := range manifest.Layers {
|
for _, layer := range manifest.Layers {
|
||||||
@@ -183,6 +191,7 @@ func GetModel(name string) (*Model, error) {
|
|||||||
switch layer.MediaType {
|
switch layer.MediaType {
|
||||||
case "application/vnd.ollama.image.model":
|
case "application/vnd.ollama.image.model":
|
||||||
model.ModelPath = filename
|
model.ModelPath = filename
|
||||||
|
model.OriginalModel = layer.From
|
||||||
case "application/vnd.ollama.image.embed":
|
case "application/vnd.ollama.image.embed":
|
||||||
file, err := os.Open(filename)
|
file, err := os.Open(filename)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -227,6 +236,12 @@ func GetModel(name string) (*Model, error) {
|
|||||||
if err = json.NewDecoder(params).Decode(&model.Options); err != nil {
|
if err = json.NewDecoder(params).Decode(&model.Options); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
case "application/vnd.ollama.image.license":
|
||||||
|
bts, err := os.ReadFile(filename)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
model.License = append(model.License, string(bts))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -274,6 +289,7 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
|
|||||||
|
|
||||||
var layers []*LayerReader
|
var layers []*LayerReader
|
||||||
params := make(map[string][]string)
|
params := make(map[string][]string)
|
||||||
|
var sourceParams map[string]any
|
||||||
embed := EmbeddingParams{fn: fn}
|
embed := EmbeddingParams{fn: fn}
|
||||||
for _, c := range commands {
|
for _, c := range commands {
|
||||||
log.Printf("[%s] - %s\n", c.Name, c.Args)
|
log.Printf("[%s] - %s\n", c.Name, c.Args)
|
||||||
@@ -320,6 +336,7 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
|
|||||||
|
|
||||||
config.ModelFamily = ggml.ModelFamily()
|
config.ModelFamily = ggml.ModelFamily()
|
||||||
config.ModelType = ggml.ModelType().String()
|
config.ModelType = ggml.ModelType().String()
|
||||||
|
config.ModelFormat = ggml.Name()
|
||||||
config.FileType = ggml.FileType().String()
|
config.FileType = ggml.FileType().String()
|
||||||
|
|
||||||
// reset the file
|
// reset the file
|
||||||
@@ -351,12 +368,30 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// copie the model metadata
|
// copy the model metadata
|
||||||
config.ModelFamily = source.ModelFamily
|
config.ModelFamily = source.ModelFamily
|
||||||
config.ModelType = source.ModelType
|
config.ModelType = source.ModelType
|
||||||
|
config.ModelFormat = source.ModelFormat
|
||||||
config.FileType = source.FileType
|
config.FileType = source.FileType
|
||||||
|
|
||||||
for _, l := range mf.Layers {
|
for _, l := range mf.Layers {
|
||||||
|
if l.MediaType == "application/vnd.ollama.image.params" {
|
||||||
|
sourceParamsBlobPath, err := GetBlobsPath(l.Digest)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
sourceParamsBlob, err := os.Open(sourceParamsBlobPath)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer sourceParamsBlob.Close()
|
||||||
|
|
||||||
|
if err := json.NewDecoder(sourceParamsBlob).Decode(&sourceParams); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
newLayer, err := GetLayerWithBufferFromLayer(l)
|
newLayer, err := GetLayerWithBufferFromLayer(l)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
@@ -427,12 +462,19 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
|
|||||||
// Create a single layer for the parameters
|
// Create a single layer for the parameters
|
||||||
if len(params) > 0 {
|
if len(params) > 0 {
|
||||||
fn(api.ProgressResponse{Status: "creating parameter layer"})
|
fn(api.ProgressResponse{Status: "creating parameter layer"})
|
||||||
|
|
||||||
layers = removeLayerFromLayers(layers, "application/vnd.ollama.image.params")
|
layers = removeLayerFromLayers(layers, "application/vnd.ollama.image.params")
|
||||||
formattedParams, err := formatParams(params)
|
formattedParams, err := formatParams(params)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("couldn't create params json: %v", err)
|
return fmt.Errorf("couldn't create params json: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for k, v := range sourceParams {
|
||||||
|
if _, ok := formattedParams[k]; !ok {
|
||||||
|
formattedParams[k] = v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bts, err := json.Marshal(formattedParams)
|
bts, err := json.Marshal(formattedParams)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
@@ -630,14 +672,9 @@ func existingFileEmbeddings(digest string) (map[string][]float64, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func removeLayerFromLayers(layers []*LayerReader, mediaType string) []*LayerReader {
|
func removeLayerFromLayers(layers []*LayerReader, mediaType string) []*LayerReader {
|
||||||
j := 0
|
return slices.DeleteFunc(layers, func(layer *LayerReader) bool {
|
||||||
for _, l := range layers {
|
return layer.MediaType == mediaType
|
||||||
if l.MediaType != mediaType {
|
})
|
||||||
layers[j] = l
|
|
||||||
j++
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return layers[:j]
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func SaveLayers(layers []*LayerReader, fn func(resp api.ProgressResponse), force bool) error {
|
func SaveLayers(layers []*LayerReader, fn func(resp api.ProgressResponse), force bool) error {
|
||||||
@@ -911,6 +948,83 @@ func DeleteModel(name string) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func ShowModelfile(model *Model) (string, error) {
|
||||||
|
type modelTemplate struct {
|
||||||
|
*Model
|
||||||
|
From string
|
||||||
|
Params string
|
||||||
|
}
|
||||||
|
|
||||||
|
var params []string
|
||||||
|
for k, v := range model.Options {
|
||||||
|
switch val := v.(type) {
|
||||||
|
case string:
|
||||||
|
params = append(params, fmt.Sprintf("PARAMETER %s %s", k, val))
|
||||||
|
case int:
|
||||||
|
params = append(params, fmt.Sprintf("PARAMETER %s %s", k, strconv.Itoa(val)))
|
||||||
|
case float64:
|
||||||
|
params = append(params, fmt.Sprintf("PARAMETER %s %s", k, strconv.FormatFloat(val, 'f', 0, 64)))
|
||||||
|
case bool:
|
||||||
|
params = append(params, fmt.Sprintf("PARAMETER %s %s", k, strconv.FormatBool(val)))
|
||||||
|
case []interface{}:
|
||||||
|
for _, nv := range val {
|
||||||
|
switch nval := nv.(type) {
|
||||||
|
case string:
|
||||||
|
params = append(params, fmt.Sprintf("PARAMETER %s %s", k, nval))
|
||||||
|
case int:
|
||||||
|
params = append(params, fmt.Sprintf("PARAMETER %s %s", k, strconv.Itoa(nval)))
|
||||||
|
case float64:
|
||||||
|
params = append(params, fmt.Sprintf("PARAMETER %s %s", k, strconv.FormatFloat(nval, 'f', 0, 64)))
|
||||||
|
case bool:
|
||||||
|
params = append(params, fmt.Sprintf("PARAMETER %s %s", k, strconv.FormatBool(nval)))
|
||||||
|
default:
|
||||||
|
log.Printf("unknown type: %s", reflect.TypeOf(nv).String())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
log.Printf("unknown type: %s", reflect.TypeOf(v).String())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
mt := modelTemplate{
|
||||||
|
Model: model,
|
||||||
|
From: model.OriginalModel,
|
||||||
|
Params: strings.Join(params, "\n"),
|
||||||
|
}
|
||||||
|
|
||||||
|
if mt.From == "" {
|
||||||
|
mt.From = model.ModelPath
|
||||||
|
}
|
||||||
|
|
||||||
|
modelFile := `# Modelfile generated by "ollama show"
|
||||||
|
# To build a new Modelfile based on this one, replace the FROM line with:
|
||||||
|
# FROM {{ .ShortName }}
|
||||||
|
|
||||||
|
FROM {{ .From }}
|
||||||
|
TEMPLATE """{{ .Template }}"""
|
||||||
|
SYSTEM """{{ .System }}"""
|
||||||
|
{{ .Params }}
|
||||||
|
`
|
||||||
|
for _, l := range mt.Model.AdapterPaths {
|
||||||
|
modelFile += fmt.Sprintf("ADAPTER %s\n", l)
|
||||||
|
}
|
||||||
|
|
||||||
|
tmpl, err := template.New("").Parse(modelFile)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("error parsing template: %q", err)
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
var buf bytes.Buffer
|
||||||
|
|
||||||
|
if err = tmpl.Execute(&buf, mt); err != nil {
|
||||||
|
log.Printf("error executing template: %q", err)
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
return buf.String(), nil
|
||||||
|
}
|
||||||
|
|
||||||
func PushModel(ctx context.Context, name string, regOpts *RegistryOptions, fn func(api.ProgressResponse)) error {
|
func PushModel(ctx context.Context, name string, regOpts *RegistryOptions, fn func(api.ProgressResponse)) error {
|
||||||
mp := ParseModelPath(name)
|
mp := ParseModelPath(name)
|
||||||
fn(api.ProgressResponse{Status: "retrieving manifest"})
|
fn(api.ProgressResponse{Status: "retrieving manifest"})
|
||||||
|
|||||||
@@ -114,7 +114,12 @@ func GetManifestPath() (string, error) {
|
|||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
||||||
return filepath.Join(home, ".ollama", "models", "manifests"), nil
|
path := filepath.Join(home, ".ollama", "models", "manifests")
|
||||||
|
if err := os.MkdirAll(path, 0o755); err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
return path, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func GetBlobsPath(digest string) (string, error) {
|
func GetBlobsPath(digest string) (string, error) {
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ import (
|
|||||||
"os/signal"
|
"os/signal"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"reflect"
|
"reflect"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"syscall"
|
"syscall"
|
||||||
@@ -117,12 +118,13 @@ func load(ctx context.Context, model *Model, reqOpts map[string]interface{}, ses
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
tokensNoSystem, err := llmModel.Encode(ctx, promptNoSystem)
|
tokensNoSystem, err := llmModel.Encode(ctx, promptNoSystem)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
opts.NumKeep = len(tokensWithSystem) - len(tokensNoSystem) + 1
|
opts.NumKeep = len(tokensWithSystem) - len(tokensNoSystem)
|
||||||
|
|
||||||
llmModel.SetOptions(opts)
|
llmModel.SetOptions(opts)
|
||||||
}
|
}
|
||||||
@@ -363,6 +365,77 @@ func DeleteModelHandler(c *gin.Context) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func ShowModelHandler(c *gin.Context) {
|
||||||
|
var req api.ShowRequest
|
||||||
|
if err := c.ShouldBindJSON(&req); err != nil {
|
||||||
|
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := GetModelInfo(req.Name)
|
||||||
|
if err != nil {
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Name)})
|
||||||
|
} else {
|
||||||
|
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
c.JSON(http.StatusOK, resp)
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetModelInfo(name string) (*api.ShowResponse, error) {
|
||||||
|
model, err := GetModel(name)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
resp := &api.ShowResponse{
|
||||||
|
License: strings.Join(model.License, "\n"),
|
||||||
|
System: model.System,
|
||||||
|
Template: model.Template,
|
||||||
|
}
|
||||||
|
|
||||||
|
mf, err := ShowModelfile(model)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
resp.Modelfile = mf
|
||||||
|
|
||||||
|
var params []string
|
||||||
|
cs := 30
|
||||||
|
for k, v := range model.Options {
|
||||||
|
switch val := v.(type) {
|
||||||
|
case string:
|
||||||
|
params = append(params, fmt.Sprintf("%-*s %s", cs, k, val))
|
||||||
|
case int:
|
||||||
|
params = append(params, fmt.Sprintf("%-*s %s", cs, k, strconv.Itoa(val)))
|
||||||
|
case float64:
|
||||||
|
params = append(params, fmt.Sprintf("%-*s %s", cs, k, strconv.FormatFloat(val, 'f', 0, 64)))
|
||||||
|
case bool:
|
||||||
|
params = append(params, fmt.Sprintf("%-*s %s", cs, k, strconv.FormatBool(val)))
|
||||||
|
case []interface{}:
|
||||||
|
for _, nv := range val {
|
||||||
|
switch nval := nv.(type) {
|
||||||
|
case string:
|
||||||
|
params = append(params, fmt.Sprintf("%-*s %s", cs, k, nval))
|
||||||
|
case int:
|
||||||
|
params = append(params, fmt.Sprintf("%-*s %s", cs, k, strconv.Itoa(nval)))
|
||||||
|
case float64:
|
||||||
|
params = append(params, fmt.Sprintf("%-*s %s", cs, k, strconv.FormatFloat(nval, 'f', 0, 64)))
|
||||||
|
case bool:
|
||||||
|
params = append(params, fmt.Sprintf("%-*s %s", cs, k, strconv.FormatBool(nval)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
resp.Parameters = strings.Join(params, "\n")
|
||||||
|
|
||||||
|
return resp, nil
|
||||||
|
}
|
||||||
|
|
||||||
func ListModelsHandler(c *gin.Context) {
|
func ListModelsHandler(c *gin.Context) {
|
||||||
var models []api.ModelResponse
|
var models []api.ModelResponse
|
||||||
fp, err := GetManifestPath()
|
fp, err := GetManifestPath()
|
||||||
@@ -456,6 +529,7 @@ func Serve(ln net.Listener, origins []string) error {
|
|||||||
r.POST("/api/copy", CopyModelHandler)
|
r.POST("/api/copy", CopyModelHandler)
|
||||||
r.GET("/api/tags", ListModelsHandler)
|
r.GET("/api/tags", ListModelsHandler)
|
||||||
r.DELETE("/api/delete", DeleteModelHandler)
|
r.DELETE("/api/delete", DeleteModelHandler)
|
||||||
|
r.POST("/api/show", ShowModelHandler)
|
||||||
|
|
||||||
log.Printf("Listening on %s", ln.Addr())
|
log.Printf("Listening on %s", ln.Addr())
|
||||||
s := &http.Server{
|
s := &http.Server{
|
||||||
|
|||||||
Reference in New Issue
Block a user