mirror of
https://github.com/ollama/ollama.git
synced 2026-04-25 10:16:00 +02:00
Compare commits
16 Commits
brucemacd/
...
pdevine/ge
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b7349a4efd | ||
|
|
4cda3e3622 | ||
|
|
95fbf1da12 | ||
|
|
83d1a1ab55 | ||
|
|
035e69799e | ||
|
|
10e06d0a45 | ||
|
|
8cf1ea4fd8 | ||
|
|
d231229122 | ||
|
|
fad98fabab | ||
|
|
7b5d916a9a | ||
|
|
33ad61b112 | ||
|
|
716e365615 | ||
|
|
3b4424ff98 | ||
|
|
0667baddc6 | ||
|
|
d006e1e09b | ||
|
|
df2680b4b9 |
4
.github/workflows/release.yaml
vendored
4
.github/workflows/release.yaml
vendored
@@ -329,7 +329,9 @@ jobs:
|
|||||||
done
|
done
|
||||||
working-directory: dist/${{ matrix.os }}-${{ matrix.arch }}
|
working-directory: dist/${{ matrix.os }}-${{ matrix.arch }}
|
||||||
- run: |
|
- run: |
|
||||||
for ARCHIVE in dist/${{ matrix.os }}-${{ matrix.arch }}/*.tar.in; do tar c -C dist/${{ matrix.os }}-${{ matrix.arch }} -T $ARCHIVE | pigz -9vc >$(basename ${ARCHIVE//.*/}.tgz); done
|
for ARCHIVE in dist/${{ matrix.os }}-${{ matrix.arch }}/*.tar.in; do
|
||||||
|
tar c -C dist/${{ matrix.os }}-${{ matrix.arch }} -T $ARCHIVE --owner 0 --group 0 | pigz -9vc >$(basename ${ARCHIVE//.*/}.tgz);
|
||||||
|
done
|
||||||
- uses: actions/upload-artifact@v4
|
- uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: dist-${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.target }}
|
name: dist-${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.target }}
|
||||||
|
|||||||
@@ -381,6 +381,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
|||||||
- [ChibiChat](https://github.com/CosmicEventHorizon/ChibiChat) (Kotlin-based Android app to chat with Ollama and Koboldcpp API endpoints)
|
- [ChibiChat](https://github.com/CosmicEventHorizon/ChibiChat) (Kotlin-based Android app to chat with Ollama and Koboldcpp API endpoints)
|
||||||
- [LocalLLM](https://github.com/qusaismael/localllm) (Minimal Web-App to run ollama models on it with a GUI)
|
- [LocalLLM](https://github.com/qusaismael/localllm) (Minimal Web-App to run ollama models on it with a GUI)
|
||||||
- [Ollamazing](https://github.com/buiducnhat/ollamazing) (Web extension to run Ollama models)
|
- [Ollamazing](https://github.com/buiducnhat/ollamazing) (Web extension to run Ollama models)
|
||||||
|
- [OpenDeepResearcher-via-searxng](https://github.com/benhaotang/OpenDeepResearcher-via-searxng) (A Deep Research equivalent endpoint with Ollama support for running locally)
|
||||||
|
|
||||||
### Cloud
|
### Cloud
|
||||||
|
|
||||||
@@ -548,6 +549,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
|||||||
- [Alfred Ollama](https://github.com/zeitlings/alfred-ollama) (Alfred Workflow)
|
- [Alfred Ollama](https://github.com/zeitlings/alfred-ollama) (Alfred Workflow)
|
||||||
- [TextLLaMA](https://github.com/adarshM84/TextLLaMA) A Chrome Extension that helps you write emails, correct grammar, and translate into any language
|
- [TextLLaMA](https://github.com/adarshM84/TextLLaMA) A Chrome Extension that helps you write emails, correct grammar, and translate into any language
|
||||||
- [Simple-Discord-AI](https://github.com/zyphixor/simple-discord-ai)
|
- [Simple-Discord-AI](https://github.com/zyphixor/simple-discord-ai)
|
||||||
|
- [LLM Telegram Bot](https://github.com/innightwolfsleep/llm_telegram_bot) (Telegram bot, primarily for RP, with Oobabooga-like buttons, [A1111](https://github.com/AUTOMATIC1111/stable-diffusion-webui) API integration, etc.)
|
||||||
|
|
||||||
### Supported backends
|
### Supported backends
|
||||||
|
|
||||||
|
|||||||
@@ -55,7 +55,7 @@ Here's a quick example showing API access from `powershell`
|
|||||||
## Troubleshooting
|
## Troubleshooting
|
||||||
|
|
||||||
Ollama on Windows stores files in a few different locations. You can view them in
|
Ollama on Windows stores files in a few different locations. You can view them in
|
||||||
the explorer window by hitting `<cmd>+R` and type in:
|
the explorer window by hitting `<Win>+R` and type in:
|
||||||
- `explorer %LOCALAPPDATA%\Ollama` contains logs, and downloaded updates
|
- `explorer %LOCALAPPDATA%\Ollama` contains logs, and downloaded updates
|
||||||
- *app.log* contains the most recent logs from the GUI application
|
- *app.log* contains the most recent logs from the GUI application
|
||||||
- *server.log* contains the most recent server logs
|
- *server.log* contains the most recent server logs
|
||||||
|
|||||||
@@ -12,6 +12,9 @@ func TestHumanNumber(t *testing.T) {
|
|||||||
|
|
||||||
testCases := []testCase{
|
testCases := []testCase{
|
||||||
{0, "0"},
|
{0, "0"},
|
||||||
|
{999, "999"},
|
||||||
|
{1000, "1K"},
|
||||||
|
{1001, "1K"},
|
||||||
{1000000, "1M"},
|
{1000000, "1M"},
|
||||||
{125000000, "125M"},
|
{125000000, "125M"},
|
||||||
{500500000, "500.50M"},
|
{500500000, "500.50M"},
|
||||||
|
|||||||
@@ -120,6 +120,15 @@ func (kv KV) Uints(key string, defaultValue ...[]uint32) []uint32 {
|
|||||||
return s
|
return s
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (kv KV) Floats(key string, defaultValue ...[]float32) []float32 {
|
||||||
|
r := keyValue(kv, key, &array{})
|
||||||
|
s := make([]float32, r.size)
|
||||||
|
for i := range r.size {
|
||||||
|
s[i] = float32(r.values[i].(float32))
|
||||||
|
}
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
func keyValue[T string | uint32 | uint64 | float32 | *array](kv KV, key string, defaultValue ...T) T {
|
func keyValue[T string | uint32 | uint64 | float32 | *array](kv KV, key string, defaultValue ...T) T {
|
||||||
if !strings.HasPrefix(key, "tokenizer.") && !strings.HasPrefix(key, "general.") {
|
if !strings.HasPrefix(key, "tokenizer.") && !strings.HasPrefix(key, "general.") {
|
||||||
key = kv.Architecture() + "." + key
|
key = kv.Architecture() + "." + key
|
||||||
|
|||||||
1
go.mod
1
go.mod
@@ -18,6 +18,7 @@ require (
|
|||||||
github.com/agnivade/levenshtein v1.1.1
|
github.com/agnivade/levenshtein v1.1.1
|
||||||
github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1
|
github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1
|
||||||
github.com/dlclark/regexp2 v1.11.4
|
github.com/dlclark/regexp2 v1.11.4
|
||||||
|
github.com/emirpasic/gods v1.18.1
|
||||||
github.com/emirpasic/gods/v2 v2.0.0-alpha
|
github.com/emirpasic/gods/v2 v2.0.0-alpha
|
||||||
github.com/google/go-cmp v0.6.0
|
github.com/google/go-cmp v0.6.0
|
||||||
github.com/mattn/go-runewidth v0.0.14
|
github.com/mattn/go-runewidth v0.0.14
|
||||||
|
|||||||
2
go.sum
2
go.sum
@@ -44,6 +44,8 @@ github.com/dgryski/trifles v0.0.0-20200323201526-dd97f9abfb48 h1:fRzb/w+pyskVMQ+
|
|||||||
github.com/dgryski/trifles v0.0.0-20200323201526-dd97f9abfb48/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA=
|
github.com/dgryski/trifles v0.0.0-20200323201526-dd97f9abfb48/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA=
|
||||||
github.com/dlclark/regexp2 v1.11.4 h1:rPYF9/LECdNymJufQKmri9gV604RvvABwgOA8un7yAo=
|
github.com/dlclark/regexp2 v1.11.4 h1:rPYF9/LECdNymJufQKmri9gV604RvvABwgOA8un7yAo=
|
||||||
github.com/dlclark/regexp2 v1.11.4/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
|
github.com/dlclark/regexp2 v1.11.4/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
|
||||||
|
github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc=
|
||||||
|
github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ=
|
||||||
github.com/emirpasic/gods/v2 v2.0.0-alpha h1:dwFlh8pBg1VMOXWGipNMRt8v96dKAIvBehtCt6OtunU=
|
github.com/emirpasic/gods/v2 v2.0.0-alpha h1:dwFlh8pBg1VMOXWGipNMRt8v96dKAIvBehtCt6OtunU=
|
||||||
github.com/emirpasic/gods/v2 v2.0.0-alpha/go.mod h1:W0y4M2dtBB9U5z3YlghmpuUhiaZT2h6yoeE+C1sCp6A=
|
github.com/emirpasic/gods/v2 v2.0.0-alpha/go.mod h1:W0y4M2dtBB9U5z3YlghmpuUhiaZT2h6yoeE+C1sCp6A=
|
||||||
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
|
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
|
||||||
|
|||||||
@@ -305,6 +305,10 @@ func (b *testBackend) NewContext() ml.Context {
|
|||||||
return &testContext{}
|
return &testContext{}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SystemInfo is a test stub: it returns a fixed placeholder string instead of
// describing any real backend.
func (b *testBackend) SystemInfo() string {
	return "not implemented"
}
|
||||||
|
|
||||||
type testContext struct{}
|
type testContext struct{}
|
||||||
|
|
||||||
func (c *testContext) Zeros(dtype ml.DType, shape ...int) ml.Tensor {
|
func (c *testContext) Zeros(dtype ml.DType, shape ...int) ml.Tensor {
|
||||||
@@ -430,7 +434,7 @@ func (t *testTensor) Conv2D(ctx ml.Context, weight ml.Tensor, s0, s1, p0, p1, d0
|
|||||||
panic("not implemented")
|
panic("not implemented")
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *testTensor) RoPE(ctx ml.Context, positionIDs, ropeFactors ml.Tensor, dim uint32, base, scale float32) ml.Tensor {
|
func (t *testTensor) RoPE(ctx ml.Context, positionIDs, ropeFactors ml.Tensor, dim, ropeType uint32, base, scale float32) ml.Tensor {
|
||||||
panic("not implemented")
|
panic("not implemented")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -17,12 +17,14 @@ type Config interface {
|
|||||||
|
|
||||||
Strings(string, ...[]string) []string
|
Strings(string, ...[]string) []string
|
||||||
Uints(string, ...[]uint32) []uint32
|
Uints(string, ...[]uint32) []uint32
|
||||||
|
Floats(string, ...[]float32) []float32
|
||||||
}
|
}
|
||||||
|
|
||||||
type Backend interface {
|
type Backend interface {
|
||||||
Config() Config
|
Config() Config
|
||||||
Get(name string) Tensor
|
Get(name string) Tensor
|
||||||
NewContext() Context
|
NewContext() Context
|
||||||
|
SystemInfo() string
|
||||||
}
|
}
|
||||||
|
|
||||||
var backends = make(map[string]func(*os.File) (Backend, error))
|
var backends = make(map[string]func(*os.File) (Backend, error))
|
||||||
@@ -75,7 +77,7 @@ type Tensor interface {
|
|||||||
Scale(ctx Context, s float64) Tensor
|
Scale(ctx Context, s float64) Tensor
|
||||||
|
|
||||||
Conv2D(ctx Context, weight Tensor, s0, s1, p0, p1, d0, d1 int) Tensor
|
Conv2D(ctx Context, weight Tensor, s0, s1, p0, p1, d0, d1 int) Tensor
|
||||||
RoPE(ctx Context, positionIDs, ropeFactors Tensor, dim uint32, base, scale float32) Tensor
|
RoPE(ctx Context, positionIDs, ropeFactors Tensor, dim, ropeType uint32, base, scale float32) Tensor
|
||||||
|
|
||||||
Tanh(ctx Context) Tensor
|
Tanh(ctx Context) Tensor
|
||||||
GELU(ctx Context) Tensor
|
GELU(ctx Context) Tensor
|
||||||
|
|||||||
@@ -1,11 +1,27 @@
|
|||||||
package ggml
|
package ggml
|
||||||
|
|
||||||
// #cgo CPPFLAGS: -I${SRCDIR}/ggml/include
|
/*
|
||||||
// #include <stdlib.h>
|
#cgo CPPFLAGS: -I${SRCDIR}/ggml/include
|
||||||
// #include <stdint.h>
|
#include <stdlib.h>
|
||||||
// #include "ggml.h"
|
#include <stdint.h>
|
||||||
// #include "ggml-cpu.h"
|
#include "ggml.h"
|
||||||
// #include "ggml-backend.h"
|
#include "ggml-cpu.h"
|
||||||
|
#include "ggml-backend.h"
|
||||||
|
static struct ggml_backend_feature * getBackendFeatures(void *fp, ggml_backend_reg_t reg) {return ((ggml_backend_get_features_t)(fp))(reg);}
|
||||||
|
static struct ggml_backend_feature * getNextBackendFeatures(struct ggml_backend_feature * feature) { return &feature[1];}
|
||||||
|
|
||||||
|
// COMPILER identifies the C compiler that built this cgo preamble.
typedef enum {COMP_UNKNOWN,COMP_GCC,COMP_CLANG} COMPILER;

// get_compiler reports which compiler built this translation unit, based on
// its predefined macros. __clang__ is tested before __GNUC__ because clang
// defines both. Any other compiler yields COMP_UNKNOWN.
//
// The function is static inline so the definition is self-contained in this
// translation unit; a plain C99 `inline` emits no external definition and can
// fail to link when the call is not inlined.
static inline COMPILER get_compiler(void) {
#if defined(__clang__)
	return COMP_CLANG;
#elif defined(__GNUC__)
	return COMP_GCC;
#else
	return COMP_UNKNOWN;
#endif
}
|
||||||
|
|
||||||
|
*/
|
||||||
import "C"
|
import "C"
|
||||||
|
|
||||||
import (
|
import (
|
||||||
@@ -580,10 +596,13 @@ func (t *Tensor) View(ctx ml.Context, offset int, shape ...int) ml.Tensor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const (
|
const (
|
||||||
ropeTypeNorm C.int = iota
|
ropeTypeNorm C.int = 0
|
||||||
|
ropeTypeNeox C.int = 2
|
||||||
|
ropeTypeMrope C.int = 8
|
||||||
|
ropeTypeVision C.int = 24
|
||||||
)
|
)
|
||||||
|
|
||||||
func (t *Tensor) RoPE(ctx ml.Context, positionIDs, ropeFactors ml.Tensor, ropeDim uint32, ropeBase, ropeScale float32) ml.Tensor {
|
func (t *Tensor) RoPE(ctx ml.Context, positionIDs, ropeFactors ml.Tensor, ropeDim, ropeType uint32, ropeBase, ropeScale float32) ml.Tensor {
|
||||||
if ropeFactors == nil {
|
if ropeFactors == nil {
|
||||||
ropeFactors = &Tensor{}
|
ropeFactors = &Tensor{}
|
||||||
}
|
}
|
||||||
@@ -597,8 +616,8 @@ func (t *Tensor) RoPE(ctx ml.Context, positionIDs, ropeFactors ml.Tensor, ropeDi
|
|||||||
t: C.ggml_rope_ext(
|
t: C.ggml_rope_ext(
|
||||||
ctx.(*Context).ctx, dequant, positionIDs.(*Tensor).t, ropeFactors.(*Tensor).t,
|
ctx.(*Context).ctx, dequant, positionIDs.(*Tensor).t, ropeFactors.(*Tensor).t,
|
||||||
C.int(ropeDim),
|
C.int(ropeDim),
|
||||||
|
C.int(ropeType),
|
||||||
131072, // YaRN n_ctx_train
|
131072, // YaRN n_ctx_train
|
||||||
ropeTypeNorm, // ROPE_TYPE_NORM
|
|
||||||
C.float(ropeBase),
|
C.float(ropeBase),
|
||||||
C.float(ropeScale),
|
C.float(ropeScale),
|
||||||
0., // YaRN ext_factor
|
0., // YaRN ext_factor
|
||||||
@@ -626,3 +645,34 @@ func (t *Tensor) Conv2D(ctx ml.Context, t2 ml.Tensor, s0, s1, p0, p1, d0, d1 int
|
|||||||
t: C.ggml_conv_2d(ctx.(*Context).ctx, t.t, t2.(*Tensor).t, C.int(s0), C.int(s1), C.int(p0), C.int(p1), C.int(d0), C.int(d1)),
|
t: C.ggml_conv_2d(ctx.(*Context).ctx, t.t, t2.(*Tensor).t, C.int(s0), C.int(s1), C.int(p0), C.int(p1), C.int(d0), C.int(d1)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (b *Backend) SystemInfo() string {
|
||||||
|
var compiler string
|
||||||
|
switch C.get_compiler() {
|
||||||
|
case C.COMP_UNKNOWN:
|
||||||
|
compiler = "cgo(unknown_compiler)"
|
||||||
|
case C.COMP_GCC:
|
||||||
|
compiler = "cgo(gcc)"
|
||||||
|
case C.COMP_CLANG:
|
||||||
|
compiler = "cgo(clang)"
|
||||||
|
}
|
||||||
|
|
||||||
|
var s string
|
||||||
|
for i := range C.ggml_backend_reg_count() {
|
||||||
|
reg := C.ggml_backend_reg_get(i)
|
||||||
|
fName := C.CString("ggml_backend_get_features")
|
||||||
|
defer C.free(unsafe.Pointer(fName))
|
||||||
|
get_features_fn := C.ggml_backend_reg_get_proc_address(reg, fName)
|
||||||
|
if get_features_fn != nil {
|
||||||
|
s += C.GoString(C.ggml_backend_reg_name(reg))
|
||||||
|
s += " : "
|
||||||
|
for features := C.getBackendFeatures(get_features_fn, reg); features.name != nil; features = C.getNextBackendFeatures(features) {
|
||||||
|
s += C.GoString(features.name)
|
||||||
|
s += " = "
|
||||||
|
s += C.GoString(features.value)
|
||||||
|
s += " | "
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return s + compiler
|
||||||
|
}
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ import (
|
|||||||
_ "github.com/ollama/ollama/ml/backend"
|
_ "github.com/ollama/ollama/ml/backend"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// Options contains the inputs for a model forward pass
|
||||||
type Options struct {
|
type Options struct {
|
||||||
Inputs []int32
|
Inputs []int32
|
||||||
Positions []int32
|
Positions []int32
|
||||||
@@ -34,11 +35,13 @@ type config struct {
|
|||||||
Cache kvcache.Cache
|
Cache kvcache.Cache
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Base implements the common fields and methods for all models
|
||||||
type Base struct {
|
type Base struct {
|
||||||
b ml.Backend
|
b ml.Backend
|
||||||
config
|
config
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Backend returns the underlying backend that will run the model
|
||||||
func (m *Base) Backend() ml.Backend {
|
func (m *Base) Backend() ml.Backend {
|
||||||
return m.b
|
return m.b
|
||||||
}
|
}
|
||||||
@@ -47,6 +50,7 @@ func (m *Base) Config() config {
|
|||||||
return m.config
|
return m.config
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Model implements a specific model architecture, defining the forward pass and any model-specific configuration
|
||||||
type Model interface {
|
type Model interface {
|
||||||
Forward(ml.Context, Options) (ml.Tensor, error)
|
Forward(ml.Context, Options) (ml.Tensor, error)
|
||||||
|
|
||||||
@@ -56,6 +60,7 @@ type Model interface {
|
|||||||
|
|
||||||
var models = make(map[string]func(ml.Config) (Model, error))
|
var models = make(map[string]func(ml.Config) (Model, error))
|
||||||
|
|
||||||
|
// Register registers a model constructor for the given architecture
|
||||||
func Register(name string, f func(ml.Config) (Model, error)) {
|
func Register(name string, f func(ml.Config) (Model, error)) {
|
||||||
if _, ok := models[name]; ok {
|
if _, ok := models[name]; ok {
|
||||||
panic("model: model already registered")
|
panic("model: model already registered")
|
||||||
@@ -64,8 +69,9 @@ func Register(name string, f func(ml.Config) (Model, error)) {
|
|||||||
models[name] = f
|
models[name] = f
|
||||||
}
|
}
|
||||||
|
|
||||||
func New(s string) (Model, error) {
|
// New initializes a new model instance with the provided configuration based on the metadata in the model file
|
||||||
r, err := os.Open(s)
|
func New(modelPath string) (Model, error) {
|
||||||
|
r, err := os.Open(modelPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|||||||
193
model/models/gemma2/model.go
Normal file
193
model/models/gemma2/model.go
Normal file
@@ -0,0 +1,193 @@
|
|||||||
|
package gemma2
|
||||||
|
|
||||||
|
import (
|
||||||
|
"math"
|
||||||
|
|
||||||
|
"github.com/ollama/ollama/kvcache"
|
||||||
|
"github.com/ollama/ollama/ml"
|
||||||
|
"github.com/ollama/ollama/ml/nn"
|
||||||
|
"github.com/ollama/ollama/model"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Options holds the gemma2 hyperparameters that New reads from the model
// configuration and that the forward pass consults.
type Options struct {
	hiddenSize int // read from "embedding_length"
	numHeads   int // read from "attention.head_count"
	numKVHeads int // read from "attention.head_count_kv"

	attnKeyLen int // read from "attention.key_length"
	attnValLen int // read from "attention.value_length"

	eps       float32 // read from "attention.layer_norm_rms_epsilon"
	ropeBase  float32 // read from "rope.freq_base"
	ropeScale float32 // read from "rope.freq_scale"

	attnLogitSoftcap  float32 // read from "attn_logit_softcapping"
	finalLogitSoftcap float32 // read from "final_logit_softcapping"
}
|
||||||
|
|
||||||
|
// Model is the gemma2 model: the shared model.Base, a SentencePiece
// tokenizer, the tensors tagged with their gguf names, and the Options
// hyperparameters.
type Model struct {
	model.Base
	model.SentencePieceModel

	TokenEmbedding *nn.Embedding `gguf:"token_embd"`
	Layers         []Layer       `gguf:"blk"`
	OutputNorm     *nn.RMSNorm   `gguf:"output_norm"`           // applied to the hidden state after the last layer (see Forward)
	Output         *nn.Linear    `gguf:"output,alt:token_embd"` // NOTE(review): the alt tag presumably falls back to the token_embd weights when no "output" tensor exists — confirm against the gguf loader

	*Options
}
|
||||||
|
|
||||||
|
func New(c ml.Config) (model.Model, error) {
|
||||||
|
m := Model{
|
||||||
|
SentencePieceModel: model.NewSentencePieceModel(
|
||||||
|
c.String("tokenizer.ggml.pretokenizer", `(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+`),
|
||||||
|
&model.Vocabulary{
|
||||||
|
Values: c.Strings("tokenizer.ggml.tokens"),
|
||||||
|
Scores: c.Floats("tokenizer.ggml.scores"),
|
||||||
|
Types: c.Uints("tokenizer.ggml.token_type"),
|
||||||
|
BOS: int32(c.Uint("tokenizer.ggml.bos_token_id")),
|
||||||
|
EOS: int32(c.Uint("tokenizer.ggml.eos_token_id")),
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Layers: make([]Layer, c.Uint("block_count")),
|
||||||
|
Options: &Options{
|
||||||
|
hiddenSize: int(c.Uint("embedding_length")),
|
||||||
|
numHeads: int(c.Uint("attention.head_count")),
|
||||||
|
numKVHeads: int(c.Uint("attention.head_count_kv")),
|
||||||
|
attnKeyLen: int(c.Uint("attention.key_length")),
|
||||||
|
attnValLen: int(c.Uint("attention.value_length")),
|
||||||
|
eps: c.Float("attention.layer_norm_rms_epsilon"),
|
||||||
|
ropeBase: c.Float("rope.freq_base", 10000.0),
|
||||||
|
ropeScale: c.Float("rope.freq_scale", 1.0),
|
||||||
|
attnLogitSoftcap: c.Float("attn_logit_softcapping"),
|
||||||
|
finalLogitSoftcap: c.Float("final_logit_softcapping"),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
slidingWindowLen := int32(c.Uint("attention.sliding_window"))
|
||||||
|
m.Cache = kvcache.NewWrapperCache(kvcache.NewSWACache(slidingWindowLen, m.Shift), kvcache.NewCausalCache(m.Shift))
|
||||||
|
|
||||||
|
return &m, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// SelfAttention groups the four linear projections of one attention block,
// each tagged with the gguf tensor name it is loaded from.
type SelfAttention struct {
	Query  *nn.Linear `gguf:"attn_q"`
	Key    *nn.Linear `gguf:"attn_k"`
	Value  *nn.Linear `gguf:"attn_v"`
	Output *nn.Linear `gguf:"attn_output"`
}
|
||||||
|
|
||||||
|
func (sa *SelfAttention) Forward(ctx ml.Context, hiddenState, positionIDs ml.Tensor, cache kvcache.Cache, opts *Options) ml.Tensor {
|
||||||
|
batchSize := hiddenState.Dim(1)
|
||||||
|
ropeType := uint32(2)
|
||||||
|
|
||||||
|
q := sa.Query.Forward(ctx, hiddenState)
|
||||||
|
q = q.Reshape(ctx, opts.attnKeyLen, opts.numHeads, batchSize)
|
||||||
|
q = q.RoPE(ctx, positionIDs, nil, uint32(opts.attnKeyLen), ropeType, opts.ropeBase, opts.ropeScale)
|
||||||
|
|
||||||
|
// todo: this should be 1.0/math.Sqrt(float64(headDim)) for 27B models
|
||||||
|
q = q.Scale(ctx, 1.0/math.Sqrt(float64(opts.attnKeyLen)))
|
||||||
|
|
||||||
|
k := sa.Key.Forward(ctx, hiddenState)
|
||||||
|
k = k.Reshape(ctx, opts.attnKeyLen, opts.numKVHeads, batchSize)
|
||||||
|
k = k.RoPE(ctx, positionIDs, nil, uint32(opts.attnKeyLen), ropeType, opts.ropeBase, opts.ropeScale)
|
||||||
|
|
||||||
|
v := sa.Value.Forward(ctx, hiddenState)
|
||||||
|
v = v.Reshape(ctx, opts.attnValLen, opts.numKVHeads, batchSize)
|
||||||
|
|
||||||
|
cache.Put(ctx, k, v)
|
||||||
|
k, v, mask := cache.Get(ctx)
|
||||||
|
|
||||||
|
q = q.Permute(ctx, 0, 2, 1, 3).Contiguous(ctx)
|
||||||
|
k = k.Permute(ctx, 0, 2, 1, 3).Contiguous(ctx)
|
||||||
|
v = v.Permute(ctx, 1, 2, 0, 3).Contiguous(ctx)
|
||||||
|
|
||||||
|
kq := k.Mulmat(ctx, q)
|
||||||
|
|
||||||
|
// logit softcap
|
||||||
|
kq = kq.Scale(ctx, 1.0/float64(opts.attnLogitSoftcap))
|
||||||
|
kq = kq.Tanh(ctx)
|
||||||
|
kq = kq.Scale(ctx, float64(opts.attnLogitSoftcap))
|
||||||
|
|
||||||
|
kq = kq.Add(ctx, mask)
|
||||||
|
kq = kq.Softmax(ctx)
|
||||||
|
|
||||||
|
kqv := v.Mulmat(ctx, kq)
|
||||||
|
kqv = kqv.Permute(ctx, 0, 2, 1, 3).Contiguous(ctx)
|
||||||
|
kqv = kqv.Reshape(ctx, opts.attnValLen*opts.numHeads, batchSize)
|
||||||
|
|
||||||
|
return sa.Output.Forward(ctx, kqv)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *Model) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error) {
|
||||||
|
return key.RoPE(ctx, shift, nil, uint32(m.Options.attnKeyLen), uint32(2), m.Options.ropeBase, m.Options.ropeScale), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// MLP holds the three linear projections of the feed-forward block, each
// tagged with the gguf tensor name it is loaded from.
type MLP struct {
	Up   *nn.Linear `gguf:"ffn_up"`
	Down *nn.Linear `gguf:"ffn_down"`
	Gate *nn.Linear `gguf:"ffn_gate"`
}
|
||||||
|
|
||||||
|
func (mlp *MLP) Forward(ctx ml.Context, hiddenState ml.Tensor, opts *Options) ml.Tensor {
|
||||||
|
hiddenState = mlp.Gate.Forward(ctx, hiddenState).GELU(ctx).Mul(ctx, mlp.Up.Forward(ctx, hiddenState))
|
||||||
|
return mlp.Down.Forward(ctx, hiddenState)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Layer is one transformer block: an attention sub-block and an MLP
// sub-block, each with a norm applied before it and another applied after it
// (see Layer.Forward for the exact order).
type Layer struct {
	AttentionNorm     *nn.RMSNorm `gguf:"attn_norm"`
	SelfAttention     *SelfAttention
	PostAttentionNorm *nn.RMSNorm `gguf:"post_attention_norm"`
	MLPNorm           *nn.RMSNorm `gguf:"ffn_norm"`
	MLP               *MLP
	PostMLPNorm       *nn.RMSNorm `gguf:"post_ffw_norm"`
}
|
||||||
|
|
||||||
|
func (l *Layer) Forward(ctx ml.Context, hiddenState, positionIDs ml.Tensor, cache kvcache.Cache, opts *Options) ml.Tensor {
|
||||||
|
residual := hiddenState
|
||||||
|
|
||||||
|
hiddenState = l.AttentionNorm.Forward(ctx, hiddenState, opts.eps)
|
||||||
|
hiddenState = l.SelfAttention.Forward(ctx, hiddenState, positionIDs, cache, opts)
|
||||||
|
hiddenState = l.PostAttentionNorm.Forward(ctx, hiddenState, opts.eps)
|
||||||
|
hiddenState = hiddenState.Add(ctx, residual)
|
||||||
|
residual = hiddenState
|
||||||
|
|
||||||
|
hiddenState = l.MLPNorm.Forward(ctx, hiddenState, opts.eps)
|
||||||
|
hiddenState = l.MLP.Forward(ctx, hiddenState, opts)
|
||||||
|
hiddenState = l.PostMLPNorm.Forward(ctx, hiddenState, opts.eps)
|
||||||
|
return hiddenState.Add(ctx, residual)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *Model) Forward(ctx ml.Context, opts model.Options) (ml.Tensor, error) {
|
||||||
|
inputs, err := ctx.FromIntSlice(opts.Inputs, len(opts.Inputs))
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
positions, err := ctx.FromIntSlice(opts.Positions, len(opts.Positions))
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
hiddenState := m.TokenEmbedding.Forward(ctx, inputs)
|
||||||
|
hiddenState = hiddenState.Scale(ctx, math.Sqrt(float64(m.Options.hiddenSize)))
|
||||||
|
|
||||||
|
for i, layer := range m.Layers {
|
||||||
|
cacheType := i % 2
|
||||||
|
m.Cache.SetLayer(i)
|
||||||
|
wc := m.Cache.(*kvcache.WrapperCache)
|
||||||
|
wc.SetLayerType(cacheType)
|
||||||
|
hiddenState = layer.Forward(ctx, hiddenState, positions, m.Cache, m.Options)
|
||||||
|
}
|
||||||
|
|
||||||
|
hiddenState = m.OutputNorm.Forward(ctx, hiddenState, m.eps)
|
||||||
|
hiddenState = m.Output.Forward(ctx, hiddenState)
|
||||||
|
|
||||||
|
// final logit softcap
|
||||||
|
hiddenState = hiddenState.Scale(ctx, 1.0/float64(m.Options.finalLogitSoftcap))
|
||||||
|
hiddenState = hiddenState.Tanh(ctx)
|
||||||
|
hiddenState = hiddenState.Scale(ctx, float64(m.Options.finalLogitSoftcap))
|
||||||
|
|
||||||
|
outputs, err := ctx.FromIntSlice(opts.Outputs, len(opts.Outputs))
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return hiddenState.Rows(ctx, outputs), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// init registers New as the constructor for the "gemma2" architecture.
func init() {
	model.Register("gemma2", New)
}
|
||||||
@@ -67,14 +67,15 @@ type SelfAttention struct {
|
|||||||
func (sa *SelfAttention) Forward(ctx ml.Context, hiddenState, positionIDs ml.Tensor, cache kvcache.Cache, opts *Options) ml.Tensor {
|
func (sa *SelfAttention) Forward(ctx ml.Context, hiddenState, positionIDs ml.Tensor, cache kvcache.Cache, opts *Options) ml.Tensor {
|
||||||
batchSize := hiddenState.Dim(1)
|
batchSize := hiddenState.Dim(1)
|
||||||
headDim := opts.hiddenSize / opts.numHeads
|
headDim := opts.hiddenSize / opts.numHeads
|
||||||
|
ropeType := uint32(0)
|
||||||
|
|
||||||
q := sa.Query.Forward(ctx, hiddenState)
|
q := sa.Query.Forward(ctx, hiddenState)
|
||||||
q = q.Reshape(ctx, headDim, opts.numHeads, batchSize)
|
q = q.Reshape(ctx, headDim, opts.numHeads, batchSize)
|
||||||
q = q.RoPE(ctx, positionIDs, opts.RopeFactors, opts.ropeDim, opts.ropeBase, opts.ropeScale)
|
q = q.RoPE(ctx, positionIDs, opts.RopeFactors, opts.ropeDim, ropeType, opts.ropeBase, opts.ropeScale)
|
||||||
|
|
||||||
k := sa.Key.Forward(ctx, hiddenState)
|
k := sa.Key.Forward(ctx, hiddenState)
|
||||||
k = k.Reshape(ctx, headDim, opts.numKVHeads, batchSize)
|
k = k.Reshape(ctx, headDim, opts.numKVHeads, batchSize)
|
||||||
k = k.RoPE(ctx, positionIDs, opts.RopeFactors, opts.ropeDim, opts.ropeBase, opts.ropeScale)
|
k = k.RoPE(ctx, positionIDs, opts.RopeFactors, opts.ropeDim, ropeType, opts.ropeBase, opts.ropeScale)
|
||||||
|
|
||||||
v := sa.Value.Forward(ctx, hiddenState)
|
v := sa.Value.Forward(ctx, hiddenState)
|
||||||
v = v.Reshape(ctx, headDim, opts.numKVHeads, batchSize)
|
v = v.Reshape(ctx, headDim, opts.numKVHeads, batchSize)
|
||||||
@@ -99,7 +100,7 @@ func (sa *SelfAttention) Forward(ctx ml.Context, hiddenState, positionIDs ml.Ten
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (m *Model) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error) {
|
func (m *Model) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error) {
|
||||||
return key.RoPE(ctx, shift, m.Options.RopeFactors, m.Options.ropeDim, m.Options.ropeBase, m.Options.ropeScale), nil
|
return key.RoPE(ctx, shift, m.Options.RopeFactors, m.Options.ropeDim, uint32(0), m.Options.ropeBase, m.Options.ropeScale), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
type MLP struct {
|
type MLP struct {
|
||||||
|
|||||||
@@ -19,14 +19,15 @@ type TextSelfAttention struct {
|
|||||||
func (sa *TextSelfAttention) Forward(ctx ml.Context, hiddenState, positions, _ ml.Tensor, cache *kvcache.WrapperCache, opts *TextModelOptions) ml.Tensor {
|
func (sa *TextSelfAttention) Forward(ctx ml.Context, hiddenState, positions, _ ml.Tensor, cache *kvcache.WrapperCache, opts *TextModelOptions) ml.Tensor {
|
||||||
batchSize := hiddenState.Dim(1)
|
batchSize := hiddenState.Dim(1)
|
||||||
headDim := opts.hiddenSize / opts.numHeads
|
headDim := opts.hiddenSize / opts.numHeads
|
||||||
|
ropeType := uint32(0)
|
||||||
|
|
||||||
query := sa.Query.Forward(ctx, hiddenState)
|
query := sa.Query.Forward(ctx, hiddenState)
|
||||||
query = query.Reshape(ctx, headDim, opts.numHeads, batchSize)
|
query = query.Reshape(ctx, headDim, opts.numHeads, batchSize)
|
||||||
query = query.RoPE(ctx, positions, opts.RopeFactors, opts.ropeDim, opts.ropeBase, opts.ropeScale)
|
query = query.RoPE(ctx, positions, opts.RopeFactors, opts.ropeDim, ropeType, opts.ropeBase, opts.ropeScale)
|
||||||
|
|
||||||
key := sa.Key.Forward(ctx, hiddenState)
|
key := sa.Key.Forward(ctx, hiddenState)
|
||||||
key = key.Reshape(ctx, headDim, opts.numKVHeads, batchSize)
|
key = key.Reshape(ctx, headDim, opts.numKVHeads, batchSize)
|
||||||
key = key.RoPE(ctx, positions, opts.RopeFactors, opts.ropeDim, opts.ropeBase, opts.ropeScale)
|
key = key.RoPE(ctx, positions, opts.RopeFactors, opts.ropeDim, ropeType, opts.ropeBase, opts.ropeScale)
|
||||||
|
|
||||||
value := sa.Value.Forward(ctx, hiddenState)
|
value := sa.Value.Forward(ctx, hiddenState)
|
||||||
value = value.Reshape(ctx, headDim, opts.numKVHeads, batchSize)
|
value = value.Reshape(ctx, headDim, opts.numKVHeads, batchSize)
|
||||||
@@ -52,7 +53,7 @@ func (sa *TextSelfAttention) Forward(ctx ml.Context, hiddenState, positions, _ m
|
|||||||
|
|
||||||
func (m *TextModel) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error) {
|
func (m *TextModel) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error) {
|
||||||
// This will only get called for layers in the cache, which are just the self attention layers
|
// This will only get called for layers in the cache, which are just the self attention layers
|
||||||
return key.RoPE(ctx, shift, m.RopeFactors, m.ropeDim, m.ropeBase, m.ropeScale), nil
|
return key.RoPE(ctx, shift, m.RopeFactors, m.ropeDim, uint32(0), m.ropeBase, m.ropeScale), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
type TextMLP struct {
|
type TextMLP struct {
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
package models
|
package models
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
_ "github.com/ollama/ollama/model/models/gemma2"
|
||||||
_ "github.com/ollama/ollama/model/models/llama"
|
_ "github.com/ollama/ollama/model/models/llama"
|
||||||
_ "github.com/ollama/ollama/model/models/mllama"
|
_ "github.com/ollama/ollama/model/models/mllama"
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -18,6 +18,15 @@ const (
|
|||||||
SpecialEOS
|
SpecialEOS
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// Token type identifiers stored in Vocabulary.Types, numbered from 1.
const (
	TOKEN_TYPE_NORMAL       = 1
	TOKEN_TYPE_UNKNOWN      = 2
	TOKEN_TYPE_CONTROL      = 3
	TOKEN_TYPE_USER_DEFINED = 4
	TOKEN_TYPE_UNUSED       = 5
	TOKEN_TYPE_BYTE         = 6
)
|
||||||
|
|
||||||
type TextProcessor interface {
|
type TextProcessor interface {
|
||||||
Encode(string) ([]int32, error)
|
Encode(string) ([]int32, error)
|
||||||
Decode([]int32) (string, error)
|
Decode([]int32) (string, error)
|
||||||
@@ -27,7 +36,7 @@ type TextProcessor interface {
|
|||||||
type Vocabulary struct {
|
type Vocabulary struct {
|
||||||
Values []string
|
Values []string
|
||||||
Types []uint32
|
Types []uint32
|
||||||
Scores []uint32
|
Scores []float32
|
||||||
Merges []string
|
Merges []string
|
||||||
|
|
||||||
BOS, EOS int32
|
BOS, EOS int32
|
||||||
@@ -75,7 +84,7 @@ func (v *Vocabulary) Decode(id int32) string {
|
|||||||
func (v *Vocabulary) SpecialVocabulary() []string {
|
func (v *Vocabulary) SpecialVocabulary() []string {
|
||||||
v.specialOnce.Do(func() {
|
v.specialOnce.Do(func() {
|
||||||
for i := range v.Values {
|
for i := range v.Values {
|
||||||
if v.Types[i] == 3 {
|
if v.Types[i] == TOKEN_TYPE_CONTROL {
|
||||||
v.special = append(v.special, v.Values[i])
|
v.special = append(v.special, v.Values[i])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
220
model/process_text_spm.go
Normal file
220
model/process_text_spm.go
Normal file
@@ -0,0 +1,220 @@
|
|||||||
|
package model
|
||||||
|
|
||||||
|
import (
|
||||||
|
"iter"
|
||||||
|
"log/slog"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/dlclark/regexp2"
|
||||||
|
queue "github.com/emirpasic/gods/queues/priorityqueue"
|
||||||
|
)
|
||||||
|
|
||||||
|
// spmWhitespaceSep is the marker that stands in for a space character inside
// vocabulary entries.
const spmWhitespaceSep = "▁"

// replaceWhitespaceBySeperator returns s with every ASCII space replaced by
// spmWhitespaceSep. Other whitespace (tabs, newlines) is left untouched.
func replaceWhitespaceBySeperator(s string) string {
	const everyOccurrence = -1
	return strings.Replace(s, " ", spmWhitespaceSep, everyOccurrence)
}
|
||||||
|
|
||||||
|
// SentencePieceModel tokenizes text against a scored vocabulary: Encode splits
// the input with a pre-tokenization pattern and merges adjacent pieces by
// vocabulary score, and Decode maps token ids back to text.
type SentencePieceModel struct {
	// maxTokenLen is the byte length of the longest normal, user-defined or
	// unused token value, as computed by NewSentencePieceModel.
	maxTokenLen int
	// pre is the compiled pre-tokenization pattern; split yields its matches.
	pre *regexp2.Regexp
	// vocab supplies token strings, types and scores.
	vocab *Vocabulary
}
|
||||||
|
|
||||||
|
func NewSentencePieceModel(pre string, vocab *Vocabulary) SentencePieceModel {
|
||||||
|
slog.Debug("Tokens", "num tokens", len(vocab.Values), "vals", vocab.Values[:3], "scores", vocab.Scores[:3], "types", vocab.Types[:3])
|
||||||
|
|
||||||
|
counter := map[int]int{}
|
||||||
|
var maxTokenLen int
|
||||||
|
for cnt := range vocab.Types {
|
||||||
|
switch vocab.Types[cnt] {
|
||||||
|
case TOKEN_TYPE_NORMAL, TOKEN_TYPE_USER_DEFINED, TOKEN_TYPE_UNUSED:
|
||||||
|
maxTokenLen = max(maxTokenLen, len(vocab.Values[cnt]))
|
||||||
|
fallthrough
|
||||||
|
default:
|
||||||
|
counter[int(vocab.Types[cnt])] += 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
slog.Debug("Token counts", "normal", counter[TOKEN_TYPE_NORMAL], "unknown", counter[TOKEN_TYPE_UNKNOWN], "control", counter[TOKEN_TYPE_CONTROL],
|
||||||
|
"user defined", counter[TOKEN_TYPE_USER_DEFINED], "unused", counter[TOKEN_TYPE_UNUSED], "byte", counter[TOKEN_TYPE_BYTE],
|
||||||
|
"max token len", maxTokenLen)
|
||||||
|
|
||||||
|
return SentencePieceModel{
|
||||||
|
maxTokenLen: maxTokenLen,
|
||||||
|
pre: regexp2.MustCompile(pre, regexp2.Unicode|regexp2.RE2),
|
||||||
|
vocab: vocab,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (spm SentencePieceModel) Is(id int32, special Special) bool {
|
||||||
|
return spm.vocab.Is(id, special)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (spm *SentencePieceModel) split(s string) iter.Seq[string] {
|
||||||
|
return func(yield func(string) bool) {
|
||||||
|
for m, _ := spm.pre.FindStringMatch(s); m != nil; m, _ = spm.pre.FindNextMatch(m) {
|
||||||
|
if !yield(m.String()) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (spm SentencePieceModel) Encode(s string) ([]int32, error) {
|
||||||
|
fragments := []fragment{{value: s}}
|
||||||
|
for _, special := range spm.vocab.SpecialVocabulary() {
|
||||||
|
// TODO: process special tokens concurrently
|
||||||
|
id := spm.vocab.Encode(special)
|
||||||
|
for i := 0; i < len(fragments); i++ {
|
||||||
|
frag := fragments[i]
|
||||||
|
if len(frag.ids) > 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
var middle []fragment
|
||||||
|
switch i := strings.Index(frag.value, special); {
|
||||||
|
case i < 0:
|
||||||
|
middle = append(middle, frag)
|
||||||
|
case i > 0:
|
||||||
|
middle = append(middle, fragment{value: frag.value[:i]})
|
||||||
|
fallthrough
|
||||||
|
default:
|
||||||
|
middle = append(middle, fragment{value: special, ids: []int32{id}})
|
||||||
|
if rest := frag.value[i+len(special):]; rest != "" {
|
||||||
|
middle = append(middle, fragment{value: rest})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fragments = append(fragments[:i], append(middle, fragments[i+1:]...)...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
slog.Debug("fragments", "frags", fragments)
|
||||||
|
|
||||||
|
var ids []int32
|
||||||
|
for _, frag := range fragments {
|
||||||
|
if len(frag.ids) > 0 {
|
||||||
|
ids = append(ids, frag.ids...)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
for split := range spm.split(frag.value) {
|
||||||
|
split = replaceWhitespaceBySeperator(split)
|
||||||
|
|
||||||
|
var sb strings.Builder
|
||||||
|
sb.Write([]byte(split))
|
||||||
|
if id := spm.vocab.Encode(sb.String()); id >= 0 {
|
||||||
|
ids = append(ids, id)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
runes := []rune(sb.String())
|
||||||
|
pq := queue.NewWith(func(a, b any) int {
|
||||||
|
priA := a.(*candidate)
|
||||||
|
priB := b.(*candidate)
|
||||||
|
if priA.score > priB.score || (priA.score == priB.score && priA.a < priB.a) {
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
return -1
|
||||||
|
})
|
||||||
|
|
||||||
|
merges := make([]merge, len(runes))
|
||||||
|
for r := range runes {
|
||||||
|
merges[r] = merge{
|
||||||
|
p: r - 1,
|
||||||
|
n: r + 1,
|
||||||
|
runes: []rune{runes[r]},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pairwise := func(a, b int) *candidate {
|
||||||
|
if a < 0 || b >= len(runes) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
left, right := string(merges[a].runes), string(merges[b].runes)
|
||||||
|
if id := spm.vocab.Encode(left + right); id >= 0 {
|
||||||
|
return &candidate{
|
||||||
|
a: a,
|
||||||
|
b: b,
|
||||||
|
length: len(left + " " + right),
|
||||||
|
score: spm.vocab.Scores[id],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := range len(runes) - 1 {
|
||||||
|
if pair := pairwise(i, i+1); pair != nil {
|
||||||
|
pq.Enqueue(pair)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pqv := pq.Values()
|
||||||
|
for _, v := range pqv {
|
||||||
|
e := v.(*candidate)
|
||||||
|
slog.Debug("candidate", "candidate", e)
|
||||||
|
}
|
||||||
|
|
||||||
|
for !pq.Empty() {
|
||||||
|
v, _ := pq.Dequeue()
|
||||||
|
pair := v.(*candidate)
|
||||||
|
left, right := merges[pair.a], merges[pair.b]
|
||||||
|
|
||||||
|
if len(left.runes) == 0 || len(right.runes) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
merges[pair.a].runes = append(left.runes, right.runes...)
|
||||||
|
merges[pair.b].runes = nil
|
||||||
|
merges[pair.a].n = right.n
|
||||||
|
if right.n < len(merges) {
|
||||||
|
merges[right.n].p = pair.a
|
||||||
|
}
|
||||||
|
|
||||||
|
if pair := pairwise(merges[pair.a].p, pair.a); pair != nil {
|
||||||
|
pq.Enqueue(pair)
|
||||||
|
}
|
||||||
|
|
||||||
|
if pair := pairwise(pair.a, merges[pair.a].n); pair != nil {
|
||||||
|
pq.Enqueue(pair)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
slog.Debug("merges", "merges", merges)
|
||||||
|
|
||||||
|
for _, merge := range merges {
|
||||||
|
if len(merge.runes) > 0 {
|
||||||
|
if id := spm.vocab.Encode(string(merge.runes)); id >= 0 {
|
||||||
|
ids = append(ids, id)
|
||||||
|
} else {
|
||||||
|
slog.Debug("missing token", "token", string(merge.runes))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
slog.Debug("encoded", "ids", ids)
|
||||||
|
|
||||||
|
return ids, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// candidate is a potential merge of two adjacent pieces, ordered in the
// priority queue by score and then by left index.
type candidate struct {
	// a is the index of the left piece.
	a int
	// b is the index of the right piece.
	b int
	// score is the vocabulary score of the merged token.
	score float32
	// length is a byte length derived from the two pieces when the candidate
	// is created.
	length int
}
|
||||||
|
|
||||||
|
func (spm SentencePieceModel) Decode(ids []int32) (string, error) {
|
||||||
|
var sb strings.Builder
|
||||||
|
for _, id := range ids {
|
||||||
|
data := spm.vocab.Decode(id)
|
||||||
|
data = strings.ReplaceAll(data, spmWhitespaceSep, " ")
|
||||||
|
if _, err := sb.WriteString(data); err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
slog.Debug("decoded", "ids", ids, "text", sb.String())
|
||||||
|
return sb.String(), nil
|
||||||
|
}
|
||||||
@@ -813,6 +813,8 @@ func (s *Server) loadModel(
|
|||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
slog.Info("system", "info", s.model.Backend().SystemInfo() /* "threads", *threads */)
|
||||||
|
|
||||||
// TODO(jessegross): LoRA loading
|
// TODO(jessegross): LoRA loading
|
||||||
if lpath.String() != "" {
|
if lpath.String() != "" {
|
||||||
panic("loras are not yet implemented")
|
panic("loras are not yet implemented")
|
||||||
@@ -881,7 +883,6 @@ func Execute(args []string) error {
|
|||||||
})
|
})
|
||||||
slog.SetDefault(slog.New(handler))
|
slog.SetDefault(slog.New(handler))
|
||||||
slog.Info("starting ollama engine")
|
slog.Info("starting ollama engine")
|
||||||
// TODO(jessegross): Some system info would be useful
|
|
||||||
|
|
||||||
server := &Server{
|
server := &Server{
|
||||||
batchSize: *batchSize,
|
batchSize: *batchSize,
|
||||||
|
|||||||
Reference in New Issue
Block a user