mirror of
https://github.com/ollama/ollama.git
synced 2026-04-21 16:25:42 +02:00
new runner
This commit is contained in:
96
runner/common/stop.go
Normal file
96
runner/common/stop.go
Normal file
@@ -0,0 +1,96 @@
|
||||
package common
|
||||
|
||||
import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
func FindStop(sequence string, stops []string) (bool, string) {
|
||||
for _, stop := range stops {
|
||||
if strings.Contains(sequence, stop) {
|
||||
return true, stop
|
||||
}
|
||||
}
|
||||
|
||||
return false, ""
|
||||
}
|
||||
|
||||
func ContainsStopSuffix(sequence string, stops []string) bool {
|
||||
for _, stop := range stops {
|
||||
for i := 1; i <= len(stop); i++ {
|
||||
if strings.HasSuffix(sequence, stop[:i]) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// truncateStop removes the provided stop string from pieces,
|
||||
// returning the partial pieces with stop removed, including truncating
|
||||
// the last piece if required (and signalling if this was the case)
|
||||
func TruncateStop(pieces []string, stop string) ([]string, bool) {
|
||||
joined := strings.Join(pieces, "")
|
||||
|
||||
index := strings.Index(joined, stop)
|
||||
if index == -1 {
|
||||
return pieces, false
|
||||
}
|
||||
|
||||
joined = joined[:index]
|
||||
|
||||
// Split truncated string back into pieces of original lengths
|
||||
lengths := make([]int, len(pieces))
|
||||
for i, piece := range pieces {
|
||||
lengths[i] = len(piece)
|
||||
}
|
||||
|
||||
var result []string
|
||||
tokenTruncated := false
|
||||
start := 0
|
||||
for _, length := range lengths {
|
||||
if start >= len(joined) {
|
||||
break
|
||||
}
|
||||
|
||||
end := start + length
|
||||
if end > len(joined) {
|
||||
end = len(joined)
|
||||
tokenTruncated = true
|
||||
}
|
||||
result = append(result, joined[start:end])
|
||||
start = end
|
||||
}
|
||||
|
||||
return result, tokenTruncated
|
||||
}
|
||||
|
||||
func IncompleteUnicode(token string) bool {
|
||||
incomplete := false
|
||||
|
||||
// check if there is incomplete UTF-8 character at the end
|
||||
for i := 1; i < 5 && i <= len(token); i++ {
|
||||
c := token[len(token)-i]
|
||||
|
||||
if (c & 0xc0) == 0x80 {
|
||||
// continuation byte: 10xxxxxx
|
||||
continue
|
||||
}
|
||||
|
||||
if (c & 0xe0) == 0xc0 {
|
||||
// 2-byte character: 110xxxxx ...
|
||||
incomplete = i < 2
|
||||
} else if (c & 0xf0) == 0xe0 {
|
||||
// 3-byte character: 1110xxxx ...
|
||||
incomplete = i < 3
|
||||
} else if (c & 0xf8) == 0xf0 {
|
||||
// 4-byte character: 11110xxx ...
|
||||
incomplete = i < 4
|
||||
}
|
||||
|
||||
// else 1-byte character or invalid byte
|
||||
break
|
||||
}
|
||||
|
||||
return incomplete
|
||||
}
|
||||
129
runner/common/stop_test.go
Normal file
129
runner/common/stop_test.go
Normal file
@@ -0,0 +1,129 @@
|
||||
package common
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestTruncateStop(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
pieces []string
|
||||
stop string
|
||||
expected []string
|
||||
expectedTrunc bool
|
||||
}{
|
||||
{
|
||||
name: "Single word",
|
||||
pieces: []string{"hello", "world"},
|
||||
stop: "world",
|
||||
expected: []string{"hello"},
|
||||
expectedTrunc: false,
|
||||
},
|
||||
{
|
||||
name: "Partial",
|
||||
pieces: []string{"hello", "wor"},
|
||||
stop: "or",
|
||||
expected: []string{"hello", "w"},
|
||||
expectedTrunc: true,
|
||||
},
|
||||
{
|
||||
name: "Suffix",
|
||||
pieces: []string{"Hello", " there", "!"},
|
||||
stop: "!",
|
||||
expected: []string{"Hello", " there"},
|
||||
expectedTrunc: false,
|
||||
},
|
||||
{
|
||||
name: "Suffix partial",
|
||||
pieces: []string{"Hello", " the", "re!"},
|
||||
stop: "there!",
|
||||
expected: []string{"Hello", " "},
|
||||
expectedTrunc: true,
|
||||
},
|
||||
{
|
||||
name: "Middle",
|
||||
pieces: []string{"hello", " wor"},
|
||||
stop: "llo w",
|
||||
expected: []string{"he"},
|
||||
expectedTrunc: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result, resultTrunc := TruncateStop(tt.pieces, tt.stop)
|
||||
if !reflect.DeepEqual(result, tt.expected) || resultTrunc != tt.expectedTrunc {
|
||||
t.Errorf("truncateStop(%v, %s): have %v (%v); want %v (%v)", tt.pieces, tt.stop, result, resultTrunc, tt.expected, tt.expectedTrunc)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestIncompleteUnicode(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
expected bool
|
||||
}{
|
||||
{
|
||||
name: "Basic",
|
||||
input: "hi",
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "Two byte",
|
||||
input: "hi" + string([]byte{0xc2, 0xa3}),
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "Two byte - missing last",
|
||||
input: "hi" + string([]byte{0xc2}),
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "Three byte",
|
||||
input: "hi" + string([]byte{0xe0, 0xA0, 0x80}),
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "Three byte - missing last",
|
||||
input: "hi" + string([]byte{0xe0, 0xA0}),
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "Three byte - missing last 2",
|
||||
input: "hi" + string([]byte{0xe0}),
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "Four byte",
|
||||
input: "hi" + string([]byte{0xf0, 0x92, 0x8a, 0xb7}),
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "Four byte - missing last",
|
||||
input: "hi" + string([]byte{0xf0, 0x92, 0x8a}),
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "Four byte - missing last 2",
|
||||
input: "hi" + string([]byte{0xf0, 0x92}),
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "Four byte - missing last 3",
|
||||
input: "hi" + string([]byte{0xf0}),
|
||||
expected: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := IncompleteUnicode(tt.input)
|
||||
if result != tt.expected {
|
||||
t.Errorf("incompleteUnicode(%s): have %v; want %v", tt.input, result, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user