Mirror of https://github.com/ollama/ollama.git (synced 2026-04-27 03:05:43 +02:00)

Compare commits: 8 commits, parth/opt-... → v0.9.3-rc0
| Author | SHA1 | Date |
|---|---|---|
| | 10a8e04a8d | |
| | 1c6669e64c | |
| | 2bb69b40c7 | |
| | 65bff664cb | |
| | c088ac0e79 | |
| | 0a066cfd91 | |
| | 87b7af6cee | |
| | f2527b08fb | |
.github/workflows/release.yaml (vendored): 7 changes
@@ -103,11 +103,6 @@ jobs:
       arch: [amd64]
       preset: ['CPU']
       include:
-        - os: windows
-          arch: amd64
-          preset: 'CUDA 11'
-          install: https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe
-          cuda-version: '11.3'
         - os: windows
           arch: amd64
           preset: 'CUDA 12'
@@ -324,8 +319,6 @@ jobs:
           case "$COMPONENT" in
             bin/ollama) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
             lib/ollama/*.so) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
-            lib/ollama/cuda_v11) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
-            lib/ollama/cuda_v12) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
             lib/ollama/cuda_jetpack5) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack5.tar.in ;;
             lib/ollama/cuda_jetpack6) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack6.tar.in ;;
             lib/ollama/rocm) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-rocm.tar.in ;;
.github/workflows/test.yaml (vendored): 6 changes
@@ -46,7 +46,7 @@ jobs:
       include:
         - preset: CPU
         - preset: CUDA
-          container: nvidia/cuda:11.8.0-devel-ubuntu22.04
+          container: nvidia/cuda:12.8.1-devel-ubuntu22.04
           flags: '-DCMAKE_CUDA_ARCHITECTURES=87'
         - preset: ROCm
           container: rocm/dev-ubuntu-22.04:6.1.2
@@ -78,7 +78,7 @@ jobs:
       include:
         - preset: CPU
         - preset: CUDA
-          install: https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe
+          install: https://developer.download.nvidia.com/compute/cuda/12.8.0/local_installers/cuda_12.8.0_571.96_windows.exe
           flags: '-DCMAKE_CUDA_ARCHITECTURES=80'
         - preset: ROCm
           install: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q4-WinSvr2022-For-HIP.exe
@@ -102,7 +102,7 @@ jobs:
         $ErrorActionPreference = "Stop"
         if ("${{ steps.cache-install.outputs.cache-hit }}" -ne 'true') {
           Invoke-WebRequest -Uri "${{ matrix.install }}" -OutFile "install.exe"
-          Start-Process -FilePath .\install.exe -ArgumentList (@("-s", "cudart_11.3", "nvcc_11.3", "cublas_11.3", "cublas_dev_11.3")) -NoNewWindow -Wait
+          Start-Process -FilePath .\install.exe -ArgumentList (@("-s", "cudart_12.8", "nvcc_12.8", "cublas_12.8", "cublas_dev_12.8")) -NoNewWindow -Wait
         }

         $cudaPath = (Resolve-Path "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\*").path
@@ -78,14 +78,13 @@ if(CMAKE_CUDA_COMPILER)

   find_package(CUDAToolkit)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cuda)
-  set(OLLAMA_CUDA_INSTALL_DIR ${OLLAMA_INSTALL_DIR}/cuda_v${CUDAToolkit_VERSION_MAJOR})
   install(TARGETS ggml-cuda
     RUNTIME_DEPENDENCIES
       DIRECTORIES ${CUDAToolkit_BIN_DIR} ${CUDAToolkit_LIBRARY_DIR}
       PRE_INCLUDE_REGEXES cublas cublasLt cudart
       PRE_EXCLUDE_REGEXES ".*"
-    RUNTIME DESTINATION ${OLLAMA_CUDA_INSTALL_DIR} COMPONENT CUDA
-    LIBRARY DESTINATION ${OLLAMA_CUDA_INSTALL_DIR} COMPONENT CUDA
+    RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT CUDA
+    LIBRARY DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT CUDA
   )
 endif()

@@ -116,7 +115,11 @@ if(CMAKE_HIP_COMPILER)

   set(OLLAMA_HIP_INSTALL_DIR ${OLLAMA_INSTALL_DIR}/rocm)
   install(TARGETS ggml-hip
-    RUNTIME_DEPENDENCIES
+    RUNTIME_DEPENDENCY_SET rocm
+    RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT HIP
+    LIBRARY DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT HIP
+  )
+  install(RUNTIME_DEPENDENCY_SET rocm
       DIRECTORIES ${HIP_BIN_INSTALL_DIR} ${HIP_LIB_INSTALL_DIR}
       PRE_INCLUDE_REGEXES hipblas rocblas amdhip64 rocsolver amd_comgr hsa-runtime64 rocsparse tinfo rocprofiler-register drm drm_amdgpu numa elf
       PRE_EXCLUDE_REGEXES ".*"
@@ -17,20 +17,12 @@
       "name": "CUDA",
       "inherits": [ "Default" ]
     },
-    {
-      "name": "CUDA 11",
-      "inherits": [ "CUDA" ],
-      "cacheVariables": {
-        "CMAKE_CUDA_ARCHITECTURES": "50;52;53;60;61;70;75;80;86",
-        "CMAKE_CUDA_FLAGS": "-Wno-deprecated-gpu-targets"
-      }
-    },
     {
       "name": "CUDA 12",
       "inherits": [ "CUDA" ],
       "cacheVariables": {
         "CMAKE_CUDA_ARCHITECTURES": "50;60;61;70;75;80;86;87;89;90;90a;120",
-        "CMAKE_CUDA_FLAGS": "-Wno-deprecated-gpu-targets"
+        "CMAKE_CUDA_FLAGS": "-Wno-deprecated-gpu-targets -t 2"
       }
     },
     {
@@ -58,6 +50,7 @@
       "name": "ROCm 6",
       "inherits": [ "ROCm" ],
       "cacheVariables": {
+        "CMAKE_HIP_FLAGS": "-parallel-jobs=4",
         "AMDGPU_TARGETS": "gfx900;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151;gfx1200;gfx1201;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-"
       }
     }
@@ -78,11 +71,6 @@
       "configurePreset": "CUDA",
       "targets": [ "ggml-cuda" ]
     },
-    {
-      "name": "CUDA 11",
-      "inherits": [ "CUDA" ],
-      "configurePreset": "CUDA 11"
-    },
     {
       "name": "CUDA 12",
       "inherits": [ "CUDA" ],
Dockerfile: 24 changes
@@ -7,12 +7,13 @@ ARG JETPACK5VERSION=r35.4.1
 ARG JETPACK6VERSION=r36.4.0
 ARG CMAKEVERSION=3.31.2

-# CUDA v11 requires gcc v10. v10.3 has regressions, so the rockylinux 8.5 AppStream has the latest compatible version
+# We require gcc v10 minimum. v10.3 has regressions, so the rockylinux 8.5 AppStream has the latest compatible version
 FROM --platform=linux/amd64 rocm/dev-almalinux-8:${ROCMVERSION}-complete AS base-amd64
 RUN yum install -y yum-utils \
     && yum-config-manager --add-repo https://dl.rockylinux.org/vault/rocky/8.5/AppStream/\$basearch/os/ \
     && rpm --import https://dl.rockylinux.org/pub/rocky/RPM-GPG-KEY-Rocky-8 \
     && dnf install -y yum-utils ccache gcc-toolset-10-gcc-10.2.1-8.2.el8 gcc-toolset-10-gcc-c++-10.2.1-8.2.el8 gcc-toolset-10-binutils-2.35-11.el8 \
+    && dnf install -y ccache \
     && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo
 ENV PATH=/opt/rh/gcc-toolset-10/root/usr/bin:$PATH

@@ -38,15 +39,6 @@ RUN --mount=type=cache,target=/root/.ccache \
     && cmake --build --parallel --preset 'CPU' \
     && cmake --install build --component CPU --strip --parallel 8

-FROM base AS cuda-11
-ARG CUDA11VERSION=11.3
-RUN dnf install -y cuda-toolkit-${CUDA11VERSION//./-}
-ENV PATH=/usr/local/cuda-11/bin:$PATH
-RUN --mount=type=cache,target=/root/.ccache \
-    cmake --preset 'CUDA 11' \
-    && cmake --build --parallel --preset 'CUDA 11' \
-    && cmake --install build --component CUDA --strip --parallel 8
-
 FROM base AS cuda-12
 ARG CUDA12VERSION=12.8
 RUN dnf install -y cuda-toolkit-${CUDA12VERSION//./-}

@@ -98,17 +90,15 @@ RUN --mount=type=cache,target=/root/.cache/go-build \
     go build -trimpath -buildmode=pie -o /bin/ollama .

 FROM --platform=linux/amd64 scratch AS amd64
-COPY --from=cuda-11 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_v11
-COPY --from=cuda-12 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_v12
+COPY --from=cuda-12 dist/lib/ollama /lib/ollama

 FROM --platform=linux/arm64 scratch AS arm64
-COPY --from=cuda-11 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_v11
-COPY --from=cuda-12 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_v12
-COPY --from=jetpack-5 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_jetpack5
-COPY --from=jetpack-6 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_jetpack6
+COPY --from=cuda-12 dist/lib/ollama /lib/ollama/cuda_sbsa
+COPY --from=jetpack-5 dist/lib/ollama /lib/ollama/cuda_jetpack5
+COPY --from=jetpack-6 dist/lib/ollama /lib/ollama/cuda_jetpack6

 FROM scratch AS rocm
-COPY --from=rocm-6 dist/lib/ollama/rocm /lib/ollama/rocm
+COPY --from=rocm-6 dist/lib/ollama /lib/ollama

 FROM ${FLAVOR} AS archive
 COPY --from=cpu dist/lib/ollama /lib/ollama
@@ -409,6 +409,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [macLlama (macOS native)](https://github.com/hellotunamayo/macLlama) (A native macOS GUI application for interacting with Ollama models, featuring a chat interface.)
 - [GPTranslate](https://github.com/philberndt/GPTranslate) (A fast and lightweight, AI powered desktop translation application written with Rust and Tauri. Features real-time translation with OpenAI/Azure/Ollama.)
 - [ollama launcher](https://github.com/NGC13009/ollama-launcher) (A launcher for Ollama, aiming to provide users with convenient functions such as ollama server launching, management, or configuration.)
+- [ai-hub](https://github.com/Aj-Seven/ai-hub) (AI Hub supports multiple models via API keys and Chat support via Ollama API.)

 ### Cloud

@@ -2,9 +2,6 @@ package convert

 import (
     "fmt"
-    "io"
-    "slices"
-    "strings"

     "github.com/ollama/ollama/fs/ggml"
 )
@@ -30,65 +27,38 @@ func (p *mixtralModel) KV(t *Tokenizer) ggml.KV {
 }

 func (p *mixtralModel) Tensors(ts []Tensor) []*ggml.Tensor {
-    oldnew := []string{
-        "model.layers", "blk",
-        "w1", "ffn_gate_exps",
-        "w2", "ffn_down_exps",
-        "w3", "ffn_up_exps",
-    }
-
-    for i := range p.NumLocalExperts {
-        oldnew = append(oldnew, fmt.Sprintf(".block_sparse_moe.experts.%d.", i), ".")
-    }
-
-    // group experts of the same layer (model.layers.%d) and type (w[123]) into a single tensor
-    namer := strings.NewReplacer(oldnew...)
-    experts := make(map[string]experts)
-
-    // merge experts into a single tensor while removing them from ts
-    ts = slices.DeleteFunc(ts, func(t Tensor) bool {
-        if !strings.Contains(t.Name(), ".block_sparse_moe.experts.") {
-            return false
-        }
-
-        name := namer.Replace(t.Name())
-        experts[name] = append(experts[name], t)
-        return true
-    })
-
-    var out []*ggml.Tensor
-    for n, e := range experts {
-        // TODO(mxyng): sanity check experts
-        out = append(out, &ggml.Tensor{
-            Name:     n,
-            Kind:     e[0].Kind(),
-            Shape:    append([]uint64{uint64(len(e))}, e[0].Shape()...),
-            WriterTo: e,
+    merges := make([]merge, 0, p.NumHiddenLayers*6)
+    for i := range p.NumHiddenLayers {
+        merges = append(merges, merge{
+            fmt.Sprintf("blk.%d.*.w1.weight", i),
+            fmt.Sprintf("blk.%d.ffn_gate_exps.weight", i),
+        }, merge{
+            fmt.Sprintf("blk.%d.*.w1.bias", i),
+            fmt.Sprintf("blk.%d.ffn_gate_exps.bias", i),
+        }, merge{
+            fmt.Sprintf("blk.%d.*.w2.weight", i),
+            fmt.Sprintf("blk.%d.ffn_up_exps.weight", i),
+        }, merge{
+            fmt.Sprintf("blk.%d.*.w2.bias", i),
+            fmt.Sprintf("blk.%d.ffn_up_exps.bias", i),
+        }, merge{
+            fmt.Sprintf("blk.%d.*.w3.weight", i),
+            fmt.Sprintf("blk.%d.ffn_down_exps.weight", i),
+        }, merge{
+            fmt.Sprintf("blk.%d.*.w3.bias", i),
+            fmt.Sprintf("blk.%d.ffn_down_exps.bias", i),
         })
     }

+    out, ts := mergeTensors(ts, merges...)
     return append(out, p.llamaModel.Tensors(ts)...)
 }

 func (p *mixtralModel) Replacements() []string {
     return append(
         p.llamaModel.Replacements(),
+        "model.layers", "blk",
         "block_sparse_moe.gate", "ffn_gate_inp",
+        "block_sparse_moe.experts.", ".",
     )
 }
-
-type experts []Tensor
-
-func (e experts) WriteTo(w io.Writer) (int64, error) {
-    // TODO(mxyng): experts _should_ be numerically sorted by expert but this should check
-    for _, t := range e {
-        // the canonical merged experts tensor stacks all experts along a new, 0 axis,
-        // e.g. `tensor.Stack(0, e[0], e[1:]...)`, which requires allocating temporary buffers
-        // this accomplishes the same thing by writing each expert tensor in sequence
-        if _, err := t.WriteTo(w); err != nil {
-            return 0, err
-        }
-    }
-
-    return 0, nil
-}
@@ -2,7 +2,9 @@ package convert

 import (
     "cmp"
+    "io"
     "iter"
+    "path"
     "slices"
     "strings"

@@ -74,3 +76,54 @@ func splitDim(t Tensor, dim int, splits ...split) iter.Seq[*ggml.Tensor] {
         }
     }
 }
+
+type merge struct {
+    pattern, name string
+}
+
+// mergeTensors merges tensors that match a given pattern into a single tensor.
+func mergeTensors(unmatched []Tensor, merges ...merge) (out []*ggml.Tensor, _ []Tensor) {
+    var matched []Tensor
+    for i := range merges {
+        matched, unmatched = slicesSplitFunc(unmatched, func(t Tensor) bool {
+            matched, _ := path.Match(merges[i].pattern, t.Name())
+            return matched
+        })
+
+        if len(matched) > 0 {
+            out = append(out, &ggml.Tensor{
+                Name:     merges[i].name,
+                Kind:     matched[0].Kind(),
+                Shape:    append([]uint64{uint64(len(matched))}, matched[0].Shape()...),
+                WriterTo: mergeGroup(matched),
+            })
+        }
+    }
+
+    return out, unmatched
+}
+
+// slicesSplitFunc splits a slice into two slices based on a predicate function.
+func slicesSplitFunc[S ~[]E, E comparable](s S, fn func(e E) bool) (matched, unmatched S) {
+    for _, e := range s {
+        if fn(e) {
+            matched = append(matched, e)
+        } else {
+            unmatched = append(unmatched, e)
+        }
+    }
+
+    return matched, unmatched
+}
+
+type mergeGroup []Tensor
+
+func (g mergeGroup) WriteTo(w io.Writer) (int64, error) {
+    for _, t := range g {
+        if _, err := t.WriteTo(w); err != nil {
+            return 0, err
+        }
+    }
+
+    return 0, nil
+}
|
|||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"github.com/google/go-cmp/cmp"
|
||||||
|
"github.com/ollama/ollama/fs/ggml"
|
||||||
"github.com/pdevine/tensor"
|
"github.com/pdevine/tensor"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -302,3 +304,99 @@ func TestSplitDim(t *testing.T) {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestMerge(t *testing.T) {
|
||||||
|
unmatched := []Tensor{
|
||||||
|
&fakeTensor{
|
||||||
|
name: "a.0.b",
|
||||||
|
shape: []uint64{5, 2},
|
||||||
|
data: []float32{10, 11, 12, 13, 14, 15, 16, 17, 18, 19},
|
||||||
|
},
|
||||||
|
&fakeTensor{
|
||||||
|
name: "a.1.b",
|
||||||
|
shape: []uint64{5, 2},
|
||||||
|
data: []float32{20, 21, 22, 23, 24, 25, 26, 27, 28, 29},
|
||||||
|
},
|
||||||
|
&fakeTensor{
|
||||||
|
name: "c.0.d",
|
||||||
|
shape: []uint64{5, 2},
|
||||||
|
data: []float32{30, 31, 32, 33, 34, 35, 36, 37, 38, 39},
|
||||||
|
},
|
||||||
|
&fakeTensor{
|
||||||
|
name: "c.1.d",
|
||||||
|
shape: []uint64{5, 2},
|
||||||
|
data: []float32{40, 41, 42, 43, 44, 45, 46, 47, 48, 49},
|
||||||
|
},
|
||||||
|
&fakeTensor{
|
||||||
|
name: "e.0.f",
|
||||||
|
shape: []uint64{5, 2},
|
||||||
|
data: []float32{50, 51, 52, 53, 54, 55, 56, 57, 58, 59},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
checkMatched := func(t *testing.T, n int, matched []*ggml.Tensor) {
|
||||||
|
for i := range n {
|
||||||
|
got := matched[i]
|
||||||
|
if diff := cmp.Diff([]uint64{2, 5, 2}, got.Shape); diff != "" {
|
||||||
|
t.Errorf("unexpected (-want +got):\n%s", diff)
|
||||||
|
}
|
||||||
|
|
||||||
|
var b bytes.Buffer
|
||||||
|
if _, err := got.WriteTo(&b); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
f32s := make([]float32, 20)
|
||||||
|
if err := binary.Read(&b, binary.LittleEndian, &f32s); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
offset := 10 + (i * 20)
|
||||||
|
want := make([]float32, 20)
|
||||||
|
for j := range 20 {
|
||||||
|
want[j] = float32(offset + j)
|
||||||
|
}
|
||||||
|
|
||||||
|
if diff := cmp.Diff(want, f32s); diff != "" {
|
||||||
|
t.Errorf("unexpected data (-want +got):\n%s", diff)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
t.Run("single merge", func(t *testing.T) {
|
||||||
|
matched, unmatched := mergeTensors(unmatched, merge{"a.*.b", "a.b"})
|
||||||
|
if len(unmatched) != 3 {
|
||||||
|
t.Error("expected 3 remaining tensors, got", len(unmatched))
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(matched) != 1 {
|
||||||
|
t.Error("expected 1 merged tensor, got", len(matched))
|
||||||
|
}
|
||||||
|
|
||||||
|
checkMatched(t, 1, matched)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("multiple merges", func(t *testing.T) {
|
||||||
|
matched, unmatched := mergeTensors(unmatched, merge{"a.*.b", "a.b"}, merge{"c.*.d", "c.d"})
|
||||||
|
if len(unmatched) != 1 {
|
||||||
|
t.Error("expected 1 remaining tensors, got", len(unmatched))
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(matched) != 2 {
|
||||||
|
t.Error("expected 2 merged tensor, got", len(matched))
|
||||||
|
}
|
||||||
|
|
||||||
|
checkMatched(t, 2, matched)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("no match", func(t *testing.T) {
|
||||||
|
matched, unmatched := mergeTensors(unmatched, merge{"x.*.y", "x.y"})
|
||||||
|
if len(unmatched) != 5 {
|
||||||
|
t.Error("expected 5 remaining tensors, got", len(unmatched))
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(matched) != 0 {
|
||||||
|
t.Error("expected no merged tensors, got", len(matched))
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|||||||
@@ -3,6 +3,7 @@
|
|||||||
package discover
|
package discover
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
"regexp"
|
"regexp"
|
||||||
@@ -55,10 +56,13 @@ func cudaVariant(gpuInfo CudaGPUInfo) string {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return "sbsa"
|
||||||
}
|
}
|
||||||
|
|
||||||
// driver 12.0 has problems with the cuda v12 library, so run v11 on those older drivers
|
// driver 12.0 has problems with the cuda v12 library, so run v11 on those older drivers
|
||||||
if gpuInfo.DriverMajor < 12 || (gpuInfo.DriverMajor == 12 && gpuInfo.DriverMinor == 0) {
|
if gpuInfo.DriverMajor < 12 || (gpuInfo.DriverMajor == 12 && gpuInfo.DriverMinor == 0) {
|
||||||
|
// The detected driver is older than Feb 2023
|
||||||
|
slog.Warn("old CUDA driver detected - please upgrade to a newer driver", "version", fmt.Sprintf("%d.%d", gpuInfo.DriverMajor, gpuInfo.DriverMinor))
|
||||||
return "v11"
|
return "v11"
|
||||||
}
|
}
|
||||||
return "v12"
|
return "v12"
|
||||||
|
|||||||
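As a rough illustration of the driver gating shown in the hunk above, the standalone sketch below (not part of the commit) maps a detected driver version to the CUDA runtime directory Ollama would pick; the sample version numbers are invented.

```go
package main

import "fmt"

// Drivers older than 12.1 fall back to the cuda_v11 runtime; newer drivers
// use cuda_v12. Mirrors the condition in the diff above.
func cudaRuntimeFor(driverMajor, driverMinor int) string {
	if driverMajor < 12 || (driverMajor == 12 && driverMinor == 0) {
		return "v11" // pre-Feb 2023 driver: warn and use the older runtime
	}
	return "v12"
}

func main() {
	for _, v := range [][2]int{{11, 4}, {12, 0}, {12, 8}} {
		fmt.Printf("driver %d.%d -> cuda_%s\n", v[0], v[1], cudaRuntimeFor(v[0], v[1]))
	}
}
```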
@@ -12,7 +12,7 @@ import (
 // '../lib/ollama' on Linux and the executable's directory on macOS
 // note: distribution builds, additional GPU-specific libraries are
 // found in subdirectories of the returned path, such as
-// 'cuda_v11', 'cuda_v12', 'rocm', etc.
+// 'cuda_v12', 'rocm', etc.
 var LibOllamaPath string = func() string {
     exe, err := os.Executable()
     if err != nil {
@@ -1,6 +1,6 @@
 # GPU
 ## Nvidia
-Ollama supports Nvidia GPUs with compute capability 5.0+.
+Ollama supports Nvidia GPUs with compute capability 5.0+ and driver version 531 and newer.

 Check your compute compatibility to see if your card is supported:
 [https://developer.nvidia.com/cuda-gpus](https://developer.nvidia.com/cuda-gpus)
@@ -43,7 +43,7 @@ Ollama includes multiple LLM libraries compiled for different GPUs and CPU vector features.
 In the server log, you will see a message that looks something like this (varies from release to release):

 ```
-Dynamic LLM libraries [rocm_v6 cpu cpu_avx cpu_avx2 cuda_v11 rocm_v5]
+Dynamic LLM libraries [rocm_v6 cpu cpu_avx cpu_avx2 cuda_v12 rocm_v5]
 ```

 **Experimental LLM Library Override**
fs/gguf/gguf.go: new file, 347 lines
@@ -0,0 +1,347 @@
package gguf

import (
    "bytes"
    "cmp"
    "encoding/binary"
    "errors"
    "fmt"
    "io"
    "iter"
    "os"
    "slices"
    "strings"
)

const (
    typeUint8 uint32 = iota
    typeInt8
    typeUint16
    typeInt16
    typeUint32
    typeInt32
    typeFloat32
    typeBool
    typeString
    typeArray
    typeUint64
    typeInt64
    typeFloat64
)

var ErrUnsupported = errors.New("unsupported")

type File struct {
    Magic   [4]byte
    Version uint32

    keyValues *lazy[KeyValue]
    tensors   *lazy[TensorInfo]
    offset    int64

    file   *os.File
    reader *bufferedReader
    bts    []byte
}

func Open(path string) (f *File, err error) {
    f = &File{bts: make([]byte, 4096)}
    f.file, err = os.Open(path)
    if err != nil {
        return nil, err
    }

    f.reader = newBufferedReader(f.file, 32<<10)

    if err := binary.Read(f.reader, binary.LittleEndian, &f.Magic); err != nil {
        return nil, err
    }

    if bytes.Equal(f.Magic[:], []byte("gguf")) {
        return nil, fmt.Errorf("%w file type %v", ErrUnsupported, f.Magic)
    }

    if err := binary.Read(f.reader, binary.LittleEndian, &f.Version); err != nil {
        return nil, err
    }

    if f.Version < 2 {
        return nil, fmt.Errorf("%w version %v", ErrUnsupported, f.Version)
    }

    f.tensors, err = newLazy(f, f.readTensor)
    if err != nil {
        return nil, err
    }

    f.tensors.successFunc = func() error {
        offset := f.reader.offset

        alignment := cmp.Or(f.KeyValue("general.alignment").Int(), 32)
        f.offset = offset + (alignment-offset%alignment)%alignment
        return nil
    }

    f.keyValues, err = newLazy(f, f.readKeyValue)
    if err != nil {
        return nil, err
    }

    return f, nil
}

func (f *File) readTensor() (TensorInfo, error) {
    name, err := readString(f)
    if err != nil {
        return TensorInfo{}, err
    }

    dims, err := read[uint32](f)
    if err != nil {
        return TensorInfo{}, err
    }

    shape := make([]uint64, dims)
    for i := range dims {
        shape[i], err = read[uint64](f)
        if err != nil {
            return TensorInfo{}, err
        }
    }

    type_, err := read[uint32](f)
    if err != nil {
        return TensorInfo{}, err
    }

    offset, err := read[uint64](f)
    if err != nil {
        return TensorInfo{}, err
    }

    return TensorInfo{
        Name:   name,
        Offset: offset,
        Shape:  shape,
        Type:   TensorType(type_),
    }, nil
}

func (f *File) readKeyValue() (KeyValue, error) {
    key, err := readString(f)
    if err != nil {
        return KeyValue{}, err
    }

    t, err := read[uint32](f)
    if err != nil {
        return KeyValue{}, err
    }

    value, err := func() (any, error) {
        switch t {
        case typeUint8:
            return read[uint8](f)
        case typeInt8:
            return read[int8](f)
        case typeUint16:
            return read[uint16](f)
        case typeInt16:
            return read[int16](f)
        case typeUint32:
            return read[uint32](f)
        case typeInt32:
            return read[int32](f)
        case typeUint64:
            return read[uint64](f)
        case typeInt64:
            return read[int64](f)
        case typeFloat32:
            return read[float32](f)
        case typeFloat64:
            return read[float64](f)
        case typeBool:
            return read[bool](f)
        case typeString:
            return readString(f)
        case typeArray:
            return readArray(f)
        default:
            return nil, fmt.Errorf("%w type %d", ErrUnsupported, t)
        }
    }()
    if err != nil {
        return KeyValue{}, err
    }

    return KeyValue{
        Key:   key,
        Value: Value{value},
    }, nil
}

func read[T any](f *File) (t T, err error) {
    err = binary.Read(f.reader, binary.LittleEndian, &t)
    return t, err
}

func readString(f *File) (string, error) {
    n, err := read[uint64](f)
    if err != nil {
        return "", err
    }

    if int(n) > len(f.bts) {
        f.bts = make([]byte, n)
    }

    bts := f.bts[:n]
    if _, err := io.ReadFull(f.reader, bts); err != nil {
        return "", err
    }
    defer clear(bts)

    return string(bts), nil
}

func readArray(f *File) (any, error) {
    t, err := read[uint32](f)
    if err != nil {
        return nil, err
    }

    n, err := read[uint64](f)
    if err != nil {
        return nil, err
    }

    switch t {
    case typeUint8:
        return readArrayData[uint8](f, n)
    case typeInt8:
        return readArrayData[int8](f, n)
    case typeUint16:
        return readArrayData[uint16](f, n)
    case typeInt16:
        return readArrayData[int16](f, n)
    case typeUint32:
        return readArrayData[uint32](f, n)
    case typeInt32:
        return readArrayData[int32](f, n)
    case typeUint64:
        return readArrayData[uint64](f, n)
    case typeInt64:
        return readArrayData[int64](f, n)
    case typeFloat32:
        return readArrayData[float32](f, n)
    case typeFloat64:
        return readArrayData[float64](f, n)
    case typeBool:
        return readArrayData[bool](f, n)
    case typeString:
        return readArrayString(f, n)
    default:
        return nil, fmt.Errorf("%w type %d", ErrUnsupported, t)
    }
}

func readArrayData[T any](f *File, n uint64) (s []T, err error) {
    s = make([]T, n)
    for i := range n {
        e, err := read[T](f)
        if err != nil {
            return nil, err
        }

        s[i] = e
    }

    return s, nil
}

func readArrayString(f *File, n uint64) (s []string, err error) {
    s = make([]string, n)
    for i := range n {
        e, err := readString(f)
        if err != nil {
            return nil, err
        }

        s[i] = e
    }

    return s, nil
}

func (f *File) Close() error {
    f.keyValues.stop()
    f.tensors.stop()
    return f.file.Close()
}

func (f *File) KeyValue(key string) KeyValue {
    if !strings.HasPrefix(key, "general.") && !strings.HasPrefix(key, "tokenizer.") {
        key = f.KeyValue("general.architecture").String() + "." + key
    }

    if index := slices.IndexFunc(f.keyValues.values, func(kv KeyValue) bool {
        return kv.Key == key
    }); index >= 0 {
        return f.keyValues.values[index]
    }

    for keyValue, ok := f.keyValues.next(); ok; keyValue, ok = f.keyValues.next() {
        if keyValue.Key == key {
            return keyValue
        }
    }

    return KeyValue{}
}

func (f *File) NumKeyValues() int {
    return int(f.keyValues.count)
}

func (f *File) KeyValues() iter.Seq2[int, KeyValue] {
    return f.keyValues.All()
}

func (f *File) TensorInfo(name string) TensorInfo {
    if index := slices.IndexFunc(f.tensors.values, func(t TensorInfo) bool {
        return t.Name == name
    }); index >= 0 {
        return f.tensors.values[index]
    }

    // fast-forward through key values if we haven't already
    _ = f.keyValues.rest()
    for tensor, ok := f.tensors.next(); ok; tensor, ok = f.tensors.next() {
        if tensor.Name == name {
            return tensor
        }
    }

    return TensorInfo{}
}

func (f *File) NumTensors() int {
    return int(f.tensors.count)
}

func (f *File) TensorInfos() iter.Seq2[int, TensorInfo] {
    // fast forward through key values if we haven't already
    f.keyValues.rest()
    return f.tensors.All()
}

func (f *File) TensorReader(name string) (TensorInfo, io.Reader, error) {
    t := f.TensorInfo(name)
    if t.NumBytes() == 0 {
        return TensorInfo{}, nil, fmt.Errorf("tensor %s not found", name)
    }

    // fast forward through tensor info if we haven't already
    _ = f.tensors.rest()
    return t, io.NewSectionReader(f.file, f.offset+int64(t.Offset), t.NumBytes()), nil
}
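A minimal sketch of how the new fs/gguf package can be used, based only on the exported API shown above; the model path is a placeholder and error handling is kept short.

```go
package main

import (
	"fmt"
	"log"

	"github.com/ollama/ollama/fs/gguf"
)

func main() {
	// Placeholder path; any GGUF v2+ file works.
	f, err := gguf.Open("/path/to/model.gguf")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// Metadata lookups are lazy: only as much of the header is parsed as needed.
	fmt.Println("architecture:", f.KeyValue("general.architecture").String())
	fmt.Println("block count: ", f.KeyValue("block_count").Uint()) // prefixed with the architecture automatically

	// Iterating tensor infos fast-forwards through the remaining key-values first.
	for _, ti := range f.TensorInfos() {
		fmt.Println(ti.Name, ti.Shape)
	}
}
```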
fs/gguf/gguf_test.go: new file, 249 lines
@@ -0,0 +1,249 @@
|
|||||||
|
package gguf_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"os"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/google/go-cmp/cmp"
|
||||||
|
"github.com/google/go-cmp/cmp/cmpopts"
|
||||||
|
"github.com/ollama/ollama/fs/ggml"
|
||||||
|
"github.com/ollama/ollama/fs/gguf"
|
||||||
|
)
|
||||||
|
|
||||||
|
func createBinFile(tb testing.TB) string {
|
||||||
|
tb.Helper()
|
||||||
|
f, err := os.CreateTemp(tb.TempDir(), "")
|
||||||
|
if err != nil {
|
||||||
|
tb.Fatal(err)
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
kv := ggml.KV{
|
||||||
|
"general.architecture": "llama",
|
||||||
|
"llama.block_count": uint32(8),
|
||||||
|
"llama.embedding_length": uint32(3),
|
||||||
|
"llama.attention.head_count": uint32(2),
|
||||||
|
"llama.attention.head_count_kv": uint32(2),
|
||||||
|
"llama.attention.key_length": uint32(3),
|
||||||
|
"llama.rope.dimension_count": uint32(4),
|
||||||
|
"llama.rope.freq_base": float32(10000.0),
|
||||||
|
"llama.rope.freq_scale": float32(1.0),
|
||||||
|
"llama.attention.layer_norm_rms_epsilon": float32(1e-6),
|
||||||
|
"tokenizer.ggml.eos_token_id": uint32(0),
|
||||||
|
"tokenizer.ggml.eos_token_ids": []int32{1, 2, 3},
|
||||||
|
"tokenizer.ggml.tokens": []string{"hello", "world"},
|
||||||
|
"tokenizer.ggml.scores": []float32{0, 1},
|
||||||
|
}
|
||||||
|
|
||||||
|
tensors := []*ggml.Tensor{
|
||||||
|
{
|
||||||
|
Name: "token_embd.weight",
|
||||||
|
Kind: 0,
|
||||||
|
Shape: []uint64{2, 3},
|
||||||
|
WriterTo: bytes.NewBuffer(make([]byte, 4*2*3)),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "output.weight",
|
||||||
|
Kind: 0,
|
||||||
|
Shape: []uint64{3, 2},
|
||||||
|
WriterTo: bytes.NewBuffer(make([]byte, 4*3*2)),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := range 8 {
|
||||||
|
tensors = append(tensors, &ggml.Tensor{
|
||||||
|
Name: "blk." + strconv.Itoa(i) + ".attn_q.weight",
|
||||||
|
Kind: 0,
|
||||||
|
Shape: []uint64{3, 3},
|
||||||
|
WriterTo: bytes.NewBuffer(make([]byte, 4*3*3)),
|
||||||
|
}, &ggml.Tensor{
|
||||||
|
Name: "blk." + strconv.Itoa(i) + ".attn_k.weight",
|
||||||
|
Kind: 0,
|
||||||
|
Shape: []uint64{3, 3},
|
||||||
|
WriterTo: bytes.NewBuffer(make([]byte, 4*3*3)),
|
||||||
|
}, &ggml.Tensor{
|
||||||
|
Name: "blk." + strconv.Itoa(i) + ".attn_v.weight",
|
||||||
|
Kind: 0,
|
||||||
|
Shape: []uint64{3, 3},
|
||||||
|
WriterTo: bytes.NewBuffer(make([]byte, 4*3*3)),
|
||||||
|
}, &ggml.Tensor{
|
||||||
|
Name: "blk." + strconv.Itoa(i) + ".attn_output.weight",
|
||||||
|
Kind: 0,
|
||||||
|
Shape: []uint64{3, 3},
|
||||||
|
WriterTo: bytes.NewBuffer(make([]byte, 4*3*3)),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := ggml.WriteGGUF(f, kv, tensors); err != nil {
|
||||||
|
tb.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return f.Name()
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRead(t *testing.T) {
|
||||||
|
f, err := gguf.Open(createBinFile(t))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
if got := f.KeyValue("does.not.exist").Valid(); got {
|
||||||
|
t.Errorf(`KeyValue("does.not.exist").Exists() = %v, want false`, got)
|
||||||
|
}
|
||||||
|
|
||||||
|
if got := f.KeyValue("general.architecture").String(); got != "llama" {
|
||||||
|
t.Errorf(`KeyValue("general.architecture").String() = %q, want %q`, got, "llama")
|
||||||
|
}
|
||||||
|
|
||||||
|
if got := f.TensorInfo("token_embd.weight"); got.Name != "token_embd.weight" {
|
||||||
|
t.Errorf(`TensorInfo("token_embd.weight").Name = %q, want %q`, got.Name, "token_embd.weight")
|
||||||
|
} else if diff := cmp.Diff(got.Shape, []uint64{2, 3}); diff != "" {
|
||||||
|
t.Errorf(`TensorInfo("token_embd.weight").Shape mismatch (-got +want):\n%s`, diff)
|
||||||
|
} else if got.Type != gguf.TensorTypeF32 {
|
||||||
|
t.Errorf(`TensorInfo("token_embd.weight").Type = %d, want %d`, got.Type, gguf.TensorTypeF32)
|
||||||
|
}
|
||||||
|
|
||||||
|
if got := f.KeyValue("block_count").Uint(); got != 8 {
|
||||||
|
t.Errorf(`KeyValue("block_count").Uint() = %d, want %d`, got, 8)
|
||||||
|
}
|
||||||
|
|
||||||
|
if diff := cmp.Diff(f.KeyValue("tokenizer.ggml.tokens").Strings(), []string{"hello", "world"}); diff != "" {
|
||||||
|
t.Errorf("KeyValue(\"tokenizer.ggml.tokens\").Strings() mismatch (-got +want):\n%s", diff)
|
||||||
|
}
|
||||||
|
|
||||||
|
if diff := cmp.Diff(f.KeyValue("tokenizer.ggml.scores").Floats(), []float64{0, 1}); diff != "" {
|
||||||
|
t.Errorf("KeyValue(\"tokenizer.ggml.scores\").Ints() mismatch (-got +want):\n%s", diff)
|
||||||
|
}
|
||||||
|
|
||||||
|
var kvs []string
|
||||||
|
for _, kv := range f.KeyValues() {
|
||||||
|
if !kv.Valid() {
|
||||||
|
t.Error("found invalid key-value pair:", kv)
|
||||||
|
}
|
||||||
|
|
||||||
|
kvs = append(kvs, kv.Key)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(kvs) != f.NumKeyValues() {
|
||||||
|
t.Errorf("iterated key count = %d, want %d", len(kvs), f.NumKeyValues())
|
||||||
|
}
|
||||||
|
|
||||||
|
if diff := cmp.Diff(kvs, []string{
|
||||||
|
"general.architecture",
|
||||||
|
"llama.block_count",
|
||||||
|
"llama.embedding_length",
|
||||||
|
"llama.attention.head_count",
|
||||||
|
"llama.attention.head_count_kv",
|
||||||
|
"llama.attention.key_length",
|
||||||
|
"llama.rope.dimension_count",
|
||||||
|
"llama.rope.freq_base",
|
||||||
|
"llama.rope.freq_scale",
|
||||||
|
"llama.attention.layer_norm_rms_epsilon",
|
||||||
|
"tokenizer.ggml.eos_token_id",
|
||||||
|
"tokenizer.ggml.eos_token_ids",
|
||||||
|
"tokenizer.ggml.tokens",
|
||||||
|
"tokenizer.ggml.scores",
|
||||||
|
}, cmpopts.SortSlices(strings.Compare)); diff != "" {
|
||||||
|
t.Errorf("KeyValues() mismatch (-got +want):\n%s", diff)
|
||||||
|
}
|
||||||
|
|
||||||
|
var tis []string
|
||||||
|
for _, ti := range f.TensorInfos() {
|
||||||
|
if !ti.Valid() {
|
||||||
|
t.Error("found invalid tensor info:", ti)
|
||||||
|
}
|
||||||
|
|
||||||
|
tis = append(tis, ti.Name)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(tis) != f.NumTensors() {
|
||||||
|
t.Errorf("iterated tensor count = %d, want %d", len(tis), f.NumTensors())
|
||||||
|
}
|
||||||
|
|
||||||
|
if diff := cmp.Diff(tis, []string{
|
||||||
|
"token_embd.weight",
|
||||||
|
"output.weight",
|
||||||
|
"blk.0.attn_q.weight",
|
||||||
|
"blk.0.attn_k.weight",
|
||||||
|
"blk.0.attn_v.weight",
|
||||||
|
"blk.0.attn_output.weight",
|
||||||
|
"blk.1.attn_q.weight",
|
||||||
|
"blk.1.attn_k.weight",
|
||||||
|
"blk.1.attn_v.weight",
|
||||||
|
"blk.1.attn_output.weight",
|
||||||
|
"blk.2.attn_q.weight",
|
||||||
|
"blk.2.attn_k.weight",
|
||||||
|
"blk.2.attn_v.weight",
|
||||||
|
"blk.2.attn_output.weight",
|
||||||
|
"blk.3.attn_q.weight",
|
||||||
|
"blk.3.attn_k.weight",
|
||||||
|
"blk.3.attn_v.weight",
|
||||||
|
"blk.3.attn_output.weight",
|
||||||
|
"blk.4.attn_q.weight",
|
||||||
|
"blk.4.attn_k.weight",
|
||||||
|
"blk.4.attn_v.weight",
|
||||||
|
"blk.4.attn_output.weight",
|
||||||
|
"blk.5.attn_q.weight",
|
||||||
|
"blk.5.attn_k.weight",
|
||||||
|
"blk.5.attn_v.weight",
|
||||||
|
"blk.5.attn_output.weight",
|
||||||
|
"blk.6.attn_q.weight",
|
||||||
|
"blk.6.attn_k.weight",
|
||||||
|
"blk.6.attn_v.weight",
|
||||||
|
"blk.6.attn_output.weight",
|
||||||
|
"blk.7.attn_q.weight",
|
||||||
|
"blk.7.attn_k.weight",
|
||||||
|
"blk.7.attn_v.weight",
|
||||||
|
"blk.7.attn_output.weight",
|
||||||
|
}, cmpopts.SortSlices(strings.Compare)); diff != "" {
|
||||||
|
t.Errorf("TensorInfos() mismatch (-got +want):\n%s", diff)
|
||||||
|
}
|
||||||
|
|
||||||
|
ti, r, err := f.TensorReader("output.weight")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf(`TensorReader("output.weight") error: %v`, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if ti.Name != "output.weight" {
|
||||||
|
t.Errorf(`TensorReader("output.weight").Name = %q, want %q`, ti.Name, "output.weight")
|
||||||
|
} else if diff := cmp.Diff(ti.Shape, []uint64{3, 2}); diff != "" {
|
||||||
|
t.Errorf(`TensorReader("output.weight").Shape mismatch (-got +want):\n%s`, diff)
|
||||||
|
} else if ti.Type != gguf.TensorTypeF32 {
|
||||||
|
t.Errorf(`TensorReader("output.weight").Type = %d, want %d`, ti.Type, gguf.TensorTypeF32)
|
||||||
|
}
|
||||||
|
|
||||||
|
var b bytes.Buffer
|
||||||
|
if _, err := b.ReadFrom(r); err != nil {
|
||||||
|
t.Fatalf(`ReadFrom TensorReader("output.weight") error: %v`, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if b.Len() != int(ti.NumBytes()) {
|
||||||
|
t.Errorf(`ReadFrom TensorReader("output.weight") length = %d, want %d`, b.Len(), ti.NumBytes())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkRead(b *testing.B) {
|
||||||
|
b.ReportAllocs()
|
||||||
|
|
||||||
|
p := createBinFile(b)
|
||||||
|
for b.Loop() {
|
||||||
|
f, err := gguf.Open(p)
|
||||||
|
if err != nil {
|
||||||
|
b.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if got := f.KeyValue("general.architecture").String(); got != "llama" {
|
||||||
|
b.Errorf("got = %q, want %q", got, "llama")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Iterate through some tensors
|
||||||
|
for range f.TensorInfos() {
|
||||||
|
}
|
||||||
|
|
||||||
|
f.Close()
|
||||||
|
}
|
||||||
|
}
|
||||||
fs/gguf/keyvalue.go: new file, 90 lines
@@ -0,0 +1,90 @@
package gguf

import (
    "reflect"
    "slices"
)

type KeyValue struct {
    Key string
    Value
}

func (kv KeyValue) Valid() bool {
    return kv.Key != "" && kv.Value.value != nil
}

type Value struct {
    value any
}

func value[T any](v Value, kinds ...reflect.Kind) (t T) {
    vv := reflect.ValueOf(v.value)
    if slices.Contains(kinds, vv.Kind()) {
        t = vv.Convert(reflect.TypeOf(t)).Interface().(T)
    }
    return
}

func values[T any](v Value, kinds ...reflect.Kind) (ts []T) {
    switch vv := reflect.ValueOf(v.value); vv.Kind() {
    case reflect.Slice:
        if slices.Contains(kinds, vv.Type().Elem().Kind()) {
            ts = make([]T, vv.Len())
            for i := range vv.Len() {
                ts[i] = vv.Index(i).Convert(reflect.TypeOf(ts[i])).Interface().(T)
            }
        }
    }
    return
}

// Int returns Value as a signed integer. If it is not a signed integer, it returns 0.
func (v Value) Int() int64 {
    return value[int64](v, reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64)
}

// Ints returns Value as a signed integer slice. If it is not a signed integer slice, it returns nil.
func (v Value) Ints() (i64s []int64) {
    return values[int64](v, reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64)
}

// Uint converts an unsigned integer value to uint64. If the value is not an unsigned integer, it returns 0.
func (v Value) Uint() uint64 {
    return value[uint64](v, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64)
}

// Uints returns Value as an unsigned integer slice. If it is not an unsigned integer slice, it returns nil.
func (v Value) Uints() (u64s []uint64) {
    return values[uint64](v, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64)
}

// Float returns Value as a float. If it is not a float, it returns 0.
func (v Value) Float() float64 {
    return value[float64](v, reflect.Float32, reflect.Float64)
}

// Floats returns Value as a float slice. If it is not a float slice, it returns nil.
func (v Value) Floats() (f64s []float64) {
    return values[float64](v, reflect.Float32, reflect.Float64)
}

// Bool returns Value as a boolean. If it is not a boolean, it returns false.
func (v Value) Bool() bool {
    return value[bool](v, reflect.Bool)
}

// Bools returns Value as a boolean slice. If it is not a boolean slice, it returns nil.
func (v Value) Bools() (bools []bool) {
    return values[bool](v, reflect.Bool)
}

// String returns Value as a string. If it is not a string, it returns an empty string.
func (v Value) String() string {
    return value[string](v, reflect.String)
}

// Strings returns Value as a string slice. If it is not a string slice, it returns nil.
func (v Value) Strings() (strings []string) {
    return values[string](v, reflect.String)
}
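To make the accessor semantics concrete, here is a small hypothetical example (not part of the commit) that would live inside the gguf package; the key and value are invented.

```go
package gguf

// Every getter converts compatible kinds and returns the zero value otherwise.
func exampleAccessors() (uint64, string, []float64) {
	kv := KeyValue{Key: "llama.block_count", Value: Value{uint32(8)}}

	n := kv.Uint()    // 8: uint32 converts to uint64
	s := kv.String()  // "": not a string, so the zero value
	fs := kv.Floats() // nil: scalar, not a float slice

	return n, s, fs
}
```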
fs/gguf/keyvalue_test.go: new file, 208 lines
@@ -0,0 +1,208 @@
|
|||||||
|
package gguf
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/google/go-cmp/cmp"
|
||||||
|
)
|
||||||
|
|
||||||
|
func split(name string, values map[string][]any) (matched []any, unmatched []any) {
|
||||||
|
for key, value := range values {
|
||||||
|
if key == name {
|
||||||
|
matched = value
|
||||||
|
} else {
|
||||||
|
unmatched = append(unmatched, value...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestValue(t *testing.T) {
|
||||||
|
values := map[string][]any{
|
||||||
|
"int64": {int(42), int8(42), int16(42), int32(42), int64(42)},
|
||||||
|
"uint64": {uint(42), uint8(42), uint16(42), uint32(42), uint64(42)},
|
||||||
|
"float64": {float32(42), float64(42)},
|
||||||
|
"string": {"42", "hello"},
|
||||||
|
"bool": {true, false},
|
||||||
|
}
|
||||||
|
|
||||||
|
t.Run("int64", func(t *testing.T) {
|
||||||
|
matched, unmatched := split("int64", values)
|
||||||
|
for _, v := range matched {
|
||||||
|
kv := KeyValue{"key", Value{v}}
|
||||||
|
if i64 := kv.Int(); i64 != 42 {
|
||||||
|
t.Errorf("expected 42, got %d", i64)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, v := range unmatched {
|
||||||
|
kv := KeyValue{"key", Value{v}}
|
||||||
|
if i64 := kv.Int(); i64 != 0 {
|
||||||
|
t.Errorf("expected 42, got %d", i64)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("uint64", func(t *testing.T) {
|
||||||
|
matched, unmatched := split("uint64", values)
|
||||||
|
for _, v := range matched {
|
||||||
|
kv := KeyValue{"key", Value{v}}
|
||||||
|
if u64 := kv.Uint(); u64 != 42 {
|
||||||
|
t.Errorf("expected 42, got %d", u64)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, v := range unmatched {
|
||||||
|
kv := KeyValue{"key", Value{v}}
|
||||||
|
if u64 := kv.Uint(); u64 != 0 {
|
||||||
|
t.Errorf("expected 42, got %d", u64)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("float64", func(t *testing.T) {
|
||||||
|
matched, unmatched := split("float64", values)
|
||||||
|
for _, v := range matched {
|
||||||
|
kv := KeyValue{"key", Value{v}}
|
||||||
|
if f64 := kv.Float(); f64 != 42 {
|
||||||
|
t.Errorf("expected 42, got %f", f64)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, v := range unmatched {
|
||||||
|
kv := KeyValue{"key", Value{v}}
|
||||||
|
if f64 := kv.Float(); f64 != 0 {
|
||||||
|
t.Errorf("expected 42, got %f", f64)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("string", func(t *testing.T) {
|
||||||
|
matched, unmatched := split("string", values)
|
||||||
|
for _, v := range matched {
|
||||||
|
kv := KeyValue{"key", Value{v}}
|
||||||
|
if s := kv.String(); s != v {
|
||||||
|
t.Errorf("expected 42, got %s", s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, v := range unmatched {
|
||||||
|
kv := KeyValue{"key", Value{v}}
|
||||||
|
if s := kv.String(); s != "" {
|
||||||
|
t.Errorf("expected 42, got %s", s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("bool", func(t *testing.T) {
|
||||||
|
matched, unmatched := split("bool", values)
|
||||||
|
for _, v := range matched {
|
||||||
|
kv := KeyValue{"key", Value{v}}
|
||||||
|
            if b := kv.Bool(); b != v {
                t.Errorf("expected true, got %v", b)
            }
        }

        for _, v := range unmatched {
            kv := KeyValue{"key", Value{v}}
            if b := kv.Bool(); b != false {
                t.Errorf("expected false, got %v", b)
            }
        }
    })
}

func TestValues(t *testing.T) {
    values := map[string][]any{
        "int64s":   {[]int{42}, []int8{42}, []int16{42}, []int32{42}, []int64{42}},
        "uint64s":  {[]uint{42}, []uint8{42}, []uint16{42}, []uint32{42}, []uint64{42}},
        "float64s": {[]float32{42}, []float64{42}},
        "strings":  {[]string{"42"}, []string{"hello"}},
        "bools":    {[]bool{true}, []bool{false}},
    }

    t.Run("int64s", func(t *testing.T) {
        matched, unmatched := split("int64s", values)
        for _, v := range matched {
            kv := KeyValue{"key", Value{v}}
            if diff := cmp.Diff(kv.Ints(), []int64{42}); diff != "" {
                t.Errorf("diff: %s", diff)
            }
        }

        for _, v := range unmatched {
            kv := KeyValue{"key", Value{v}}
            if i64s := kv.Ints(); i64s != nil {
                t.Errorf("expected nil, got %v", i64s)
            }
        }
    })

    t.Run("uint64s", func(t *testing.T) {
        matched, unmatched := split("uint64s", values)
        for _, v := range matched {
            kv := KeyValue{"key", Value{v}}
            if diff := cmp.Diff(kv.Uints(), []uint64{42}); diff != "" {
                t.Errorf("diff: %s", diff)
            }
        }

        for _, v := range unmatched {
            kv := KeyValue{"key", Value{v}}
            if u64s := kv.Uints(); u64s != nil {
                t.Errorf("expected nil, got %v", u64s)
            }
        }
    })

    t.Run("float64s", func(t *testing.T) {
        matched, unmatched := split("float64s", values)
        for _, v := range matched {
            kv := KeyValue{"key", Value{v}}
            if diff := cmp.Diff(kv.Floats(), []float64{42}); diff != "" {
                t.Errorf("diff: %s", diff)
            }
        }

        for _, v := range unmatched {
            kv := KeyValue{"key", Value{v}}
            if f64s := kv.Floats(); f64s != nil {
                t.Errorf("expected nil, got %v", f64s)
            }
        }
    })

    t.Run("strings", func(t *testing.T) {
        matched, unmatched := split("strings", values)
        for _, v := range matched {
            kv := KeyValue{"key", Value{v}}
            if diff := cmp.Diff(kv.Strings(), v); diff != "" {
                t.Errorf("diff: %s", diff)
            }
        }

        for _, v := range unmatched {
            kv := KeyValue{"key", Value{v}}
            if s := kv.Strings(); s != nil {
                t.Errorf("expected nil, got %v", s)
            }
        }
    })

    t.Run("bools", func(t *testing.T) {
        matched, unmatched := split("bools", values)
        for _, v := range matched {
            kv := KeyValue{"key", Value{v}}
            if diff := cmp.Diff(kv.Bools(), v); diff != "" {
                t.Errorf("diff: %s", diff)
            }
        }

        for _, v := range unmatched {
            kv := KeyValue{"key", Value{v}}
            if b := kv.Bools(); b != nil {
                t.Errorf("expected nil, got %v", b)
            }
        }
    })
}
89
fs/gguf/lazy.go
Normal file
@@ -0,0 +1,89 @@
package gguf

import (
    "encoding/binary"
    "iter"
    "log/slog"
)

type lazy[T any] struct {
    count  uint64
    next   func() (T, bool)
    stop   func()
    values []T

    // successFunc is called when all values have been successfully read.
    successFunc func() error
}

func newLazy[T any](f *File, fn func() (T, error)) (*lazy[T], error) {
    it := lazy[T]{}
    if err := binary.Read(f.reader, binary.LittleEndian, &it.count); err != nil {
        return nil, err
    }

    it.values = make([]T, 0)
    it.next, it.stop = iter.Pull(func(yield func(T) bool) {
        for i := range it.count {
            t, err := fn()
            if err != nil {
                slog.Error("error reading tensor", "index", i, "error", err)
                return
            }

            it.values = append(it.values, t)
            if !yield(t) {
                break
            }
        }

        if it.successFunc != nil {
            it.successFunc()
        }
    })

    return &it, nil
}

func (g *lazy[T]) Values() iter.Seq[T] {
    return func(yield func(T) bool) {
        for _, v := range g.All() {
            if !yield(v) {
                break
            }
        }
    }
}

func (g *lazy[T]) All() iter.Seq2[int, T] {
    return func(yield func(int, T) bool) {
        for i := range int(g.count) {
            if i < len(g.values) {
                if !yield(i, g.values[i]) {
                    break
                }
            } else {
                t, ok := g.next()
                if !ok {
                    break
                }

                if !yield(i, t) {
                    break
                }
            }
        }
    }
}

func (g *lazy[T]) rest() (collected bool) {
    for {
        _, ok := g.next()
        collected = collected || ok
        if !ok {
            break
        }
    }

    return collected
}
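Note: the lazy type above defers decoding through iter.Pull and remembers whatever has already been read, so All() can replay earlier indices without touching the file again. A rough standalone sketch of that pull-and-cache pattern follows; it is illustrative only (pullCache, newPullCache, at and the squaring producer are invented for this note, not part of the package):

package main

import (
    "fmt"
    "iter"
)

// pullCache mirrors the idea in lazy[T]: values are produced on demand via
// iter.Pull and cached, so earlier indices can be served from memory.
type pullCache[T any] struct {
    next   func() (T, bool)
    stop   func()
    values []T
}

func newPullCache[T any](produce func(i int) (T, error), n int) *pullCache[T] {
    c := &pullCache[T]{}
    c.next, c.stop = iter.Pull(func(yield func(T) bool) {
        for i := 0; i < n; i++ {
            v, err := produce(i)
            if err != nil {
                return
            }
            c.values = append(c.values, v) // cache before handing out
            if !yield(v) {
                break
            }
        }
    })
    return c
}

// at pulls just enough values to answer index i, then reads from the cache.
func (c *pullCache[T]) at(i int) (T, bool) {
    for len(c.values) <= i {
        if _, ok := c.next(); !ok {
            var zero T
            return zero, false
        }
    }
    return c.values[i], true
}

func main() {
    c := newPullCache(func(i int) (int, error) { return i * i, nil }, 5)
    v, _ := c.at(3) // forces production of indices 0..3
    fmt.Println(v)  // 9
    v, _ = c.at(1)  // answered from the cache, no recompute
    fmt.Println(v)  // 1
}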
23
fs/gguf/reader.go
Normal file
@@ -0,0 +1,23 @@
package gguf

import (
    "bufio"
    "io"
)

type bufferedReader struct {
    offset int64
    *bufio.Reader
}

func newBufferedReader(rs io.ReadSeeker, size int) *bufferedReader {
    return &bufferedReader{
        Reader: bufio.NewReaderSize(rs, size),
    }
}

func (rs *bufferedReader) Read(p []byte) (n int, err error) {
    n, err = rs.Reader.Read(p)
    rs.offset += int64(n)
    return n, err
}
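Note: bufferedReader simply threads a running byte count through bufio so callers always know the logical offset into the file without seeking. A tiny illustrative sketch of the same idea (offsetReader and the sample string are invented for this note):

package main

import (
    "bufio"
    "fmt"
    "strings"
)

// offsetReader counts bytes actually delivered to the caller, mirroring
// bufferedReader above.
type offsetReader struct {
    offset int64
    *bufio.Reader
}

func (r *offsetReader) Read(p []byte) (int, error) {
    n, err := r.Reader.Read(p)
    r.offset += int64(n)
    return n, err
}

func main() {
    r := &offsetReader{Reader: bufio.NewReaderSize(strings.NewReader("GGUF...rest of file"), 32)}
    magic := make([]byte, 4)
    r.Read(magic)                        // consume the 4-byte magic
    fmt.Println(string(magic), r.offset) // GGUF 4
}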
288
fs/gguf/tensor.go
Normal file
@@ -0,0 +1,288 @@
package gguf

import (
    "log/slog"
    "strings"
)

type TensorInfo struct {
    Name   string
    Offset uint64
    Shape  []uint64
    Type   TensorType
}

func (ti TensorInfo) Valid() bool {
    return ti.Name != "" && ti.NumBytes() > 0
}

func (ti TensorInfo) NumValues() int64 {
    var numItems int64 = 1
    for _, dim := range ti.Shape {
        numItems *= int64(dim)
    }
    return numItems
}

// NumBytes returns the number of bytes in the tensor.
func (ti TensorInfo) NumBytes() int64 {
    return int64(float64(ti.NumValues()) * ti.Type.NumBytes())
}

func (ti TensorInfo) LogValue() slog.Value {
    return slog.GroupValue(
        slog.String("name", ti.Name),
        slog.Int64("offset", int64(ti.Offset)),
        slog.Any("shape", ti.Shape),
        slog.Int64("num_values", ti.NumValues()),
        slog.Int64("num_bytes", ti.NumBytes()),
        slog.Any("type", ti.Type),
    )
}

type TensorType uint32

const (
    TensorTypeF32 TensorType = iota
    TensorTypeF16
    TensorTypeQ4_0
    TensorTypeQ4_1

    // unexported // unused in gguf
    tensorTypeQ4_2
    tensorTypeQ4_3

    TensorTypeQ5_0
    TensorTypeQ5_1
    TensorTypeQ8_0
    TensorTypeQ8_1
    TensorTypeQ2_K
    TensorTypeQ3_K
    TensorTypeQ4_K
    TensorTypeQ5_K
    TensorTypeQ6_K
    TensorTypeQ8_K

    // unexported // unquantizable by ollama
    tensorTypeIQ2_XXS
    tensorTypeIQ2_XS
    tensorTypeIQ3_XXS
    tensorTypeIQ1_S
    tensorTypeIQ4_NL
    tensorTypeIQ3_S
    tensorTypeIQ2_S
    tensorTypeIQ4_XS

    TensorTypeI8
    TensorTypeI16
    TensorTypeI32
    TensorTypeI64
    TensorTypeF64

    // unexported // unquantizable by ollama
    tensorTypeIQ1_M

    TensorTypeBF16

    // unexported // unused in gguf
    tensorTypeQ4_0_4_4
    tensorTypeQ4_0_4_8
    tensorTypeQ4_0_8_8

    // unexported // unquantizable by ollama
    tensorTypeTQ1_0
    tensorTypeTQ2_0

    // unexported // unused in gguf
    tensorTypeIQ4_NL_4_4
    tensorTypeIQ4_NL_4_8
    tensorTypeIQ4_NL_8_8
)

func (tt TensorType) NumBytes() float64 {
    return float64(tt.typeSize()) / float64(tt.blockSize())
}

func (tt TensorType) typeSize() int64 {
    switch tt {
    case TensorTypeF32:
        return 4
    case TensorTypeF16:
        return 2
    case TensorTypeQ4_0:
        return 2 + tt.blockSize()/2
    case TensorTypeQ4_1:
        return 2 + 2 + tt.blockSize()/2
    case TensorTypeQ5_0:
        return 2 + 4 + tt.blockSize()/2
    case TensorTypeQ5_1:
        return 2 + 2 + 4 + tt.blockSize()/2
    case TensorTypeQ8_0:
        return 2 + tt.blockSize()
    case TensorTypeQ8_1:
        return 2 + 2 + tt.blockSize()
    case TensorTypeQ2_K:
        return tt.blockSize()/16 + tt.blockSize()/4 + 2 + 2
    case TensorTypeQ3_K:
        return tt.blockSize()/8 + tt.blockSize()/4 + 12 + 2
    case TensorTypeQ4_K:
        return 2 + 2 + 12 + tt.blockSize()/2
    case TensorTypeQ5_K:
        return 2 + 2 + 12 + tt.blockSize()/8 + tt.blockSize()/2
    case TensorTypeQ6_K:
        return tt.blockSize()/2 + tt.blockSize()/4 + tt.blockSize()/16 + 2
    case TensorTypeQ8_K:
        return 4 + tt.blockSize() + 2*tt.blockSize()/16
    case tensorTypeIQ2_XXS:
        return 2 + 2*tt.blockSize()/8
    case tensorTypeIQ2_XS:
        return 2 + 2*tt.blockSize()/8 + tt.blockSize()/32
    case tensorTypeIQ3_XXS:
        return 2 + tt.blockSize()/4 + tt.blockSize()/8
    case tensorTypeIQ1_S:
        return 2 + tt.blockSize()/8 + tt.blockSize()/16
    case tensorTypeIQ4_NL:
        return 2 + tt.blockSize()/2
    case tensorTypeIQ3_S:
        return 2 + tt.blockSize()/4 + tt.blockSize()/8 + tt.blockSize()/32 + 4
    case tensorTypeIQ2_S:
        return 2 + tt.blockSize()/4 + tt.blockSize()/16
    case tensorTypeIQ4_XS:
        return 2 + 2 + tt.blockSize()/2 + tt.blockSize()/64
    case TensorTypeI8:
        return 1
    case TensorTypeI16:
        return 2
    case TensorTypeI32:
        return 4
    case TensorTypeI64:
        return 8
    case TensorTypeF64:
        return 8
    case tensorTypeIQ1_M:
        return tt.blockSize()/8 + tt.blockSize()/16 + tt.blockSize()/32
    case TensorTypeBF16:
        return 2
    default:
        return 0
    }
}

func (tt TensorType) blockSize() int64 {
    switch tt {
    case TensorTypeF32,
        TensorTypeF16,
        TensorTypeI8,
        TensorTypeI16,
        TensorTypeI32,
        TensorTypeI64,
        TensorTypeF64,
        TensorTypeBF16:
        return 1
    case TensorTypeQ4_0,
        TensorTypeQ4_1,
        TensorTypeQ5_0,
        TensorTypeQ5_1,
        TensorTypeQ8_0,
        TensorTypeQ8_1,
        tensorTypeIQ4_NL:
        return 32
    default:
        return 256
    }
}

func (tt TensorType) String() string {
    switch tt {
    case TensorTypeF32:
        return "f32"
    case TensorTypeF16:
        return "f16"
    case TensorTypeQ4_0:
        return "q4_0"
    case TensorTypeQ4_1:
        return "q4_1"
    case tensorTypeQ4_2:
        return "q4_2"
    case tensorTypeQ4_3:
        return "q4_3"
    case TensorTypeQ5_0:
        return "q5_0"
    case TensorTypeQ5_1:
        return "q5_1"
    case TensorTypeQ8_0:
        return "q8_0"
    case TensorTypeQ8_1:
        return "q8_1"
    case TensorTypeQ2_K:
        return "q2_k"
    case TensorTypeQ3_K:
        return "q3_k"
    case TensorTypeQ4_K:
        return "q4_k"
    case TensorTypeQ5_K:
        return "q5_k"
    case TensorTypeQ6_K:
        return "q6_k"
    case TensorTypeQ8_K:
        return "q8_k"
    case tensorTypeIQ2_XXS:
        return "iq2_xxs"
    case tensorTypeIQ2_XS:
        return "iq2_xs"
    case tensorTypeIQ3_XXS:
        return "iq3_xxs"
    case tensorTypeIQ1_S:
        return "iq1_s"
    case tensorTypeIQ4_NL:
        return "iq4_nl"
    case tensorTypeIQ3_S:
        return "iq3_s"
    case tensorTypeIQ2_S:
        return "iq2_s"
    case tensorTypeIQ4_XS:
        return "iq4_xs"
    case TensorTypeI8:
        return "i8"
    case TensorTypeI16:
        return "i16"
    case TensorTypeI32:
        return "i32"
    case TensorTypeI64:
        return "i64"
    case TensorTypeF64:
        return "f64"
    case tensorTypeIQ1_M:
        return "iq1_m"
    case TensorTypeBF16:
        return "bf16"
    case tensorTypeQ4_0_4_4:
        return "q4_0_4_4"
    case tensorTypeQ4_0_4_8:
        return "q4_0_4_8"
    case tensorTypeQ4_0_8_8:
        return "q4_0_8_8"
    case tensorTypeTQ1_0:
        return "tq1_0"
    case tensorTypeTQ2_0:
        return "tq2_0"
    case tensorTypeIQ4_NL_4_4:
        return "iq4_nl_4_4"
    case tensorTypeIQ4_NL_4_8:
        return "iq4_nl_4_8"
    case tensorTypeIQ4_NL_8_8:
        return "iq4_nl_8_8"
    default:
        return "unknown"
    }
}

func (tt TensorType) LogValue() slog.Value {
    return slog.GroupValue(
        slog.Uint64("value", uint64(tt)),
        slog.String("name", strings.ToUpper(tt.String())),
        slog.Int64("size", tt.typeSize()),
        slog.Int64("block_size", tt.blockSize()),
        slog.Float64("num_bytes", tt.NumBytes()),
    )
}
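Note on the size math above, with illustrative numbers: a Q4_K block holds 256 values in 2+2+12+128 = 144 bytes, i.e. 0.5625 bytes per value, so a hypothetical 4096x4096 Q4_K tensor comes to 16,777,216 values and 9,437,184 bytes. A minimal sketch against the types introduced in this file (the tensor name and shape are made up):

package main

import (
    "fmt"

    "github.com/ollama/ollama/fs/gguf"
)

func main() {
    ti := gguf.TensorInfo{
        Name:  "blk.0.attn_q.weight", // hypothetical tensor
        Shape: []uint64{4096, 4096},
        Type:  gguf.TensorTypeQ4_K,
    }
    fmt.Println(ti.Type.NumBytes()) // 0.5625 bytes per value (144/256)
    fmt.Println(ti.NumValues())     // 16777216
    fmt.Println(ti.NumBytes())      // 9437184
}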
2
go.mod
@@ -19,7 +19,7 @@ require (
	github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1
	github.com/dlclark/regexp2 v1.11.4
	github.com/emirpasic/gods/v2 v2.0.0-alpha
	github.com/google/go-cmp v0.6.0
	github.com/google/go-cmp v0.7.0
	github.com/mattn/go-runewidth v0.0.14
	github.com/nlpodyssey/gopickle v0.3.0
	github.com/pdevine/tensor v0.0.0-20240510204454-f88f4562727c
4
go.sum
@@ -112,8 +112,8 @@ github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
@@ -45,6 +45,8 @@ var (
		"qwen2.5-coder:latest",
		"qwen:latest",
		"solar-pro:latest",
		"codellama:latest",
		"nous-hermes:latest",
	}
)
@@ -0,0 +1,32 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen <daniel@ollama.com>
Date: Sun, 22 Jun 2025 09:22:05 -0700
Subject: [PATCH] temporary prevent rocm+cuda mixed loading

---
 ggml/src/ggml-backend-reg.cpp | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp
index 4e67d243..8f49f084 100644
--- a/ggml/src/ggml-backend-reg.cpp
+++ b/ggml/src/ggml-backend-reg.cpp
@@ -573,8 +573,16 @@ void ggml_backend_load_all_from_path(const char * dir_path) {
 
     ggml_backend_load_best("blas", silent, dir_path);
     ggml_backend_load_best("cann", silent, dir_path);
-    ggml_backend_load_best("cuda", silent, dir_path);
-    ggml_backend_load_best("hip", silent, dir_path);
+
+    // Avoid mixed hip+cuda configurations
+    const char * hip_devices = std::getenv("HIP_VISIBLE_DEVICES");
+    const char * rocr_devices = std::getenv("ROCR_VISIBLE_DEVICES");
+    if (!hip_devices && !rocr_devices) {
+        ggml_backend_load_best("cuda", silent, dir_path);
+    } else {
+        ggml_backend_load_best("hip", silent, dir_path);
+    }
+
     ggml_backend_load_best("kompute", silent, dir_path);
     ggml_backend_load_best("metal", silent, dir_path);
     ggml_backend_load_best("rpc", silent, dir_path);
@@ -139,6 +139,13 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a
		gpus = discover.GetCPUInfo()
	}

	// Verify the requested context size is <= the model training size
	trainCtx := f.KV().ContextLength()
	if opts.NumCtx/numParallel > int(trainCtx) && trainCtx > 0 {
		slog.Warn("requested context size too large for model", "num_ctx", opts.NumCtx, "num_parallel", numParallel, "n_ctx_train", trainCtx)
		opts.NumCtx = int(trainCtx) * numParallel
	}

	estimate := EstimateGPULayers(gpus, f, projectors, opts, numParallel)
	if len(gpus) > 1 || gpus[0].Library != "cpu" {
		switch {
@@ -311,7 +318,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a
		params = append(params, "--mmproj", projectors[0])
	}

	// iterate through compatible GPU libraries such as 'cuda_v12', 'cuda_v11', 'rocm', etc.
	// iterate through compatible GPU libraries such as 'cuda_v12', 'rocm', etc.
	// adding each library's respective path to the LD_LIBRARY_PATH, until finally running
	// without any LD_LIBRARY_PATH flags
	for {
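Note on the context clamp above, with illustrative numbers: for a model trained at 4096 tokens with num_parallel = 2, a request of num_ctx = 16384 works out to 16384/2 = 8192 tokens per sequence, which exceeds the training context, so num_ctx is lowered to 4096 * 2 = 8192 (4096 per parallel sequence).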
@@ -602,7 +602,9 @@ func (c *Context) Forward(tensors ...ml.Tensor) ml.Context {
}

func (c *Context) Compute(tensors ...ml.Tensor) {
	C.ggml_backend_sched_graph_compute_async(c.b.sched, c.graph)
	if status := C.ggml_backend_sched_graph_compute_async(c.b.sched, c.graph); status != C.GGML_STATUS_SUCCESS {
		panic(fmt.Errorf("error computing ggml graph: %v", status))
	}
	C.ggml_backend_sched_reset(c.b.sched)

	needSync := true
12
ml/backend/ggml/ggml/src/ggml-backend-reg.cpp
vendored
@@ -573,8 +573,16 @@ void ggml_backend_load_all_from_path(const char * dir_path) {

    ggml_backend_load_best("blas", silent, dir_path);
    ggml_backend_load_best("cann", silent, dir_path);
    ggml_backend_load_best("cuda", silent, dir_path);
    ggml_backend_load_best("hip", silent, dir_path);

    // Avoid mixed hip+cuda configurations
    const char * hip_devices = std::getenv("HIP_VISIBLE_DEVICES");
    const char * rocr_devices = std::getenv("ROCR_VISIBLE_DEVICES");
    if (!hip_devices && !rocr_devices) {
        ggml_backend_load_best("cuda", silent, dir_path);
    } else {
        ggml_backend_load_best("hip", silent, dir_path);
    }

    ggml_backend_load_best("kompute", silent, dir_path);
    ggml_backend_load_best("metal", silent, dir_path);
    ggml_backend_load_best("rpc", silent, dir_path);
@@ -27,7 +27,6 @@ function checkEnv() {
        $env:VCToolsRedistDir=(get-item "${MSVC_INSTALL}\VC\Redist\MSVC\*")[0]
    }
    # Locate CUDA versions
    # Note: this assumes every version found will be built
    $cudaList=(get-item "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v*\bin\" -ea 'silentlycontinue')
    if ($cudaList.length -eq 0) {
        $d=(get-command -ea 'silentlycontinue' nvcc).path
@@ -94,19 +93,6 @@ function buildOllama() {

    $hashEnv = @{}
    Get-ChildItem env: | foreach { $hashEnv[$_.Name] = $_.Value }
    if ("$script:CUDA_DIRS".Contains("v11")) {
        $hashEnv.Keys | foreach { if ($_.Contains("CUDA_PATH_V11")) { $v11="$_" }}
        $env:CUDAToolkit_ROOT=$hashEnv[$v11]
        write-host "Building CUDA v11 backend libraries"
        # Note: cuda v11 requires msvc 2019 so force the older generator
        # to avoid 2022 (or newer) from being used as the default
        & cmake --fresh --preset "CUDA 11" -G "Visual Studio 16 2019" --install-prefix $script:DIST_DIR
        if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
        & cmake --build --preset "CUDA 11" --config Release --parallel $script:JOBS
        if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
        & cmake --install build --component "CUDA" --strip
        if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
    }
    if ("$script:CUDA_DIRS".Contains("v12")) {
        $hashEnv.Keys | foreach { if ($_.Contains("CUDA_PATH_V12")) { $v12="$_" }}
        $env:CUDAToolkit_ROOT=$hashEnv[$v12]
@@ -10,9 +10,7 @@ OLLAMA_COMMON_BUILD_ARGS="--build-arg=VERSION \
    --build-arg=GOFLAGS \
    --build-arg=OLLAMA_CUSTOM_CPU_DEFS \
    --build-arg=OLLAMA_SKIP_CUDA_GENERATE \
    --build-arg=OLLAMA_SKIP_CUDA_11_GENERATE \
    --build-arg=OLLAMA_SKIP_CUDA_12_GENERATE \
    --build-arg=CUDA_V11_ARCHITECTURES \
    --build-arg=CUDA_V12_ARCHITECTURES \
    --build-arg=OLLAMA_SKIP_ROCM_GENERATE \
    --build-arg=OLLAMA_FAST_BUILD \
@@ -23,7 +23,7 @@ import (

	"github.com/ollama/ollama/api"
	"github.com/ollama/ollama/envconfig"
	"github.com/ollama/ollama/fs/ggml"
	"github.com/ollama/ollama/fs/gguf"
	"github.com/ollama/ollama/parser"
	"github.com/ollama/ollama/template"
	"github.com/ollama/ollama/thinking"
@@ -73,22 +73,18 @@ func (m *Model) Capabilities() []model.Capability {
	capabilities := []model.Capability{}

	// Check for completion capability
	r, err := os.Open(m.ModelPath)
	if err == nil {
		defer r.Close()

		f, err := ggml.Decode(r, 1024)
		if err == nil {
			if _, ok := f.KV()[fmt.Sprintf("%s.pooling_type", f.KV().Architecture())]; ok {
				capabilities = append(capabilities, model.CapabilityEmbedding)
			} else {
				capabilities = append(capabilities, model.CapabilityCompletion)
			}
			if _, ok := f.KV()[fmt.Sprintf("%s.vision.block_count", f.KV().Architecture())]; ok {
				capabilities = append(capabilities, model.CapabilityVision)
			}
		} else {
			slog.Error("couldn't decode ggml", "error", err)
		}
	f, err := gguf.Open(m.ModelPath)
	if err == nil {
		defer f.Close()

		if f.KeyValue("pooling_type").Valid() {
			capabilities = append(capabilities, model.CapabilityEmbedding)
		} else {
			// If no embedding is specified, we assume the model supports completion
			capabilities = append(capabilities, model.CapabilityCompletion)
		}
		if f.KeyValue("vision.block_count").Valid() {
			capabilities = append(capabilities, model.CapabilityVision)
		}
	} else {
		slog.Error("couldn't open model file", "error", err)
@@ -1,123 +1,42 @@
package server

import (
	"bytes"
	"encoding/binary"
	"errors"
	"os"
	"path/filepath"
	"strings"
	"testing"

	"github.com/ollama/ollama/fs/ggml"
	"github.com/ollama/ollama/template"
	"github.com/ollama/ollama/types/model"
)

// Constants for GGUF magic bytes and version
var (
	ggufMagic = []byte{0x47, 0x47, 0x55, 0x46} // "GGUF"
	ggufVer   = uint32(3)                      // Version 3
)

// Helper function to create mock GGUF data
func createMockGGUFData(architecture string, vision bool) []byte {
	var buf bytes.Buffer

	// Write GGUF header
	buf.Write(ggufMagic)
	binary.Write(&buf, binary.LittleEndian, ggufVer)

	// Write tensor count (0 for our test)
	var numTensors uint64 = 0
	binary.Write(&buf, binary.LittleEndian, numTensors)

	// Calculate number of metadata entries
	numMetaEntries := uint64(1) // architecture entry
	if vision {
		numMetaEntries++
	}
	// Add embedding entry if architecture is "bert"
	if architecture == "bert" {
		numMetaEntries++
	}
	binary.Write(&buf, binary.LittleEndian, numMetaEntries)

	// Write architecture metadata
	archKey := "general.architecture"
	keyLen := uint64(len(archKey))
	binary.Write(&buf, binary.LittleEndian, keyLen)
	buf.WriteString(archKey)

	// String type (8)
	var strType uint32 = 8
	binary.Write(&buf, binary.LittleEndian, strType)

	// String length
	strLen := uint64(len(architecture))
	binary.Write(&buf, binary.LittleEndian, strLen)
	buf.WriteString(architecture)

	if vision {
		visionKey := architecture + ".vision.block_count"
		keyLen = uint64(len(visionKey))
		binary.Write(&buf, binary.LittleEndian, keyLen)
		buf.WriteString(visionKey)

		// uint32 type (4)
		var uint32Type uint32 = 4
		binary.Write(&buf, binary.LittleEndian, uint32Type)

		// uint32 value (1)
		var countVal uint32 = 1
		binary.Write(&buf, binary.LittleEndian, countVal)
	}
	// Write embedding metadata if architecture is "bert"
	if architecture == "bert" {
		poolKey := architecture + ".pooling_type"
		keyLen = uint64(len(poolKey))
		binary.Write(&buf, binary.LittleEndian, keyLen)
		buf.WriteString(poolKey)

		// uint32 type (4)
		var uint32Type uint32 = 4
		binary.Write(&buf, binary.LittleEndian, uint32Type)

		// uint32 value (1)
		var poolingVal uint32 = 1
		binary.Write(&buf, binary.LittleEndian, poolingVal)
	}

	return buf.Bytes()
}

func TestModelCapabilities(t *testing.T) {
	// Create a temporary directory for test files
	tempDir := t.TempDir()

	// Create different types of mock model files
	completionModelPath := filepath.Join(tempDir, "model.bin")
	visionModelPath := filepath.Join(tempDir, "vision_model.bin")
	embeddingModelPath := filepath.Join(tempDir, "embedding_model.bin")
	// Create a simple model file for tests that don't depend on GGUF content
	simpleModelPath := filepath.Join(tempDir, "simple_model.bin")

	if err := errors.Join(
		os.WriteFile(completionModelPath, createMockGGUFData("llama", false), 0o644),
		os.WriteFile(visionModelPath, createMockGGUFData("llama", true), 0o644),
		os.WriteFile(embeddingModelPath, createMockGGUFData("bert", false), 0o644),
		os.WriteFile(simpleModelPath, []byte("dummy model data"), 0o644),
	); err != nil {
		t.Fatalf("Failed to create model files: %v", err)
	}

	// Create completion model (llama architecture without vision)
	completionModelPath, _ := createBinFile(t, ggml.KV{
		"general.architecture": "llama",
	}, []*ggml.Tensor{})

	// Create vision model (llama architecture with vision block count)
	visionModelPath, _ := createBinFile(t, ggml.KV{
		"general.architecture":     "llama",
		"llama.vision.block_count": uint32(1),
	}, []*ggml.Tensor{})

	// Create embedding model (bert architecture with pooling type)
	embeddingModelPath, _ := createBinFile(t, ggml.KV{
		"general.architecture": "bert",
		"bert.pooling_type":    uint32(1),
	}, []*ggml.Tensor{})

	toolsInsertTemplate, err := template.Parse("{{ .prompt }}{{ if .tools }}{{ .tools }}{{ end }}{{ if .suffix }}{{ .suffix }}{{ end }}")
	if err != nil {
		t.Fatalf("Failed to parse template: %v", err)
	}

	chatTemplate, err := template.Parse("{{ .prompt }}")
	if err != nil {
		t.Fatalf("Failed to parse template: %v", err)
	}

	toolsTemplate, err := template.Parse("{{ .prompt }}{{ if .tools }}{{ .tools }}{{ end }}")
	if err != nil {
		t.Fatalf("Failed to parse template: %v", err)
@@ -145,21 +64,13 @@ func TestModelCapabilities(t *testing.T) {
			},
			expectedCaps: []model.Capability{model.CapabilityCompletion, model.CapabilityTools, model.CapabilityInsert},
		},
		{
			name: "model with tools and insert capability",
			model: Model{
				ModelPath: simpleModelPath,
				Template:  toolsInsertTemplate,
			},
			expectedCaps: []model.Capability{model.CapabilityTools, model.CapabilityInsert},
		},
		{
			name: "model with tools capability",
			model: Model{
				ModelPath: simpleModelPath,
				ModelPath: completionModelPath,
				Template:  toolsTemplate,
			},
			expectedCaps: []model.Capability{model.CapabilityTools},
			expectedCaps: []model.Capability{model.CapabilityCompletion, model.CapabilityTools},
		},
		{
			name: "model with vision capability",
@@ -224,29 +135,33 @@ func TestModelCapabilities(t *testing.T) {
}

func TestModelCheckCapabilities(t *testing.T) {
	// Create a temporary directory for test files
	tempDir := t.TempDir()

	visionModelPath := filepath.Join(tempDir, "vision_model.bin")
	simpleModelPath := filepath.Join(tempDir, "model.bin")
	embeddingModelPath := filepath.Join(tempDir, "embedding_model.bin")

	if err := errors.Join(
		os.WriteFile(simpleModelPath, []byte("dummy model data"), 0o644),
		os.WriteFile(visionModelPath, createMockGGUFData("llama", true), 0o644),
		os.WriteFile(embeddingModelPath, createMockGGUFData("bert", false), 0o644),
	); err != nil {
		t.Fatalf("Failed to create model files: %v", err)
	}

	// Create simple model file for tests that don't depend on GGUF content
	completionModelPath, _ := createBinFile(t, ggml.KV{
		"general.architecture": "llama",
	}, []*ggml.Tensor{})

	// Create vision model (llama architecture with vision block count)
	visionModelPath, _ := createBinFile(t, ggml.KV{
		"general.architecture":     "llama",
		"llama.vision.block_count": uint32(1),
	}, []*ggml.Tensor{})

	// Create embedding model (bert architecture with pooling type)
	embeddingModelPath, _ := createBinFile(t, ggml.KV{
		"general.architecture": "bert",
		"bert.pooling_type":    uint32(1),
	}, []*ggml.Tensor{})

	toolsInsertTemplate, err := template.Parse("{{ .prompt }}{{ if .tools }}{{ .tools }}{{ end }}{{ if .suffix }}{{ .suffix }}{{ end }}")
	if err != nil {
		t.Fatalf("Failed to parse template: %v", err)
	}

	chatTemplate, err := template.Parse("{{ .prompt }}")
	if err != nil {
		t.Fatalf("Failed to parse template: %v", err)
	}

	toolsTemplate, err := template.Parse("{{ .prompt }}{{ if .tools }}{{ .tools }}{{ end }}")
	if err != nil {
		t.Fatalf("Failed to parse template: %v", err)
@@ -261,7 +176,7 @@ func TestModelCheckCapabilities(t *testing.T) {
		{
			name: "completion model without tools capability",
			model: Model{
				ModelPath: simpleModelPath,
				ModelPath: completionModelPath,
				Template:  chatTemplate,
			},
			checkCaps: []model.Capability{model.CapabilityTools},
@@ -270,7 +185,7 @@ func TestModelCheckCapabilities(t *testing.T) {
		{
			name: "model with all needed capabilities",
			model: Model{
				ModelPath: simpleModelPath,
				ModelPath: completionModelPath,
				Template:  toolsInsertTemplate,
			},
			checkCaps: []model.Capability{model.CapabilityTools, model.CapabilityInsert},
@@ -278,7 +193,7 @@ func TestModelCheckCapabilities(t *testing.T) {
		{
			name: "model missing insert capability",
			model: Model{
				ModelPath: simpleModelPath,
				ModelPath: completionModelPath,
				Template:  toolsTemplate,
			},
			checkCaps: []model.Capability{model.CapabilityInsert},
@@ -287,7 +202,7 @@ func TestModelCheckCapabilities(t *testing.T) {
		{
			name: "model missing vision capability",
			model: Model{
				ModelPath: simpleModelPath,
				ModelPath: completionModelPath,
				Template:  toolsTemplate,
			},
			checkCaps: []model.Capability{model.CapabilityVision},
@@ -312,7 +227,7 @@ func TestModelCheckCapabilities(t *testing.T) {
		{
			name: "unknown capability",
			model: Model{
				ModelPath: simpleModelPath,
				ModelPath: completionModelPath,
				Template:  chatTemplate,
			},
			checkCaps: []model.Capability{"unknown"},
@@ -257,16 +257,8 @@ func TestQuantizeModel(t *testing.T) {

	for _, tt := range cases {
		t.Run(tt.name, func(t *testing.T) {
			f, err := os.CreateTemp(t.TempDir(), tt.name)
			if err != nil {
				t.Fatal(err.Error())
			}
			defer f.Close()
			err = fsggml.WriteGGUF(f, tt.kv, tt.tensors)
			if err != nil {
				t.Fatalf("failed to create initial model: %s", err)
			}
			fp, err := os.Open(f.Name())
			p, _ := createBinFile(t, tt.kv, tt.tensors)
			fp, err := os.Open(p)
			if err != nil {
				t.Fatal(err.Error())
			}
@@ -112,11 +112,7 @@ func newScenarioRequest(t *testing.T, ctx context.Context, modelName string, est
	b.ctx, b.ctxDone = context.WithCancel(ctx)
	t.Helper()

	f, err := os.CreateTemp(t.TempDir(), modelName)
	require.NoError(t, err)
	defer f.Close()

	require.NoError(t, ggml.WriteGGUF(f, ggml.KV{
	p, _ := createBinFile(t, ggml.KV{
		"general.architecture": "llama",
		"llama.context_length": uint32(32),
		"llama.embedding_length": uint32(4096),
@@ -129,14 +125,14 @@ func newScenarioRequest(t *testing.T, ctx context.Context, modelName string, est
	}, []*ggml.Tensor{
		{Name: "blk.0.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))},
		{Name: "output.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))},
	}))
	})
	require.NoError(t, err)

	fname := f.Name()
	model := &Model{Name: modelName, ModelPath: fname}
	b.f, err = llm.LoadModel(model.ModelPath, 0)
	require.NoError(t, err)

	model := &Model{Name: modelName, ModelPath: p}
	f, err := llm.LoadModel(model.ModelPath, 0)
	if err != nil {
		t.Fatal(err)
	}
	b.f = f
	if duration == nil {
		duration = &api.Duration{Duration: 5 * time.Millisecond}
	}