Compare commits

...

16 Commits

Author | SHA1 | Message | Date
Bruce MacDonald | 60f0b7db76 | working | 2025-01-24 16:51:19 -08:00
Jesse Gross | 0d22c0ec1a | new runner | 2025-01-21 10:54:22 -08:00
Jesse Gross | a34960a516 | tensor loading iface | 2025-01-21 10:54:16 -08:00
Michael Yang | 9722d96db2 | docker | 2025-01-20 17:06:45 -08:00
Michael Yang | fd868e5a88 | default native cuda architecture when possible | 2025-01-20 16:19:52 -08:00
Michael Yang | a48bd5bbe7 | cpu all | 2025-01-20 15:40:05 -08:00
Michael Yang | ffd9914d0a | update ci | 2025-01-20 09:39:43 -08:00
Michael Yang | 9d249bfcab | sort devices by type | 2025-01-20 09:39:43 -08:00
Michael Yang | 8cb7b94c40 | next ollama runner: implement llama and mllama model architectures in go using ggml (through cgo) | 2025-01-20 09:39:43 -08:00
EndoTheDev | 7bb356c680 | docs: update suspend header in gpu.md (#8487) | 2025-01-19 18:45:35 -08:00
Jannik Maierhöfer | 021817e59a | readme: add link to Langfuse (#8455) | 2025-01-16 22:41:12 -08:00
Patrick Devine | a420a453b4 | fix default modelfile for create (#8452) | 2025-01-16 01:14:04 -08:00
Jeffrey Morgan | 42cf4db601 | parser: fix parsing Modelfiles with multiple FROM commands (#8449) | 2025-01-16 00:14:04 -08:00
Josh | 93a8daf285 | convert: import support for command-r models from safetensors (#6063); co-authored-by Patrick Devine <patrick@infrahq.com> | 2025-01-15 16:31:22 -08:00
Gloryjaw | a041b4df7c | docs: fix path to examples (#8438) | 2025-01-15 11:49:12 -08:00
Patrick Devine | 2539f2dbf9 | Fix absolute path names + gguf detection (#8428) | 2025-01-14 19:01:24 -08:00
616 changed files with 12686 additions and 11173 deletions

.gitattributes (vendored, 9 changed lines)

@@ -7,5 +7,14 @@ llama/**/*.cuh linguist-vendored
 llama/**/*.m linguist-vendored
 llama/**/*.metal linguist-vendored
+ml/backend/**/*.c linguist-vendored
+ml/backend/**/*.h linguist-vendored
+ml/backend/**/*.cpp linguist-vendored
+ml/backend/**/*.hpp linguist-vendored
+ml/backend/**/*.cu linguist-vendored
+ml/backend/**/*.cuh linguist-vendored
+ml/backend/**/*.m linguist-vendored
+ml/backend/**/*.metal linguist-vendored
 * text=auto
 *.go text eol=lf


@@ -478,243 +478,77 @@ jobs:
           dist/OllamaSetup.exe
           dist/ollama-windows-*.zip
-  # Linux x86 assets built using the container based build
-  build-linux-amd64:
-    environment: release
-    runs-on: linux
-    env:
-      PLATFORM: linux/amd64
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - name: Set Version
-        shell: bash
-        run: echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
-      - run: |
-          ./scripts/build_linux.sh
-      - uses: actions/upload-artifact@v4
-        with:
-          name: dist-linux-amd64
-          path: |
-            dist/*linux*
-            !dist/*-cov
-  # Linux ARM assets built using the container based build
-  # (at present, docker isn't pre-installed on arm ubunutu images)
-  build-linux-arm64:
-    environment: release
-    runs-on: linux-arm64
-    env:
-      PLATFORM: linux/arm64
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - name: Set Version
-        shell: bash
-        run: echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
-      - name: 'Install Docker'
-        run: |
-          # Add Docker's official GPG key:
-          env
-          uname -a
-          sudo apt-get update
-          sudo apt-get install -y ca-certificates curl
-          sudo install -m 0755 -d /etc/apt/keyrings
-          sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
-          sudo chmod a+r /etc/apt/keyrings/docker.asc
-          # Add the repository to Apt sources:
-          echo \
-            "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
-            $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \
-            sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
-          sudo apt-get update
-          sudo apt-get install -y docker-ce docker-ce-cli containerd.io
-          sudo usermod -aG docker $USER
-          sudo apt-get install acl
-          sudo setfacl --modify user:$USER:rw /var/run/docker.sock
-      - run: |
-          ./scripts/build_linux.sh
-      - uses: actions/upload-artifact@v4
-        with:
-          name: dist-linux-arm64
-          path: |
-            dist/*linux*
-            !dist/*-cov
-  # Container image build
-  build-container-image:
-    environment: release
-    strategy:
-      matrix:
-        runner:
-          - linux
-          - linux-arm64
-    runs-on: ${{ matrix.runner }}
-    env:
-      FINAL_IMAGE_REPO: ollama/ollama
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - name: 'Install Docker'
-        if: ${{ startsWith(matrix.runner, 'linux-arm64') }}
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y ca-certificates curl
-          sudo install -m 0755 -d /etc/apt/keyrings
-          sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
-          sudo chmod a+r /etc/apt/keyrings/docker.asc
-          echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
-            $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \
-            sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
-          sudo apt-get update
-          sudo apt-get install -y docker-ce docker-ce-cli containerd.io
-          sudo usermod -aG docker $USER
-          sudo apt-get install acl
-          sudo setfacl --modify user:$USER:rw /var/run/docker.sock
-      - name: Docker meta
-        id: meta
-        uses: docker/metadata-action@v5
-        with:
-          images: ${{ env.FINAL_IMAGE_REPO }}
-          flavor: |
-            latest=false
-          tags: |
-            type=ref,enable=true,priority=600,prefix=0.0.0-pr,suffix=,event=pr
-            type=semver,pattern={{version}}
-      - name: Set Version
-        shell: bash
-        run: |
-          machine=$(uname -m)
-          case ${machine} in
-            x86_64) echo ARCH=amd64; echo PLATFORM_PAIR=linux-amd64 ;;
-            aarch64) echo ARCH=arm64; echo PLATFORM_PAIR=linux-arm64 ;;
-          esac >>$GITHUB_ENV
-          echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${{ env.DOCKER_METADATA_OUTPUT_VERSION }}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" >>$GITHUB_ENV
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ vars.DOCKER_USER }}
-          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
-      - name: Build and push by digest
-        id: build
-        uses: docker/build-push-action@v6
-        with:
-          context: "."
-          platforms: linux/${{ env.ARCH }}
-          build-args: |
-            GOFLAGS
-          outputs: type=image,name=${{ env.FINAL_IMAGE_REPO }},push-by-digest=true,name-canonical=true,push=true
-      - name: Export digest
-        run: |
-          mkdir -p /tmp/digests
-          digest="${{ steps.build.outputs.digest }}"
-          touch "/tmp/digests/${digest#sha256:}"
-      - name: Upload digest
-        uses: actions/upload-artifact@v4
-        with:
-          name: digests-${{ env.PLATFORM_PAIR }}
-          path: /tmp/digests/*
-          if-no-files-found: error
-          retention-days: 1
-  merge:
-    environment: release
-    runs-on: linux
-    needs:
-      - build-container-image
-    env:
-      FINAL_IMAGE_REPO: ollama/ollama
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - name: Download digests
-        uses: actions/download-artifact@v4
-        with:
-          path: /tmp/digests
-          pattern: digests-*
-          merge-multiple: true
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      - name: Docker meta
-        id: meta
-        uses: docker/metadata-action@v5
-        with:
-          images: ${{ env.FINAL_IMAGE_REPO }}
-          flavor: |
-            latest=false
-          tags: |
-            type=ref,enable=true,priority=600,prefix=0.0.0-pr,suffix=,event=pr
-            type=semver,pattern={{version}}
-      - name: Set Version
-        shell: bash
-        run: |
-          machine=$(uname -m)
-          case ${machine} in
-            x86_64) echo ARCH=amd64; echo PLATFORM_PAIR=linux-amd64 ;;
-            aarch64) echo ARCH=arm64; echo PLATFORM_PAIR=linux-arm64 ;;
-          esac >>$GITHUB_ENV
-          echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${{ env.DOCKER_METADATA_OUTPUT_VERSION }}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" >>$GITHUB_ENV
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ vars.DOCKER_USER }}
-          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
-      - name: Create manifest list and push
-        working-directory: /tmp/digests
-        run: |
-          docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
-            $(printf '${{ env.FINAL_IMAGE_REPO }}@sha256:%s ' *)
-      - name: Inspect image
-        run: |
-          docker buildx imagetools inspect ${{ env.FINAL_IMAGE_REPO }}:${{ steps.meta.outputs.version }}
-  build-container-image-rocm:
-    environment: release
-    runs-on: linux
-    env:
-      FINAL_IMAGE_REPO: ollama/ollama
-      ARCH: amd64
-      PLATFORM_PAIR: linux-amd64
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - name: Docker meta
-        id: meta
-        uses: docker/metadata-action@v5
-        with:
-          images: ${{ env.FINAL_IMAGE_REPO }}
-          flavor: |
-            latest=false
-          tags: |
-            type=ref,enable=true,priority=600,prefix=0.0.0-pr,suffix=,event=pr
-            type=semver,pattern={{version}}
-      - name: Set Version
-        shell: bash
-        run: |
-          echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${{ env.DOCKER_METADATA_OUTPUT_VERSION }}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" >>$GITHUB_ENV
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ vars.DOCKER_USER }}
-          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
-      - name: Build and push by digest
-        id: build
-        uses: docker/build-push-action@v6
-        with:
-          context: "."
-          target: runtime-rocm
-          build-args: |
-            GOFLAGS
-          tags: ${{ env.FINAL_IMAGE_REPO }}:${{ env.DOCKER_METADATA_OUTPUT_VERSION}}-rocm
-          push: true
+  build-linux:
+    environment: release
+    runs-on: linux
+    strategy:
+      matrix:
+        include:
+          - os: linux
+            arch: amd64
+            targets: [archive, rocm]
+          - os: linux
+            arch: arm64
+            targets: [archive]
+    steps:
+      - uses: actions/checkout@v4
+      - uses: docker/setup-qemu-action@v3
+      - uses: docker/setup-buildx-action@v3
+      - run: |
+          apt-get update && apt-get install pigz
+          for TARGET in ${{ matrix.targets }}; do docker buildx build --platform $PLATFORM --target $TARGET --output type=local,dest=dist/$PLATFORM .; done
+          tar c -C dist/$PLATFORM . | pigz -9cv >dist/ollama-${PLATFORM//\//-}.tar.gz
+        env:
+          PLATFORM: ${{ matrix.os }}/${{ matrix.arch }}
+      - uses: actions/upload-artifact@v4
+        with:
+          name: dist-${{ matrix.os }}-${{ matrix.arch }}
+          path: |
+            dist/ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.gz
+  build-docker:
+    environment: release
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        include:
+          - flavor: |
+              latest=auto
+            platforms: linux/amd64,linux/arm64
+            build-args: [GOFLAGS]
+          - flavor: |
+              suffix=-rocm,onlatest=false
+            platforms: linux/amd64
+            build-args: [GOFLAGS, FLAVOR=rocm]
+    steps:
+      - uses: actions/checkout@v4
+      - uses: docker/setup-qemu-action@v2
+      - uses: docker/setup-buildx-action@v2
+      - uses: docker/login-action@v3
+        with:
+          username: ${{ vars.DOCKER_USER }}
+          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
+      - id: metadata
+        uses: docker/metadata-action@v4
+        with:
+          flavor: ${{ matrix.flavor }}
+          images: |
+            ollama/ollama
+          tags: |
+            type=semver,pattern={{version}}
+      - uses: docker/build-push-action@v6
+        with:
+          context: .
+          push: true
+          platforms: ${{ matrix.platforms }}
+          build-args: ${{ matrix.build-args }}
+          tags: ${{ steps.metadata.outputs.tags }}
+          labels: ${{ steps.metadata.outputs.labels }}
+          cache-from: type=registry,ref=ollama/ollama:latest
+          cache-to: type=inline
+          provenance: false
+        env:
+          GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${{ steps.metadata.outputs.version }}\" \"-X=github.com/ollama/ollama/server.mode=release\"'"
   # Aggregate all the assets and ship a release
   release:


@@ -1,11 +1,5 @@
 name: test
-env:
-  ROCM_WINDOWS_URL: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe
-  MSYS2_URL: https://github.com/msys2/msys2-installer/releases/download/2024-07-27/msys2-x86_64-20240727.exe
-  CUDA_12_WINDOWS_URL: https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe
-  CUDA_12_WINDOWS_VER: 12.4
 concurrency:
   # For PRs, later CI runs preempt previous ones. e.g. a force push on a PR
   # cancels running CI jobs and starts all new ones.
@@ -27,7 +21,7 @@ jobs:
   changes:
     runs-on: ubuntu-latest
     outputs:
-      RUNNERS: ${{ steps.changes.outputs.RUNNERS }}
+      changed: ${{ steps.changes.outputs.changed }}
     steps:
      - uses: actions/checkout@v4
        with:
@@ -35,309 +29,61 @@
      - id: changes
        run: |
          changed() {
-           git diff-tree -r --no-commit-id --name-only \
-             $(git merge-base ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }}) \
-             ${{ github.event.pull_request.head.sha }} \
+           local BASE=${{ github.event.pull_request.base.sha }}
+           local HEAD=${{ github.event.pull_request.head.sha }}
+           local MERGE_BASE=$(git merge-base $BASE $HEAD)
+           git diff-tree -r --no-commit-id --name-only "$MERGE_BASE" "$HEAD" \
              | xargs python3 -c "import sys; from pathlib import Path; print(any(Path(x).match(glob) for x in sys.argv[1:] for glob in '$*'.split(' ')))"
          }
-         {
-           echo RUNNERS=$(changed 'llama/**')
-         } >>$GITHUB_OUTPUT
+         echo changed=$(changed 'llama/llama.cpp/**' 'ml/backend/ggml/ggml/**') | tee -a $GITHUB_OUTPUT
-  runners-linux-cuda:
-    needs: [changes]
-    if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
-    strategy:
-      matrix:
-        cuda-version:
-          - '11.8.0'
-    runs-on: linux
-    container: nvidia/cuda:${{ matrix.cuda-version }}-devel-ubuntu20.04
-    steps:
-      - run: |
-          apt-get update && apt-get install -y git build-essential curl
-        env:
-          DEBIAN_FRONTEND: noninteractive
-      - uses: actions/checkout@v4
-      - uses: actions/setup-go@v4
-        with:
-          go-version-file: go.mod
-          cache: true
-      - run: go get ./...
-      - run: |
-          git config --global --add safe.directory /__w/ollama/ollama
-          cores=$(grep '^core id' /proc/cpuinfo |sort -u|wc -l)
-          make -j $cores cuda_v11
-  runners-linux-rocm:
-    needs: [changes]
-    if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
-    strategy:
-      matrix:
-        rocm-version:
-          - '6.1.2'
-    runs-on: linux
-    container: rocm/dev-ubuntu-20.04:${{ matrix.rocm-version }}
-    steps:
-      - run: |
-          apt-get update && apt-get install -y git build-essential curl rocm-libs
-        env:
-          DEBIAN_FRONTEND: noninteractive
-      - uses: actions/checkout@v4
-      - uses: actions/setup-go@v4
-        with:
-          go-version-file: go.mod
-          cache: true
-      - run: go get ./...
-      - run: |
-          git config --global --add safe.directory /__w/ollama/ollama
-          cores=$(grep '^core id' /proc/cpuinfo |sort -u|wc -l)
-          make -j $cores rocm
-  # ROCm generation step
-  runners-windows-rocm:
-    needs: [changes]
-    if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
-    runs-on: windows
-    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-go@v5
-        with:
-          go-version-file: go.mod
-          cache: true
-      - name: Set make jobs default
-        run: |
-          echo "MAKEFLAGS=--jobs=$((Get-ComputerInfo -Property CsProcessors).CsProcessors.NumberOfCores)" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
-      # ROCM installation steps
-      - name: 'Cache ROCm installer'
-        id: cache-rocm
-        uses: actions/cache@v4
-        with:
-          path: rocm-install.exe
-          key: ${{ env.ROCM_WINDOWS_URL }}
-      - name: 'Conditionally Download ROCm'
-        if: steps.cache-rocm.outputs.cache-hit != 'true'
-        run: |
-          $ErrorActionPreference = "Stop"
-          Invoke-WebRequest -Uri "${env:ROCM_WINDOWS_URL}" -OutFile "rocm-install.exe"
-      - name: 'Install ROCm'
-        run: |
-          Start-Process "rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
-      - name: 'Verify ROCm'
-        run: |
-          & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
-          echo "HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path | select -first 1)" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
-      - name: Add msys paths
-        run: |
-          echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-          echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-      - name: Install msys2 tools
-        run: |
-          Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang") -NoNewWindow -Wait
-      - name: make rocm runner
-        run: |
-          import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
-          Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
-          if (!(gcc --version | select-string -quiet clang)) { throw "wrong gcc compiler detected - must be clang" }
-          make -C llama print-HIP_PATH print-HIP_LIB_DIR
-          make rocm
-  # CUDA generation step
-  runners-windows-cuda:
-    needs: [changes]
-    if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
-    runs-on: windows
-    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-go@v5
-        with:
-          go-version-file: go.mod
-          cache: true
-      - name: Set make jobs default
-        run: |
-          echo "MAKEFLAGS=--jobs=$((Get-ComputerInfo -Property CsProcessors).CsProcessors.NumberOfCores)" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
-      # CUDA installation steps
-      - name: 'Cache CUDA installer'
-        id: cache-cuda
-        uses: actions/cache@v4
-        with:
-          path: cuda-install.exe
-          key: ${{ env.CUDA_12_WINDOWS_URL }}
-      - name: 'Conditionally Download CUDA'
-        if: steps.cache-cuda.outputs.cache-hit != 'true'
-        run: |
-          $ErrorActionPreference = "Stop"
-          Invoke-WebRequest -Uri "${env:CUDA_12_WINDOWS_URL}" -OutFile "cuda-install.exe"
-      - name: 'Install CUDA'
-        run: |
-          $subpackages = @("cudart", "nvcc", "cublas", "cublas_dev") | foreach-object {"${_}_${{ env.CUDA_12_WINDOWS_VER }}"}
-          Start-Process "cuda-install.exe" -ArgumentList (@("-s") + $subpackages) -NoNewWindow -Wait
-      - name: 'Verify CUDA'
-        run: |
-          & (resolve-path "c:\Program Files\NVIDIA*\CUDA\v*\bin\nvcc.exe")[0] --version
-          $cudaPath=((resolve-path "c:\Program Files\NVIDIA*\CUDA\v*\bin\nvcc.exe")[0].path | split-path | split-path)
-          $cudaVer=($cudaPath | split-path -leaf ) -replace 'v(\d+).(\d+)', '$1_$2'
-          echo "$cudaPath\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-          echo "CUDA_PATH=$cudaPath" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
-          echo "CUDA_PATH_V${cudaVer}=$cudaPath" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
-          echo "CUDA_PATH_VX_Y=CUDA_PATH_V${cudaVer}" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
-      - name: Add msys paths
-        run: |
-          echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-          echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-      - name: Install msys2 tools
-        run: |
-          Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang") -NoNewWindow -Wait
-      - name: make cuda runner
-        run: |
-          import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
-          Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
-          if (!(gcc --version | select-string -quiet clang)) { throw "wrong gcc compiler detected - must be clang" }
-          make cuda_v$(($env:CUDA_PATH | split-path -leaf) -replace 'v(\d+).*', '$1')
-  runners-cpu:
-    needs: [changes]
-    if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
-    strategy:
-      matrix:
-        os: [ubuntu-latest, macos-latest, windows-2019]
-        arch: [amd64, arm64]
-        exclude:
-          - os: ubuntu-latest
-            arch: arm64
-          - os: windows-2019
-            arch: arm64
-    runs-on: ${{ matrix.os }}
-    env:
-      GOARCH: ${{ matrix.arch }}
-      ARCH: ${{ matrix.arch }}
-      CGO_ENABLED: '1'
-    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-go@v5
-        with:
-          go-version-file: go.mod
-          cache: true
-      - name: Add msys paths
-        if: ${{ startsWith(matrix.os, 'windows-') }}
-        run: |
-          echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-          echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-      - name: Install msys2 tools
-        if: ${{ startsWith(matrix.os, 'windows-') }}
-        run: |
-          Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang") -NoNewWindow -Wait
-      - name: 'Build Windows Go Runners'
-        if: ${{ startsWith(matrix.os, 'windows-') }}
-        run: |
-          $gopath=(get-command go).source | split-path -parent
-          $gccpath=(get-command gcc).source | split-path -parent
-          import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
-          Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
-          $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
-          $env:PATH="$gopath;$gccpath;$env:PATH"
-          echo $env:PATH
-          if (!(gcc --version | select-string -quiet clang)) { throw "wrong gcc compiler detected - must be clang" }
-          make -j 4
-      - name: 'Build Unix Go Runners'
-        if: ${{ ! startsWith(matrix.os, 'windows-') }}
-        run: make -j 4
-      - run: go build .
-  lint:
-    strategy:
-      matrix:
-        os: [ubuntu-latest, macos-latest, windows-2019]
-        arch: [amd64, arm64]
-        exclude:
-          - os: ubuntu-latest
-            arch: arm64
-          - os: windows-2019
-            arch: arm64
-          - os: macos-latest
-            arch: amd64
-    runs-on: ${{ matrix.os }}
-    env:
-      GOARCH: ${{ matrix.arch }}
-      CGO_ENABLED: '1'
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - name: Add msys paths
-        if: ${{ startsWith(matrix.os, 'windows-') }}
-        run: |
-          echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-          echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-      - name: Install msys2 tools
-        if: ${{ startsWith(matrix.os, 'windows-') }}
-        run: |
-          Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang") -NoNewWindow -Wait
-      - uses: actions/setup-go@v5
-        with:
-          go-version-file: go.mod
-          cache: false
-      - run: |
-          case ${{ matrix.arch }} in
-            amd64) echo ARCH=x86_64 ;;
-            arm64) echo ARCH=arm64 ;;
-          esac >>$GITHUB_ENV
-        shell: bash
-      - uses: golangci/golangci-lint-action@v6
-        with:
-          args: --timeout 10m0s -v
-  test:
-    strategy:
-      matrix:
-        os: [ubuntu-latest, macos-latest, windows-2019]
-        arch: [amd64]
-        exclude:
-          - os: ubuntu-latest
-            arch: arm64
-          - os: windows-2019
-            arch: arm64
-    runs-on: ${{ matrix.os }}
-    env:
-      GOARCH: ${{ matrix.arch }}
-      CGO_ENABLED: '1'
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - name: Add msys paths
-        if: ${{ startsWith(matrix.os, 'windows-') }}
-        run: |
-          echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-          echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-      - name: Install msys2 tools
-        if: ${{ startsWith(matrix.os, 'windows-') }}
-        run: |
-          Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang") -NoNewWindow -Wait
-      - uses: actions/setup-go@v5
-        with:
-          go-version-file: go.mod
-          cache: true
-      - run: |
-          case ${{ matrix.arch }} in
-            amd64) echo ARCH=amd64 ;;
-            arm64) echo ARCH=arm64 ;;
-          esac >>$GITHUB_ENV
-        shell: bash
-      - run: go test ./...
-  patches:
-    needs: [changes]
-    if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - name: Verify patches carry all the changes
-        run: |
-          make apply-patches sync && git diff --compact-summary --exit-code llama
+  linux:
+    needs: [changes]
+    if: ${{ needs.changes.outputs.changed == 'True' }}
+    strategy:
+      matrix:
+        include:
+          - container: nvidia/cuda:11.8.0-devel-ubuntu22.04
+            preset: CUDA
+          - container: rocm/dev-ubuntu-22.04:6.1.2
+            preset: ROCm
+            extra-packages: rocm-libs
+    runs-on: ubuntu-latest
+    container: ${{ matrix.container }}
+    steps:
+      - uses: actions/checkout@v4
+      - run: |
+          apt-get update
+          apt-get install -y cmake pkg-config ${{ matrix.extra-packages }}
+        env:
+          DEBIAN_FRONTEND: noninteractive
+      - run: |
+          cmake --preset ${{ matrix.preset }}
+          cmake --build --preset ${{ matrix.preset }} --parallel
+  test:
+    strategy:
+      matrix:
+        os: [ubuntu-latest, macos-latest, windows-latest]
+    runs-on: ${{ matrix.os }}
+    env:
+      CGO_ENABLED: '1'
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-go@v5
+        with:
+          go-version-file: go.mod
+      - uses: golangci/golangci-lint-action@v6
+        with:
+          args: --timeout 10m0s -v
+      - run: go test ./...
+  patches:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Verify patches apply cleanly and do not change files
+        run: |
+          make -f Makefile2 clean checkout sync
+          git diff --compact-summary --exit-code

.gitignore (vendored, 4 changed lines)

@@ -12,4 +12,6 @@ test_data
 *.crt
 llama/build
 __debug_bin*
 llama/vendor
+model/model_test/testdata/*/
+!model/model_test/testdata/*.*

CMakeLists.txt (new file, 52 lines)

@@ -0,0 +1,52 @@
cmake_minimum_required(VERSION 3.21)
project(Ollama C CXX)
include(CheckLanguage)
find_package(Threads REQUIRED)
set(CMAKE_BUILD_TYPE Release)
set(BUILD_SHARED_LIBS ON)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
set(GGML_BUILD ON)
set(GGML_SHARED ON)
set(GGML_CCACHE ON)
set(GGML_BACKEND_DL ON)
set(GGML_BACKEND_SHARED ON)
set(GGML_SCHED_MAX_COPIES 4)
set(GGML_LLAMAFILE ON)
set(GGML_CPU_ALL_VARIANTS ON)
set(GGML_CUDA_PEER_MAX_BATCH_SIZE 128)
set(GGML_CUDA_GRAPHS ON)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/include)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cpu)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cpu/amx)
set(GGML_CPU ON)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src)
set_property(TARGET ggml PROPERTY EXCLUDE_FROM_ALL TRUE)
check_language(CUDA)
if(CMAKE_CUDA_COMPILER)
if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.24" AND NOT CMAKE_CUDA_ARCHITECTURES)
set(CMAKE_CUDA_ARCHITECTURES "native")
endif()
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cuda)
endif()
check_language(HIP)
if(CMAKE_HIP_COMPILER)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-hip)
endif()

CMakePresets.json (new file, 98 lines)

@@ -0,0 +1,98 @@
{
"version": 3,
"configurePresets": [
{
"name": "Default",
"binaryDir": "${sourceDir}/dist/build",
"cacheVariables": {
"CMAKE_BUILD_TYPE": "Release"
}
},
{
"name": "CUDA",
"inherits": [ "Default" ]
},
{
"name": "CUDA 11",
"inherits": [ "CUDA" ],
"cacheVariables": {
"CMAKE_CUDA_ARCHITECTURES": "50;52;53;60;61;62;70;72;75;80;86"
}
},
{
"name": "CUDA 12",
"inherits": [ "CUDA" ],
"cacheVariables": {
"CMAKE_CUDA_ARCHITECTURES": "60;61;62;70;72;75;80;86;87;89;90;90a"
}
},
{
"name": "JetPack 5",
"inherits": [ "CUDA" ],
"cacheVariables": {
"CMAKE_CUDA_ARCHITECTURES": "72;87"
}
},
{
"name": "JetPack 6",
"inherits": [ "CUDA" ],
"cacheVariables": {
"CMAKE_CUDA_ARCHITECTURES": "87"
}
},
{
"name": "ROCm",
"inherits": [ "Default" ]
},
{
"name": "ROCm 6",
"inherits": [ "ROCm" ],
"cacheVariables": {
"CMAKE_HIP_ARCHITECTURES": "gfx900;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
}
}
],
"buildPresets": [
{
"name": "Default",
"configurePreset": "Default",
"configuration": "Release",
"targets": [ "ggml-cpu" ]
},
{
"name": "CUDA",
"configurePreset": "CUDA",
"targets": [ "ggml-cuda" ]
},
{
"name": "CUDA 11",
"inherits": [ "CUDA" ],
"configurePreset": "CUDA 11"
},
{
"name": "CUDA 12",
"inherits": [ "CUDA" ],
"configurePreset": "CUDA 12"
},
{
"name": "JetPack 5",
"inherits": [ "CUDA" ],
"configurePreset": "JetPack 5"
},
{
"name": "JetPack 6",
"inherits": [ "CUDA" ],
"configurePreset": "JetPack 6"
},
{
"name": "ROCm",
"configurePreset": "ROCm",
"targets": [ "ggml-hip" ]
},
{
"name": "ROCm 6",
"inherits": [ "ROCm" ],
"configurePreset": "ROCm 6"
}
]
}


@@ -1,201 +1,161 @@
-ARG GOLANG_VERSION=1.22.8
-ARG CUDA_VERSION_11=11.3.1
-ARG CUDA_VERSION_12=12.4.0
-ARG ROCM_VERSION=6.1.2
-ARG JETPACK_6=r36.2.0
-ARG JETPACK_5=r35.4.1
-### To create a local image for building linux binaries on mac or windows with efficient incremental builds
-#
-# docker build --platform linux/amd64 -t builder-amd64 -f Dockerfile --target unified-builder-amd64 .
-# docker run --platform linux/amd64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-amd64
-#
-### Then incremental builds will be much faster in this container
-#
-# make -j 10 dist
-#
-FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS unified-builder-amd64
-ARG GOLANG_VERSION
-ARG CUDA_VERSION_11
-ARG CUDA_VERSION_12
-COPY ./scripts/rh_linux_deps.sh /
-ENV PATH /opt/rh/devtoolset-10/root/usr/bin:/usr/local/cuda/bin:$PATH
-ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64
-RUN GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
-RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo && \
-    dnf clean all && \
-    dnf install -y \
-    zsh \
-    cuda-toolkit-$(echo ${CUDA_VERSION_11} | cut -f1-2 -d. | sed -e "s/\./-/g") \
-    cuda-toolkit-$(echo ${CUDA_VERSION_12} | cut -f1-2 -d. | sed -e "s/\./-/g")
-# TODO intel oneapi goes here...
-ENV GOARCH amd64
-ENV CGO_ENABLED 1
-WORKDIR /go/src/github.com/ollama/ollama/
-ENTRYPOINT [ "zsh" ]
-### To create a local image for building linux binaries on mac or linux/arm64 with efficient incremental builds
-# Note: this does not contain jetson variants
-#
-# docker build --platform linux/arm64 -t builder-arm64 -f Dockerfile --target unified-builder-arm64 .
-# docker run --platform linux/arm64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-arm64
-#
-FROM --platform=linux/arm64 rockylinux:8 AS unified-builder-arm64
-ARG GOLANG_VERSION
-ARG CUDA_VERSION_11
-ARG CUDA_VERSION_12
-COPY ./scripts/rh_linux_deps.sh /
-RUN GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
-RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo && \
-    dnf config-manager --set-enabled appstream && \
-    dnf clean all && \
-    dnf install -y \
-    zsh \
-    cuda-toolkit-$(echo ${CUDA_VERSION_11} | cut -f1-2 -d. | sed -e "s/\./-/g") \
-    cuda-toolkit-$(echo ${CUDA_VERSION_12} | cut -f1-2 -d. | sed -e "s/\./-/g")
-ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH:/usr/local/cuda/bin
-ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64
-ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs:/opt/amdgpu/lib64
-ENV GOARCH arm64
-ENV CGO_ENABLED 1
-WORKDIR /go/src/github.com/ollama/ollama/
-ENTRYPOINT [ "zsh" ]
-FROM --platform=linux/amd64 unified-builder-amd64 AS build-amd64
-COPY . .
-ARG OLLAMA_SKIP_CUDA_GENERATE
-ARG OLLAMA_SKIP_ROCM_GENERATE
-ARG OLLAMA_FAST_BUILD
-ARG VERSION
-ARG CUSTOM_CPU_FLAGS
-RUN --mount=type=cache,target=/root/.ccache \
-    if grep "^flags" /proc/cpuinfo|grep avx>/dev/null; then \
-        make -j $(nproc) dist ; \
-    else \
-        make -j 5 dist ; \
-    fi
-RUN cd dist/linux-$GOARCH && \
-    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz
-RUN if [ -z ${OLLAMA_SKIP_ROCM_GENERATE} ] ; then \
-    cd dist/linux-$GOARCH-rocm && \
-    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-rocm.tgz ;\
-    fi
-# Jetsons need to be built in discrete stages
-FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_5} AS runners-jetpack5-arm64
-ARG GOLANG_VERSION
-RUN apt-get update && apt-get install -y git curl ccache && \
-    curl -s -L https://dl.google.com/go/go${GOLANG_VERSION}.linux-arm64.tar.gz | tar xz -C /usr/local && \
-    ln -s /usr/local/go/bin/go /usr/local/bin/go && \
-    ln -s /usr/local/go/bin/gofmt /usr/local/bin/gofmt && \
-    apt-get clean && rm -rf /var/lib/apt/lists/*
-WORKDIR /go/src/github.com/ollama/ollama/
-COPY . .
-ARG CGO_CFLAGS
-ENV GOARCH arm64
-ARG VERSION
-RUN --mount=type=cache,target=/root/.ccache \
-    make -j 5 dist_cuda_v11 \
-        CUDA_ARCHITECTURES="72;87" \
-        GPU_RUNNER_VARIANT=_jetpack5 \
-        DIST_LIB_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ollama \
-        DIST_GPU_RUNNER_DEPS_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ollama/cuda_jetpack5
-FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_6} AS runners-jetpack6-arm64
-ARG GOLANG_VERSION
-RUN apt-get update && apt-get install -y git curl ccache && \
-    curl -s -L https://dl.google.com/go/go${GOLANG_VERSION}.linux-arm64.tar.gz | tar xz -C /usr/local && \
-    ln -s /usr/local/go/bin/go /usr/local/bin/go && \
-    ln -s /usr/local/go/bin/gofmt /usr/local/bin/gofmt && \
-    apt-get clean && rm -rf /var/lib/apt/lists/*
-WORKDIR /go/src/github.com/ollama/ollama/
-COPY . .
-ARG CGO_CFLAGS
-ENV GOARCH arm64
-ARG VERSION
-RUN --mount=type=cache,target=/root/.ccache \
-    make -j 5 dist_cuda_v12 \
-        CUDA_ARCHITECTURES="87" \
-        GPU_RUNNER_VARIANT=_jetpack6 \
-        DIST_LIB_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ollama \
-        DIST_GPU_RUNNER_DEPS_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ollama/cuda_jetpack6
-FROM --platform=linux/arm64 unified-builder-arm64 AS build-arm64
-COPY . .
-ARG OLLAMA_SKIP_CUDA_GENERATE
-ARG OLLAMA_FAST_BUILD
-ARG VERSION
-RUN --mount=type=cache,target=/root/.ccache \
-    make -j 5 dist
-COPY --from=runners-jetpack5-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
-COPY --from=runners-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
-RUN cd dist/linux-$GOARCH && \
-    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz
-RUN cd dist/linux-$GOARCH-jetpack5 && \
-    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-jetpack5.tgz
-RUN cd dist/linux-$GOARCH-jetpack6 && \
-    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-jetpack6.tgz
-FROM --platform=linux/amd64 scratch AS dist-amd64
-COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz /
-FROM --platform=linux/arm64 scratch AS dist-arm64
-COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz /
-FROM dist-$TARGETARCH AS dist
-# For amd64 container images, filter out cuda/rocm to minimize size
-FROM build-amd64 AS runners-cuda-amd64
-RUN rm -rf \
-    ./dist/linux-amd64/lib/ollama/libggml_hipblas.so \
-    ./dist/linux-amd64/lib/ollama/runners/rocm*
-FROM build-amd64 AS runners-rocm-amd64
-RUN rm -rf \
-    ./dist/linux-amd64/lib/ollama/libggml_cuda*.so \
-    ./dist/linux-amd64/lib/ollama/libcu*.so* \
-    ./dist/linux-amd64/lib/ollama/runners/cuda*
-FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-amd64
-RUN apt-get update && \
-    apt-get install -y ca-certificates && \
-    apt-get clean && rm -rf /var/lib/apt/lists/*
-COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
-COPY --from=runners-cuda-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
-FROM --platform=linux/arm64 ubuntu:22.04 AS runtime-arm64
-RUN apt-get update && \
-    apt-get install -y ca-certificates && \
-    apt-get clean && rm -rf /var/lib/apt/lists/*
-COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/
-COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
-COPY --from=runners-jetpack5-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ /lib/
-COPY --from=runners-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ /lib/
-# ROCm libraries larger so we keep it distinct from the CPU/CUDA image
-FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-rocm
-# Frontload the rocm libraries which are large, and rarely change to increase chance of a common layer
-# across releases
-COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64-rocm/lib/ /lib/
-RUN apt-get update && \
-    apt-get install -y ca-certificates && \
-    apt-get clean && rm -rf /var/lib/apt/lists/*
-COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
-COPY --from=runners-rocm-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
-EXPOSE 11434
-ENV OLLAMA_HOST 0.0.0.0
-ENTRYPOINT ["/bin/ollama"]
-CMD ["serve"]
-FROM runtime-$TARGETARCH
-EXPOSE 11434
-ENV OLLAMA_HOST 0.0.0.0
-ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
+# vim: filetype=dockerfile
+ARG FLAVOR=${TARGETARCH}
+ARG ROCMVERSION=6.1.2
+ARG JETPACK5VERSION=r35.4.1
+ARG JETPACK6VERSION=r36.2.0
+ARG CMAKEVERSION=3.31.2
+FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCMVERSION}-complete AS base-amd64
+RUN sed -i -e 's/mirror.centos.org/vault.centos.org/g' -e 's/^#.*baseurl=http/baseurl=http/g' -e 's/^mirrorlist=http/#mirrorlist=http/g' /etc/yum.repos.d/*.repo \
+    && yum install -y yum-utils devtoolset-10-gcc devtoolset-10-gcc-c++ \
+    && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo \
+    && curl -s -L https://github.com/ccache/ccache/releases/download/v4.10.2/ccache-4.10.2-linux-x86_64.tar.xz | tar -Jx -C /usr/local/bin --strip-components 1
+ENV PATH=/opt/rh/devtoolset-10/root/usr/bin:/opt/rh/devtoolset-11/root/usr/bin:$PATH
+FROM --platform=linux/arm64 rockylinux:8 AS base-arm64
+# install epel-release for ccache
+RUN yum install -y yum-utils epel-release \
+    && yum install -y clang ccache \
+    && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo
+ENV CC=clang CXX=clang++
+FROM base-${TARGETARCH} AS base
+ARG CMAKEVERSION
+RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1
+COPY CMakeLists.txt CMakePresets.json .
+COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
+ENV LDFLAGS=-s
+FROM base AS cpu
+# amd64 uses gcc which requires devtoolset-11 for AVX extensions while arm64 uses clang
+RUN if [ "$(uname -m)" = "x86_64" ]; then yum install -y devtoolset-11-gcc devtoolset-11-gcc-c++; fi
+ENV PATH=/opt/rh/devtoolset-11/root/usr/bin:$PATH
+RUN --mount=type=cache,target=/root/.ccache \
+    cmake --preset 'Default' && cmake --build --parallel --preset 'Default'
+FROM base AS cuda-11
+ARG CUDA11VERSION=11.3
+RUN yum install -y cuda-toolkit-${CUDA11VERSION//./-}
+ENV PATH=/usr/local/cuda-11/bin:$PATH
+RUN --mount=type=cache,target=/root/.ccache \
+    cmake --preset 'CUDA 11' && cmake --build --parallel --preset 'CUDA 11'
+FROM base AS cuda-12
+ARG CUDA12VERSION=12.4
+RUN yum install -y cuda-toolkit-${CUDA12VERSION//./-}
+ENV PATH=/usr/local/cuda-12/bin:$PATH
+RUN --mount=type=cache,target=/root/.ccache \
+    cmake --preset 'CUDA 12' && cmake --build --parallel --preset 'CUDA 12'
+FROM base AS rocm-6
+RUN --mount=type=cache,target=/root/.ccache \
+    cmake --preset 'ROCm 6' && cmake --build --parallel --preset 'ROCm 6'
+FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK5VERSION} AS jetpack-5
+ARG CMAKEVERSION
+RUN apt-get update && apt-get install -y curl ccache \
+    && curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1
+COPY CMakeLists.txt CMakePresets.json .
+COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
+RUN --mount=type=cache,target=/root/.ccache \
+    cmake --preset 'JetPack 5' && cmake --build --parallel --preset 'JetPack 5'
+FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK6VERSION} AS jetpack-6
+ARG CMAKEVERSION
+RUN apt-get update && apt-get install -y curl ccache \
+    && curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1
+COPY CMakeLists.txt CMakePresets.json .
+COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
+RUN --mount=type=cache,target=/root/.ccache \
+    cmake --preset 'JetPack 6' && cmake --build --parallel --preset 'JetPack 6'
+FROM base AS build
+ARG GOVERSION=1.23.4
+RUN curl -fsSL https://golang.org/dl/go${GOVERSION}.linux-$(case $(uname -m) in x86_64) echo amd64 ;; aarch64) echo arm64 ;; esac).tar.gz | tar xz -C /usr/local
+ENV PATH=/usr/local/go/bin:$PATH
+WORKDIR /go/src/github.com/ollama/ollama
+COPY . .
+ARG GOFLAGS="'-ldflags=-w -s'"
+ENV CGO_ENABLED=1
+RUN --mount=type=cache,target=/root/.cache/go-build \
+    go build -trimpath -buildmode=pie -o /bin/ollama .
+FROM --platform=linux/amd64 scratch AS amd64
+COPY --from=cuda-11 --chmod=644 \
+    dist/build/lib/libggml-cuda.so \
+    /usr/local/cuda/lib64/libcublas.so.11 \
+    /usr/local/cuda/lib64/libcublasLt.so.11 \
+    /usr/local/cuda/lib64/libcudart.so.11.0 \
+    /lib/ollama/cuda_v11/
+COPY --from=cuda-12 --chmod=644 \
+    dist/build/lib/libggml-cuda.so \
+    /usr/local/cuda/lib64/libcublas.so.12 \
+    /usr/local/cuda/lib64/libcublasLt.so.12 \
+    /usr/local/cuda/lib64/libcudart.so.12 \
+    /lib/ollama/cuda_v12/
+FROM --platform=linux/arm64 scratch AS arm64
+COPY --from=cuda-11 --chmod=644 \
+    dist/build/lib/libggml-cuda.so \
+    /usr/local/cuda/lib64/libcublas.so.11 \
+    /usr/local/cuda/lib64/libcublasLt.so.11 \
+    /usr/local/cuda/lib64/libcudart.so.11.0 \
+    /lib/ollama/cuda_v11/
+COPY --from=cuda-12 --chmod=644 \
+    dist/build/lib/libggml-cuda.so \
+    /usr/local/cuda/lib64/libcublas.so.12 \
+    /usr/local/cuda/lib64/libcublasLt.so.12 \
+    /usr/local/cuda/lib64/libcudart.so.12 \
+    /lib/ollama/cuda_v12/
+COPY --from=jetpack-5 --chmod=644 \
+    dist/build/lib/libggml-cuda.so \
+    /usr/local/cuda/lib64/libcublas.so.11 \
+    /usr/local/cuda/lib64/libcublasLt.so.11 \
+    /usr/local/cuda/lib64/libcudart.so.11.0 \
+    /lib/ollama/cuda_jetpack5/
+COPY --from=jetpack-6 --chmod=644 \
+    dist/build/lib/libggml-cuda.so \
+    /usr/local/cuda/lib64/libcublas.so.12 \
+    /usr/local/cuda/lib64/libcublasLt.so.12 \
+    /usr/local/cuda/lib64/libcudart.so.12 \
+    /lib/ollama/cuda_jetpack6/
+FROM --platform=linux/arm64 scratch AS rocm
+COPY --from=rocm-6 --chmod=644 \
+    dist/build/lib/libggml-hip.so \
+    /opt/rocm/lib/libamdhip64.so.6 \
+    /opt/rocm/lib/libhipblas.so.2 \
+    /opt/rocm/lib/librocblas.so.4 \
+    /opt/rocm/lib/libamd_comgr.so.2 \
+    /opt/rocm/lib/libhsa-runtime64.so.1 \
+    /opt/rocm/lib/librocprofiler-register.so.0 \
+    /opt/amdgpu/lib64/libdrm_amdgpu.so.1 \
+    /opt/amdgpu/lib64/libdrm.so.2 \
+    /usr/lib64/libnuma.so.1 \
+    /lib/ollama/rocm/
+COPY --from=rocm-6 /opt/rocm/lib/rocblas/ /lib/ollama/rocm/rocblas/
+FROM ${FLAVOR} AS archive
+COPY --from=cpu --chmod=644 \
+    dist/build/lib/libggml-base.so \
+    dist/build/lib/libggml-cpu-*.so \
+    /lib/ollama/
+COPY --from=build /bin/ollama /bin/ollama
+FROM ubuntu:20.04
+RUN apt-get update \
+    && apt-get install -y ca-certificates \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+COPY --from=archive /bin/ /usr/bin/
 ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
+COPY --from=archive /lib/ollama/ /usr/lib/ollama/
+ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/lib/ollama
 ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
 ENV NVIDIA_VISIBLE_DEVICES=all
+ENV OLLAMA_HOST=0.0.0.0:11434
+EXPOSE 11434
 ENTRYPOINT ["/bin/ollama"]
 CMD ["serve"]

Makefile (deleted, 103 lines)

@@ -1,103 +0,0 @@
# top level makefile for Ollama
include make/common-defs.make
# Determine which if any GPU runners we should build
include make/cuda-v11-defs.make
include make/cuda-v12-defs.make
include make/rocm-defs.make
ifeq ($(CUSTOM_CPU_FLAGS),)
ifeq ($(ARCH),amd64)
RUNNER_TARGETS=cpu
endif
# Without CUSTOM_CPU_FLAGS we default to build both v11 and v12 if present
ifeq ($(OLLAMA_SKIP_CUDA_GENERATE),)
ifneq ($(CUDA_11_COMPILER),)
RUNNER_TARGETS += cuda_v11
endif
ifneq ($(CUDA_12_COMPILER),)
RUNNER_TARGETS += cuda_v12
endif
endif
else # CUSTOM_CPU_FLAGS is set, we'll build only the latest cuda version detected
ifneq ($(CUDA_12_COMPILER),)
RUNNER_TARGETS += cuda_v12
else ifneq ($(CUDA_11_COMPILER),)
RUNNER_TARGETS += cuda_v11
endif
endif
ifeq ($(OLLAMA_SKIP_ROCM_GENERATE),)
ifneq ($(HIP_COMPILER),)
RUNNER_TARGETS += rocm
endif
endif
all: runners exe
dist: $(addprefix dist_, $(RUNNER_TARGETS)) dist_exe
dist_%:
@$(MAKE) --no-print-directory -f make/Makefile.$* dist
runners: $(RUNNER_TARGETS)
$(RUNNER_TARGETS):
@$(MAKE) --no-print-directory -f make/Makefile.$@
exe dist_exe:
@$(MAKE) --no-print-directory -f make/Makefile.ollama $@
help-sync apply-patches create-patches sync sync-clean:
@$(MAKE) --no-print-directory -f make/Makefile.sync $@
test integration lint:
@$(MAKE) --no-print-directory -f make/Makefile.test $@
clean:
rm -rf $(BUILD_DIR) $(DIST_LIB_DIR) $(OLLAMA_EXE) $(DIST_OLLAMA_EXE)
go clean -cache
help:
@echo "The following make targets will help you build Ollama"
@echo ""
@echo " make all # (default target) Build Ollama llm subprocess runners, and the primary ollama executable"
@echo " make runners # Build Ollama llm subprocess runners; after you may use 'go build .' to build the primary ollama exectuable"
@echo " make <runner> # Build specific runners. Enabled: '$(RUNNER_TARGETS)'"
@echo " make dist # Build the runners and primary ollama executable for distribution"
@echo " make help-sync # Help information on vendor update targets"
@echo " make help-runners # Help information on runner targets"
@echo ""
@echo "The following make targets will help you test Ollama"
@echo ""
@echo " make test # Run unit tests"
@echo " make integration # Run integration tests. You must 'make all' first"
@echo " make lint # Run lint and style tests"
@echo ""
@echo "For more information see 'docs/development.md'"
@echo ""
help-runners:
@echo "The following runners will be built based on discovered GPU libraries: '$(RUNNER_TARGETS)'"
@echo ""
@echo "GPU Runner CPU Flags: '$(GPU_RUNNER_CPU_FLAGS)' (Override with CUSTOM_CPU_FLAGS)"
@echo ""
@echo "# CUDA_PATH sets the location where CUDA toolkits are present"
@echo "CUDA_PATH=$(CUDA_PATH)"
@echo " CUDA_11_PATH=$(CUDA_11_PATH)"
@echo " CUDA_11_COMPILER=$(CUDA_11_COMPILER)"
@echo " CUDA_12_PATH=$(CUDA_12_PATH)"
@echo " CUDA_12_COMPILER=$(CUDA_12_COMPILER)"
@echo ""
@echo "# HIP_PATH sets the location where the ROCm toolkit is present"
@echo "HIP_PATH=$(HIP_PATH)"
@echo " HIP_COMPILER=$(HIP_COMPILER)"
.PHONY: all exe dist help help-sync help-runners test integration lint runners clean $(RUNNER_TARGETS)
# Handy debugging for make variables
print-%:
@echo '$*=$($*)'

Makefile2 (new file, 46 lines)

@@ -0,0 +1,46 @@
UPSTREAM=https://github.com/ggerganov/llama.cpp.git
WORKDIR=llama/vendor
FETCH_HEAD=46e3556e01b824e52395fb050b29804b6cff2a7c
all: sync
.PHONY: sync
sync: llama/llama.cpp ml/backend/ggml/ggml
.PHONY: llama/llama.cpp
llama/llama.cpp: llama/vendor/ apply_patches
rsync -arvzc -f "merge $@/.rsync-filter" $< $@
.PHONY: ml/backend/ggml/ggml apply_patches
ml/backend/ggml/ggml: llama/vendor/ggml/ apply_patches
rsync -arvzc -f "merge $@/.rsync-filter" $< $@
PATCHES=$(wildcard llama/patches/*.patch)
.PHONY: apply_patches
.NOTPARALLEL:
apply_patches: $(addsuffix ed, $(PATCHES))
%.patched: %.patch
@if git -c user.name=nobody -c 'user.email=<>' -C $(WORKDIR) am -3 $(realpath $<); then touch $@; else git -C $(WORKDIR) am --abort; exit 1; fi
.PHONY: checkout
checkout: $(WORKDIR)
git -C $(WORKDIR) fetch
git -C $(WORKDIR) checkout -f $(FETCH_HEAD)
$(WORKDIR):
git clone $(UPSTREAM) $(WORKDIR)
.PHONE: format_patches
format_patches: llama/patches
git -C $(WORKDIR) format-patch \
--no-signature \
--no-numbered \
--zero-commit \
-o $(realpath $<) \
$(FETCH_HEAD)
.PHONE: clean
clean: checkout
$(RM) $(addsuffix ed, $(PATCHES))


@@ -539,4 +539,5 @@ See the [API documentation](./docs/api.md) for all endpoints.
 ### Observability
 - [OpenLIT](https://github.com/openlit/openlit) is an OpenTelemetry-native tool for monitoring Ollama Applications & GPUs using traces and metrics.
 - [HoneyHive](https://docs.honeyhive.ai/integrations/ollama) is an AI observability and evaluation platform for AI agents. Use HoneyHive to evaluate agent performance, interrogate failures, and monitor quality in production.
+- [Langfuse](https://langfuse.com/docs/integrations/ollama) is an open source LLM observability platform that enables teams to collaboratively monitor, evaluate and debug AI applications.

cache/cache.go (vendored, new file, 384 lines)

@@ -0,0 +1,384 @@
package cache
import (
"errors"
"fmt"
"log/slog"
"math"
"slices"
"github.com/ollama/ollama/ml"
)
var ErrNotSupported = errors.New("model does not support operation")
type Cache interface {
// used by model implementations
Sub(i int) Cache
Put(ctx ml.Context, key, value ml.Tensor) (ml.Tensor, ml.Tensor, ml.Tensor)
// cache management
Close()
StartForward(ctx ml.Context, seqs []int) error
CopyPrefix(srcSeq, dstSeq int, len int)
Remove(seq int, beginIndex, endIndex int) error
}
type Causal struct {
DType ml.DType
Capacity int
// current forward pass
curLayer int
curPos int
curBatchSize int
curMask ml.Tensor
curCellRange cellRange
// metadata
cells []cacheCell
seqNextPos map[int]int
cellRanges map[int]cellRange
// cache data storage
backend ml.Backend
cacheCtx ml.Context
keys, values []ml.Tensor
}
type seqCell struct {
seq int
pos int
}
type cacheCell struct {
sequences []seqCell
}
type cellRange struct {
min int
max int
}
func (cell cacheCell) findSeq(seq int) *seqCell {
for i := range cell.sequences {
if cell.sequences[i].seq == seq {
return &cell.sequences[i]
}
}
return nil
}
func NewCausalCache(backend ml.Backend, capacity int, dtype ml.DType) Cache {
return &Causal{
Capacity: capacity,
DType: dtype,
cells: make([]cacheCell, capacity),
seqNextPos: make(map[int]int),
cellRanges: make(map[int]cellRange),
backend: backend,
// TODO(jessegross): This context is not sized appropriately
cacheCtx: backend.NewContext(),
}
}
func (c *Causal) Close() {
c.cacheCtx.Close()
}
var ErrKvCacheFull = errors.New("could not find a kv cache slot")
func (c *Causal) StartForward(ctx ml.Context, seqs []int) error {
c.curBatchSize = len(seqs)
var err error
c.curPos, err = c.findStartPos()
if errors.Is(err, ErrKvCacheFull) {
c.defrag()
c.curPos, err = c.findStartPos()
}
if err != nil {
return err
}
// TODO(jessegross): There should be a better way to do this
origSeq := make(map[int]int)
for k, v := range c.seqNextPos {
origSeq[k] = v
}
c.curCellRange = newRange()
for i, seq := range seqs {
c.cells[c.curPos+i] = cacheCell{sequences: []seqCell{{seq: seq, pos: c.seqNextPos[seq]}}}
c.seqNextPos[seq]++
ranges := c.cellRanges[seq]
if c.curPos+i > ranges.max {
ranges.max = c.curPos + i
}
if ranges.max > c.curCellRange.max {
c.curCellRange.max = ranges.max
}
if c.curPos+i < ranges.min {
ranges.min = c.curPos + i
}
if ranges.min < c.curCellRange.min {
c.curCellRange.min = ranges.min
}
c.cellRanges[seq] = ranges
}
c.curMask, err = c.buildMask(ctx, origSeq, seqs)
return err
}
func newRange() cellRange {
return cellRange{
min: math.MaxInt,
max: 0,
}
}
func (c *Causal) findStartPos() (int, error) {
var start, count int
for i := range c.cells {
if len(c.cells[i].sequences) == 0 {
count++
if count >= c.curBatchSize {
return start, nil
}
} else {
start = i + 1
count = 0
}
}
return 0, fmt.Errorf("%w (length: %v)", ErrKvCacheFull, c.Capacity)
}
func (c *Causal) buildMask(ctx ml.Context, origSeq map[int]int, seqs []int) (ml.Tensor, error) {
// TODO(jessegross): This makes a number of simplifications such as no padding
len := c.curCellRange.max - c.curCellRange.min
mask := make([]float32, c.curBatchSize*len)
for i := range c.curBatchSize {
for j := c.curCellRange.min; j < c.curCellRange.max; j++ {
cellSeq := c.cells[j].findSeq(seqs[i])
if cellSeq == nil || cellSeq.pos > origSeq[seqs[i]]+i {
mask[i*len+(j-c.curCellRange.min)] = float32(math.Inf(-1))
}
}
}
return ctx.FromFloatSlice(mask, len, c.curBatchSize)
}
func moveCell(ctx ml.Context, objs []ml.Tensor, src, dst, len int) {
for _, obj := range objs {
srcView := obj.View(ctx, int(obj.Stride(2))*src, int(obj.Dim(0)*obj.Dim(1))*len)
dstView := obj.View(ctx, int(obj.Stride(2))*dst, int(obj.Dim(0)*obj.Dim(1))*len)
ctx.Forward(srcView.Copy(ctx, dstView))
}
}
func (c *Causal) defrag() {
slog.Debug("defragmenting kv cache")
// Defrag strategy:
// - Search for empty holes at the beginning of the cache,
// filling them with active data starting at the end
// - If there are contiguous elements that need to be moved,
// combine them into a single operation by holding new moves
// until we see the next one is non-contiguous
// - Fill up the context with the maximum number of operations it
// can hold then compute that and continue with a new context
// TODO(jessegross):
// - Need to size the context and compute maxMoves correctly
// - Just compacts, doesn't optimize placement
maxMoves := 8192 / (6 * len(c.keys))
ctx := c.backend.NewContext()
moves := 0
var pendingSrc, pendingDst, pendingLen int
for dst := range c.cells {
if len(c.cells[dst].sequences) == 0 {
for src := len(c.cells) - 1; src > dst; src-- {
if len(c.cells[src].sequences) != 0 {
c.cells[dst] = c.cells[src]
c.cells[src] = cacheCell{}
if pendingLen > 0 {
if src == pendingSrc-pendingLen && dst == pendingDst+pendingLen {
pendingSrc = src
pendingLen++
break
} else {
moveCell(ctx, c.keys, pendingSrc, pendingDst, pendingLen)
moveCell(ctx, c.values, pendingSrc, pendingDst, pendingLen)
moves++
}
}
pendingSrc = src
pendingDst = dst
pendingLen = 1
break
}
}
}
if moves >= maxMoves {
ctx.Compute(nil)
ctx.Close()
ctx = c.backend.NewContext()
moves = 0
}
}
if pendingLen > 0 {
moveCell(ctx, c.keys, pendingSrc, pendingDst, pendingLen)
moveCell(ctx, c.values, pendingSrc, pendingDst, pendingLen)
moves++
}
if moves > 0 {
ctx.Compute(nil)
}
ctx.Close()
for seq := range c.cellRanges {
seqRange := newRange()
for i, cell := range c.cells {
if cell.findSeq(seq) != nil {
if i < seqRange.min {
seqRange.min = i
}
if i > seqRange.max {
seqRange.max = i
}
}
}
c.cellRanges[seq] = seqRange
}
}
func (c *Causal) Sub(i int) Cache {
if i >= len(c.keys) {
c.keys = append(c.keys, make([]ml.Tensor, i-len(c.keys)+1)...)
c.values = append(c.values, make([]ml.Tensor, i-len(c.values)+1)...)
}
c.curLayer = i
return c
}
func (c *Causal) Put(ctx ml.Context, key, value ml.Tensor) (ml.Tensor, ml.Tensor, ml.Tensor) {
if c.curBatchSize != int(key.Dim(2)) {
panic(fmt.Errorf("inconsistent batch sizes (layer: %v, batch size: %v layer batch size: %v)", c.curLayer, c.curBatchSize, int(key.Dim(2))))
}
if c.keys[c.curLayer] == nil || c.values[c.curLayer] == nil {
c.keys[c.curLayer] = c.cacheCtx.Zeros(c.DType, key.Dim(0), key.Dim(1), int64(c.Capacity))
c.values[c.curLayer] = c.cacheCtx.Zeros(c.DType, value.Dim(0), value.Dim(1), int64(c.Capacity))
}
ctx.Forward(key.Copy(ctx, c.keys[c.curLayer].View(ctx, int(key.Stride(2))*c.curPos, int(key.Dim(0)*key.Dim(1)*key.Dim(2)))))
ctx.Forward(value.Copy(ctx, c.values[c.curLayer].View(ctx, int(value.Stride(2))*c.curPos, int(value.Dim(0)*value.Dim(1)*value.Dim(2)))))
len := c.curCellRange.max - c.curCellRange.min
key = c.keys[c.curLayer].View(ctx, int(key.Stride(2))*c.curCellRange.min,
int(key.Dim(0)), int(key.Stride(1)),
int(key.Dim(1)), int(key.Stride(2)),
len,
)
value = c.values[c.curLayer].View(ctx, int(key.Stride(2))*c.curCellRange.min,
int(value.Dim(0)), int(value.Stride(1)),
int(value.Dim(1)), int(value.Stride(2)),
len,
)
return key, value, c.curMask
}
func (c *Causal) CopyPrefix(srcSeq, dstSeq int, len int) {
seqRange := newRange()
for i := range c.cells {
srcCellSeq := c.cells[i].findSeq(srcSeq)
dstCellSeq := c.cells[i].findSeq(dstSeq)
if dstCellSeq != nil {
c.cells[i].sequences = slices.DeleteFunc(c.cells[i].sequences, func(s seqCell) bool { return s.seq == dstSeq })
}
if srcCellSeq != nil && srcCellSeq.pos < len {
c.cells[i].sequences = append(c.cells[i].sequences, seqCell{seq: dstSeq, pos: srcCellSeq.pos})
if i < seqRange.min {
seqRange.min = i
}
if i > seqRange.max {
seqRange.max = i
}
}
}
c.cellRanges[dstSeq] = seqRange
c.seqNextPos[dstSeq] = len
}
func (c *Causal) shift(seq int, beginIndex, endIndex, offset int) error {
panic("Shift not yet implemented")
}
func (c *Causal) Remove(seq int, beginIndex, endIndex int) error {
endIndex = min(endIndex, c.seqNextPos[seq])
offset := beginIndex - endIndex
seqRange := newRange()
for i := range c.cells {
cellSeq := c.cells[i].findSeq(seq)
if cellSeq != nil {
if cellSeq.pos >= beginIndex && cellSeq.pos < endIndex {
c.cells[i].sequences = slices.DeleteFunc(c.cells[i].sequences, func(s seqCell) bool { return s.seq == seq })
} else {
if cellSeq.pos >= endIndex {
cellSeq.pos += offset
}
if i < seqRange.min {
seqRange.min = i
}
if i > seqRange.max {
seqRange.max = i
}
}
}
}
if endIndex != c.seqNextPos[seq] {
err := c.shift(seq, endIndex, c.seqNextPos[seq], offset)
if err != nil {
return err
}
}
c.cellRanges[seq] = seqRange
c.seqNextPos[seq] += offset
return nil
}
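
The cache package above is new in this change and no call sites are shown in this compare view, so the following is a minimal usage sketch rather than code from this branch. It assumes a concrete ml.Backend plus per-layer k and v tensors supplied by the surrounding model code, and it picks ml.DTypeF32 and a capacity of 2048 purely for illustration:

package example

import (
	"github.com/ollama/ollama/cache"
	"github.com/ollama/ollama/ml"
)

// decodeStep sketches one forward pass against the Causal cache: reserve
// cells for the batch with StartForward, then store and read back a
// layer's K/V through Sub and Put. backend, k and v come from the model.
func decodeStep(backend ml.Backend, k, v ml.Tensor) error {
	c := cache.NewCausalCache(backend, 2048, ml.DTypeF32)
	defer c.Close()

	ctx := backend.NewContext()
	defer ctx.Close()

	// One new token for sequence 0 in this batch.
	if err := c.StartForward(ctx, []int{0}); err != nil {
		return err
	}

	// Put stores k/v for layer 0 and returns views over the cached
	// window plus the causal mask to apply to the attention scores.
	key, value, mask := c.Sub(0).Put(ctx, k, v)
	_, _, _ = key, value, mask
	return nil
}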

cache/tensor.go (vendored, new file, 48 lines)

@@ -0,0 +1,48 @@
package cache
import (
"github.com/ollama/ollama/ml"
)
type TensorCache struct {
curLayer int
cacheCtx ml.Context
keys, values []ml.Tensor
}
func NewTensorCache(backend ml.Backend) *TensorCache {
return &TensorCache{
// TODO(jessegross): This context is not sized appropriately
cacheCtx: backend.NewContext(),
}
}
func (c *TensorCache) Close() {
c.cacheCtx.Close()
}
func (c *TensorCache) Sub(i int) *TensorCache {
if i >= len(c.keys) {
c.keys = append(c.keys, make([]ml.Tensor, i-len(c.keys)+1)...)
c.values = append(c.values, make([]ml.Tensor, i-len(c.values)+1)...)
}
c.curLayer = i
return c
}
func (c *TensorCache) Get(ctx ml.Context) (ml.Tensor, ml.Tensor, ml.Tensor) {
return c.keys[c.curLayer], c.values[c.curLayer], nil
}
func (c *TensorCache) Put(ctx ml.Context, key, value ml.Tensor) {
if c.keys[c.curLayer] == nil || c.values[c.curLayer] == nil {
c.keys[c.curLayer] = c.cacheCtx.Zeros(key.DType(), key.Shape()...)
c.values[c.curLayer] = c.cacheCtx.Zeros(value.DType(), value.Shape()...)
}
ctx.Forward(key.Copy(ctx, c.keys[c.curLayer]))
ctx.Forward(value.Copy(ctx, c.values[c.curLayer]))
}
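
For contrast with the causal cache, a sketch of TensorCache's intended use: tensors that are computed once (e.g. cross-attention K/V from an encoder) are stored wholesale with Put and read back unchanged with Get. The import path and helper name are assumptions for illustration:

package model

import (
	"github.com/ollama/ollama/cache" // assumed import path
	"github.com/ollama/ollama/ml"
)

// storeOnce is a hypothetical helper: Put copies this layer's tensors into
// cache-owned memory on the first pass; Get returns them on later passes.
func storeOnce(ctx ml.Context, tc *cache.TensorCache, layer int, k, v ml.Tensor) (ml.Tensor, ml.Tensor) {
	sub := tc.Sub(layer)
	sub.Put(ctx, k, v)
	key, value, _ := sub.Get(ctx) // the mask is always nil for this cache
	return key, value
}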

View File

@@ -35,9 +35,9 @@ import (
	"github.com/ollama/ollama/envconfig"
	"github.com/ollama/ollama/format"
	"github.com/ollama/ollama/llama"
-	"github.com/ollama/ollama/llama/runner"
	"github.com/ollama/ollama/parser"
	"github.com/ollama/ollama/progress"
+	"github.com/ollama/ollama/runner"
	"github.com/ollama/ollama/server"
	"github.com/ollama/ollama/types/model"
	"github.com/ollama/ollama/version"
@@ -59,7 +59,7 @@ func getModelfileName(cmd *cobra.Command) (string, error) {
	_, err = os.Stat(absName)
	if err != nil {
-		return filename, err
+		return "", err
	}

	return absName, nil
@@ -338,7 +338,7 @@ func RunHandler(cmd *cobra.Command, args []string) error {
		return err
	}

-	opts.MultiModal = len(info.ProjectorInfo) != 0
+	opts.MultiModal = true //len(info.ProjectorInfo) != 0
	opts.ParentModel = info.Details.ParentModel

	if interactive {

View File

@@ -279,7 +279,7 @@ func TestGetModelfileName(t *testing.T) {
			name:          "no modelfile specified, no modelfile exists",
			modelfileName: "",
			fileExists:    false,
-			expectedName:  "Modelfile",
+			expectedName:  "",
			expectedErr:   os.ErrNotExist,
		},
		{
@@ -293,7 +293,7 @@ func TestGetModelfileName(t *testing.T) {
			name:          "modelfile specified, no modelfile exists",
			modelfileName: "crazyfile",
			fileExists:    false,
-			expectedName:  "crazyfile",
+			expectedName:  "",
			expectedErr:   os.ErrNotExist,
		},
		{

View File

@@ -4,7 +4,7 @@ import (
	"fmt"
	"os"

-	"github.com/ollama/ollama/llama/runner"
+	"github.com/ollama/ollama/runner"
)

func main() {

View File

@@ -9,7 +9,7 @@ import (
	"log/slog"
	"strings"

-	"github.com/ollama/ollama/llm"
+	"github.com/ollama/ollama/fs/ggml"
)

type ModelParameters struct {
@@ -27,8 +27,8 @@ type AdapterParameters struct {
	} `json:"lora_parameters"`
}

-func (ModelParameters) KV(t *Tokenizer) llm.KV {
-	kv := llm.KV{
+func (ModelParameters) KV(t *Tokenizer) ggml.KV {
+	kv := ggml.KV{
		"general.file_type":            uint32(1),
		"general.quantization_version": uint32(2),
		"tokenizer.ggml.pre":           t.Pre,
@@ -54,7 +54,7 @@ func (ModelParameters) KV(t *Tokenizer) llm.KV {
	return kv
}

-func (p AdapterParameters) KV() llm.KV {
+func (p AdapterParameters) KV() ggml.KV {
	var alpha float32
	if p.LoraParameters.Alpha == 0 {
		alpha = float32(p.Alpha)
@@ -62,7 +62,7 @@ func (p AdapterParameters) KV() llm.KV {
		alpha = p.LoraParameters.Alpha
	}

-	kv := llm.KV{
+	kv := ggml.KV{
		"adapter.lora.alpha": alpha,
		"adapter.type":       "lora",
		"general.file_type":  uint32(1),
@@ -79,19 +79,19 @@ func (ModelParameters) specialTokenTypes() []string {
	}
}

-func (ModelParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
-	return llm.WriteGGUF(ws, kv, ts)
+func (ModelParameters) writeFile(ws io.WriteSeeker, kv ggml.KV, ts []ggml.Tensor) error {
+	return ggml.WriteGGUF(ws, kv, ts)
}

-func (AdapterParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
-	return llm.WriteGGUF(ws, kv, ts)
+func (AdapterParameters) writeFile(ws io.WriteSeeker, kv ggml.KV, ts []ggml.Tensor) error {
+	return ggml.WriteGGUF(ws, kv, ts)
}

type ModelConverter interface {
	// KV maps parameters to LLM key-values
-	KV(*Tokenizer) llm.KV
+	KV(*Tokenizer) ggml.KV
	// Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.
-	Tensors([]Tensor) []llm.Tensor
+	Tensors([]Tensor) []ggml.Tensor
	// Replacements returns a list of string pairs to replace in tensor names.
	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
	Replacements() []string
@@ -99,7 +99,7 @@ type ModelConverter interface {
	// specialTokenTypes returns any special token types the model uses
	specialTokenTypes() []string
	// writeFile writes the model to the provided io.WriteSeeker
-	writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
+	writeFile(io.WriteSeeker, ggml.KV, []ggml.Tensor) error
}

type moreParser interface {
@@ -108,17 +108,17 @@ type moreParser interface {
type AdapterConverter interface {
	// KV maps parameters to LLM key-values
-	KV(llm.KV) llm.KV
+	KV(ggml.KV) ggml.KV
	// Tensors maps input tensors to LLM tensors. Adapter specific modifications can be done here.
-	Tensors([]Tensor) []llm.Tensor
+	Tensors([]Tensor) []ggml.Tensor
	// Replacements returns a list of string pairs to replace in tensor names.
	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
	Replacements() []string

-	writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
+	writeFile(io.WriteSeeker, ggml.KV, []ggml.Tensor) error
}

-func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV llm.KV) error {
+func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV ggml.KV) error {
	bts, err := fs.ReadFile(fsys, "adapter_config.json")
	if err != nil {
		return err
@@ -191,6 +191,8 @@ func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
		conv = &qwen2Model{}
	case "BertModel":
		conv = &bertModel{}
+	case "CohereForCausalLM":
+		conv = &commandrModel{}
	default:
		return errors.New("unsupported architecture")
	}

View File

@@ -8,7 +8,7 @@ import (
	"slices"
	"strings"

-	"github.com/ollama/ollama/llm"
+	"github.com/ollama/ollama/fs/ggml"
)

type bertModel struct {
@@ -85,7 +85,7 @@ func (p *bertModel) parseMore(fsys fs.FS) error {
	return nil
}

-func (p *bertModel) KV(t *Tokenizer) llm.KV {
+func (p *bertModel) KV(t *Tokenizer) ggml.KV {
	kv := p.ModelParameters.KV(t)
	kv["general.architecture"] = "bert"
	kv["bert.attention.causal"] = false
@@ -132,8 +132,8 @@ func (p *bertModel) KV(t *Tokenizer) llm.KV {
	return kv
}

-func (p *bertModel) Tensors(ts []Tensor) []llm.Tensor {
-	var out []llm.Tensor
+func (p *bertModel) Tensors(ts []Tensor) []ggml.Tensor {
+	var out []ggml.Tensor
	for _, t := range ts {
		if slices.Contains([]string{
			"embeddings.position_ids",
@@ -143,7 +143,7 @@ func (p *bertModel) Tensors(ts []Tensor) []llm.Tensor {
			continue
		}

-		out = append(out, llm.Tensor{
+		out = append(out, ggml.Tensor{
			Name:     t.Name(),
			Kind:     t.Kind(),
			Shape:    t.Shape(),

View File

@@ -0,0 +1,76 @@
package convert
import (
"cmp"
"github.com/ollama/ollama/fs/ggml"
)
type commandrModel struct {
ModelParameters
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
HiddenSize uint32 `json:"hidden_size"`
HiddenLayers uint32 `json:"num_hidden_layers"`
IntermediateSize uint32 `json:"intermediate_size"`
NumAttentionHeads uint32 `json:"num_attention_heads"`
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
LayerNormEPS float32 `json:"layer_norm_eps"`
RopeTheta float32 `json:"rope_theta"`
UseQKNorm bool `json:"use_qk_norm"`
MaxLength uint32 `json:"model_max_length"`
LogitScale float32 `json:"logit_scale"`
NCtx uint32 `json:"n_ctx"`
}
var _ ModelConverter = (*commandrModel)(nil)
func (p *commandrModel) KV(t *Tokenizer) ggml.KV {
kv := p.ModelParameters.KV(t)
kv["general.architecture"] = "command-r"
kv["general.name"] = "command-r"
kv["command-r.context_length"] = cmp.Or(p.MaxLength, p.MaxPositionEmbeddings, p.NCtx)
kv["command-r.embedding_length"] = p.HiddenSize
kv["command-r.block_count"] = p.HiddenLayers
kv["command-r.feed_forward_length"] = p.IntermediateSize
kv["command-r.attention.head_count"] = p.NumAttentionHeads
kv["command-r.attention.head_count_kv"] = p.NumKeyValueHeads
kv["command-r.attention.layer_norm_epsilon"] = p.LayerNormEPS
kv["command-r.rope.freq_base"] = p.RopeTheta
kv["command-r.max_position_embeddings"] = cmp.Or(p.MaxLength, p.MaxPositionEmbeddings)
kv["command-r.logit_scale"] = p.LogitScale
kv["command-r.rope.scaling.type"] = "none"
return kv
}
func (p *commandrModel) Tensors(ts []Tensor) []ggml.Tensor {
var out []ggml.Tensor
for _, t := range ts {
out = append(out, ggml.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: t.Shape(),
WriterTo: t,
})
}
return out
}
func (p *commandrModel) Replacements() []string {
return []string{
"self_attn.q_norm", "attn_q_norm",
"self_attn.k_norm", "attn_k_norm",
"model.layers", "blk",
"input_layernorm", "attn_norm",
"mlp.down_proj", "ffn_down",
"mlp.gate_proj", "ffn_gate",
"mlp.up_proj", "ffn_up",
"self_attn.k_proj", "attn_k",
"self_attn.o_proj", "attn_output",
"self_attn.q_proj", "attn_q",
"self_attn.v_proj", "attn_v",
"model.norm", "output_norm",
"model.embed_tokens", "token_embd",
}
	}
}
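
As the ModelConverter docs above note, these pairs are consumed by a strings.Replacer. A self-contained illustration (not part of the diff) of how one safetensors name is rewritten into its GGUF form:

package main

import (
	"fmt"
	"strings"
)

func main() {
	// Pairs are (old, new), exactly as returned by Replacements().
	r := strings.NewReplacer(
		"model.layers", "blk",
		"self_attn.q_proj", "attn_q",
	)
	fmt.Println(r.Replace("model.layers.0.self_attn.q_proj.weight"))
	// Output: blk.0.attn_q.weight
}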

View File

@@ -6,7 +6,7 @@ import (
	"github.com/pdevine/tensor"
	"github.com/pdevine/tensor/native"

-	"github.com/ollama/ollama/llm"
+	"github.com/ollama/ollama/fs/ggml"
)

type gemmaModel struct {
@@ -23,7 +23,7 @@ type gemmaModel struct {
var _ ModelConverter = (*gemmaModel)(nil)

-func (p *gemmaModel) KV(t *Tokenizer) llm.KV {
+func (p *gemmaModel) KV(t *Tokenizer) ggml.KV {
	kv := p.ModelParameters.KV(t)
	kv["general.architecture"] = "gemma"
	kv["gemma.context_length"] = p.MaxPositionEmbeddings
@@ -42,14 +42,14 @@ func (p *gemmaModel) KV(t *Tokenizer) llm.KV {
	return kv
}

-func (p *gemmaModel) Tensors(ts []Tensor) []llm.Tensor {
-	var out []llm.Tensor
+func (p *gemmaModel) Tensors(ts []Tensor) []ggml.Tensor {
+	var out []ggml.Tensor
	for _, t := range ts {
		if strings.HasSuffix(t.Name(), "_norm.weight") {
			t.SetRepacker(p.addOne)
		}

-		out = append(out, llm.Tensor{
+		out = append(out, ggml.Tensor{
			Name:     t.Name(),
			Kind:     t.Kind(),
			Shape:    t.Shape(),

View File

@@ -1,8 +1,6 @@
package convert

-import (
-	"github.com/ollama/ollama/llm"
-)
+import "github.com/ollama/ollama/fs/ggml"

type gemma2Model struct {
	gemmaModel
@@ -11,7 +9,7 @@ type gemma2Model struct {
	FinalLogitSoftcap float32 `json:"final_logit_softcapping"`
}

-func (p *gemma2Model) KV(t *Tokenizer) llm.KV {
+func (p *gemma2Model) KV(t *Tokenizer) ggml.KV {
	kv := p.ModelParameters.KV(t)
	kv["general.architecture"] = "gemma2"
	kv["gemma2.context_length"] = p.MaxPositionEmbeddings

View File

@@ -6,7 +6,7 @@ import (
	"github.com/pdevine/tensor"
	"github.com/pdevine/tensor/native"

-	"github.com/ollama/ollama/llm"
+	"github.com/ollama/ollama/fs/ggml"
)

type gemma2Adapter struct {
@@ -15,14 +15,14 @@ type gemma2Adapter struct {
var _ AdapterConverter = (*gemma2Adapter)(nil)

-func (p *gemma2Adapter) KV(baseKV llm.KV) llm.KV {
+func (p *gemma2Adapter) KV(baseKV ggml.KV) ggml.KV {
	kv := p.AdapterParameters.KV()
	kv["general.architecture"] = "gemma2"
	return kv
}

-func (p *gemma2Adapter) Tensors(ts []Tensor) []llm.Tensor {
-	var out []llm.Tensor
+func (p *gemma2Adapter) Tensors(ts []Tensor) []ggml.Tensor {
+	var out []ggml.Tensor
	for _, t := range ts {
		shape := t.Shape()
		if (strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) ||
@@ -31,7 +31,7 @@ func (p *gemma2Adapter) Tensors(ts []Tensor) []llm.Tensor {
			t.SetRepacker(p.repack)
		}

-		out = append(out, llm.Tensor{
+		out = append(out, ggml.Tensor{
			Name:     t.Name(),
			Kind:     t.Kind(),
			Shape:    t.Shape(),

View File

@@ -9,7 +9,7 @@ import (
	"github.com/pdevine/tensor"
	"github.com/pdevine/tensor/native"

-	"github.com/ollama/ollama/llm"
+	"github.com/ollama/ollama/fs/ggml"
)

type llamaModel struct {
@@ -46,7 +46,7 @@ type llamaModel struct {
var _ ModelConverter = (*llamaModel)(nil)

-func (p *llamaModel) KV(t *Tokenizer) llm.KV {
+func (p *llamaModel) KV(t *Tokenizer) ggml.KV {
	kv := p.ModelParameters.KV(t)
	kv["general.architecture"] = "llama"
	kv["llama.vocab_size"] = p.VocabSize
@@ -120,11 +120,11 @@ func (p *llamaModel) KV(t *Tokenizer) llm.KV {
	return kv
}

-func (p *llamaModel) Tensors(ts []Tensor) []llm.Tensor {
-	var out []llm.Tensor
+func (p *llamaModel) Tensors(ts []Tensor) []ggml.Tensor {
+	var out []ggml.Tensor

	if p.RopeScaling.factors != nil {
-		out = append(out, llm.Tensor{
+		out = append(out, ggml.Tensor{
			Name:     "rope_freqs.weight",
			Kind:     0,
			Shape:    []uint64{uint64(len(p.RopeScaling.factors))},
@@ -138,7 +138,7 @@ func (p *llamaModel) Tensors(ts []Tensor) []llm.Tensor {
			t.SetRepacker(p.repack)
		}

-		out = append(out, llm.Tensor{
+		out = append(out, ggml.Tensor{
			Name:     t.Name(),
			Kind:     t.Kind(),
			Shape:    t.Shape(),

View File

@@ -7,7 +7,7 @@ import (
	"github.com/pdevine/tensor"
	"github.com/pdevine/tensor/native"

-	"github.com/ollama/ollama/llm"
+	"github.com/ollama/ollama/fs/ggml"
)

type llamaAdapter struct {
@@ -18,7 +18,7 @@ type llamaAdapter struct {
var _ AdapterConverter = (*llamaAdapter)(nil)

-func (p *llamaAdapter) KV(baseKV llm.KV) llm.KV {
+func (p *llamaAdapter) KV(baseKV ggml.KV) ggml.KV {
	kv := p.AdapterParameters.KV()
	kv["general.architecture"] = "llama"
	kv["llama.attention.head_count"] = baseKV["llama.attention.head_count"]
@@ -29,8 +29,8 @@ func (p *llamaAdapter) KV(baseKV llm.KV) llm.KV {
	return kv
}

-func (p *llamaAdapter) Tensors(ts []Tensor) []llm.Tensor {
-	var out []llm.Tensor
+func (p *llamaAdapter) Tensors(ts []Tensor) []ggml.Tensor {
+	var out []ggml.Tensor
	for _, t := range ts {
		shape := t.Shape()
		if (strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) ||
@@ -41,7 +41,7 @@ func (p *llamaAdapter) Tensors(ts []Tensor) []llm.Tensor {
			t.SetRepacker(p.repack)
		}

-		out = append(out, llm.Tensor{
+		out = append(out, ggml.Tensor{
			Name:     t.Name(),
			Kind:     t.Kind(),
			Shape:    shape,

View File

@@ -6,7 +6,7 @@ import (
	"slices"
	"strings"

-	"github.com/ollama/ollama/llm"
+	"github.com/ollama/ollama/fs/ggml"
)

type mixtralModel struct {
@@ -15,7 +15,7 @@ type mixtralModel struct {
	NumExpertsPerToken uint32 `json:"num_experts_per_tok"`
}

-func (p *mixtralModel) KV(t *Tokenizer) llm.KV {
+func (p *mixtralModel) KV(t *Tokenizer) ggml.KV {
	kv := p.llamaModel.KV(t)

	if p.NumLocalExperts > 0 {
@@ -29,7 +29,7 @@ func (p *mixtralModel) KV(t *Tokenizer) llm.KV {
	return kv
}

-func (p *mixtralModel) Tensors(ts []Tensor) []llm.Tensor {
+func (p *mixtralModel) Tensors(ts []Tensor) []ggml.Tensor {
	oldnew := []string{
		"model.layers", "blk",
		"w1", "ffn_gate_exps",
@@ -56,10 +56,10 @@ func (p *mixtralModel) Tensors(ts []Tensor) []llm.Tensor {
		return true
	})

-	var out []llm.Tensor
+	var out []ggml.Tensor
	for n, e := range experts {
		// TODO(mxyng): sanity check experts
-		out = append(out, llm.Tensor{
+		out = append(out, ggml.Tensor{
			Name:     n,
			Kind:     e[0].Kind(),
			Shape:    append([]uint64{uint64(len(e))}, e[0].Shape()...),

View File

@@ -8,7 +8,7 @@ import (
	"strings"
	"sync"

-	"github.com/ollama/ollama/llm"
+	"github.com/ollama/ollama/fs/ggml"
)

type phi3Model struct {
@@ -37,7 +37,7 @@ type phi3Model struct {
var _ ModelConverter = (*phi3Model)(nil)

-func (p *phi3Model) KV(t *Tokenizer) llm.KV {
+func (p *phi3Model) KV(t *Tokenizer) ggml.KV {
	kv := p.ModelParameters.KV(t)
	kv["general.architecture"] = "phi3"
	kv["phi3.context_length"] = p.MaxPositionEmbeddings
@@ -68,19 +68,19 @@ func (p *phi3Model) KV(t *Tokenizer) llm.KV {
	return kv
}

-func (p *phi3Model) Tensors(ts []Tensor) []llm.Tensor {
+func (p *phi3Model) Tensors(ts []Tensor) []ggml.Tensor {
	var addRopeFactors sync.Once

-	out := make([]llm.Tensor, 0, len(ts)+2)
+	out := make([]ggml.Tensor, 0, len(ts)+2)
	for _, t := range ts {
		if strings.HasPrefix(t.Name(), "blk.0.") {
			addRopeFactors.Do(func() {
-				out = append(out, llm.Tensor{
+				out = append(out, ggml.Tensor{
					Name:     "rope_factors_long.weight",
					Kind:     0,
					Shape:    []uint64{uint64(len(p.RopeScaling.LongFactor))},
					WriterTo: p.RopeScaling.LongFactor,
-				}, llm.Tensor{
+				}, ggml.Tensor{
					Name:     "rope_factors_short.weight",
					Kind:     0,
					Shape:    []uint64{uint64(len(p.RopeScaling.ShortFactor))},
@@ -89,7 +89,7 @@ func (p *phi3Model) Tensors(ts []Tensor) []llm.Tensor {
			})
		}

-		out = append(out, llm.Tensor{
+		out = append(out, ggml.Tensor{
			Name:     t.Name(),
			Kind:     t.Kind(),
			Shape:    t.Shape(),

View File

@@ -1,6 +1,7 @@
package convert

-import "github.com/ollama/ollama/llm"
+import "github.com/ollama/ollama/fs/ggml"

type qwen2Model struct {
	ModelParameters
@@ -21,7 +22,7 @@ type qwen2Model struct {
var _ ModelConverter = (*qwen2Model)(nil)

-func (q *qwen2Model) KV(t *Tokenizer) llm.KV {
+func (q *qwen2Model) KV(t *Tokenizer) ggml.KV {
	kv := q.ModelParameters.KV(t)
	kv["general.architecture"] = "qwen2"
	kv["qwen2.block_count"] = q.HiddenLayers
@@ -45,10 +46,10 @@ func (q *qwen2Model) KV(t *Tokenizer) llm.KV {
	return kv
}

-func (q *qwen2Model) Tensors(ts []Tensor) []llm.Tensor {
-	var out []llm.Tensor
+func (q *qwen2Model) Tensors(ts []Tensor) []ggml.Tensor {
+	var out []ggml.Tensor
	for _, t := range ts {
-		out = append(out, llm.Tensor{
+		out = append(out, ggml.Tensor{
			Name:     t.Name(),
			Kind:     t.Kind(),
			Shape:    t.Shape(),

View File

@@ -20,7 +20,7 @@ import (
	"golang.org/x/exp/maps"

-	"github.com/ollama/ollama/llm"
+	"github.com/ollama/ollama/fs/ggml"
)

type tensorData struct {
@@ -29,7 +29,7 @@ type tensorData struct {
	Shape  []int  `json:"shape"`
}

-func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, *llm.Tensors) {
+func convertFull(t *testing.T, fsys fs.FS) (*os.File, ggml.KV, ggml.Tensors) {
	t.Helper()

	f, err := os.CreateTemp(t.TempDir(), "f16")
@@ -48,7 +48,7 @@ func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, *llm.Tensors) {
	}
	t.Cleanup(func() { r.Close() })

-	m, _, err := llm.DecodeGGML(r, math.MaxInt)
+	m, _, err := ggml.Decode(r, math.MaxInt)
	if err != nil {
		t.Fatal(err)
	}
@@ -60,7 +60,7 @@ func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, *llm.Tensors) {
	return r, m.KV(), m.Tensors()
}

-func generateResultsJSON(t *testing.T, f *os.File, kv llm.KV, tensors *llm.Tensors) map[string]string {
+func generateResultsJSON(t *testing.T, f *os.File, kv ggml.KV, tensors ggml.Tensors) map[string]string {
	actual := make(map[string]string)
	for k, v := range kv {
		if s, ok := v.(json.Marshaler); !ok {
@@ -75,7 +75,7 @@ func generateResultsJSON(t *testing.T, f *os.File, kv llm.KV, tensors *llm.Tenso
		}
	}

-	for _, tensor := range tensors.Items {
+	for _, tensor := range tensors.Items() {
		sha256sum := sha256.New()
		sr := io.NewSectionReader(f, int64(tensors.Offset+tensor.Offset), int64(tensor.Size()))
		if _, err := io.Copy(sha256sum, sr); err != nil {
@@ -109,6 +109,7 @@ func TestConvertModel(t *testing.T) {
		"all-MiniLM-L6-v2",
		"gemma-2-9b-it",
		"Qwen2.5-0.5B-Instruct",
+		"c4ai-command-r-v01",
	}

	for i := range cases {
@@ -331,7 +332,7 @@ func TestConvertAdapter(t *testing.T) {
	}
	defer r.Close()

-	m, _, err := llm.DecodeGGML(r, math.MaxInt)
+	m, _, err := ggml.Decode(r, math.MaxInt)
	if err != nil {
		t.Fatal(err)
	}

344
convert/testdata/c4ai-command-r-v01.json vendored Normal file
View File

@@ -0,0 +1,344 @@
{
"general.architecture": "command-r",
"general.name": "command-r",
"command-r.attention.head_count": "64",
"command-r.attention.head_count_kv": "64",
"command-r.attention.layer_norm_epsilon": "1e-05",
"command-r.block_count": "40",
"command-r.context_length": "131072",
"command-r.embedding_length": "8192",
"command-r.feed_forward_length": "22528",
"command-r.logit_scale": "0.0625",
"command-r.rope.freq_base": "8e+06",
"command-r.rope.scaling.type": "none",
"tokenizer.ggml.add_bos_token": "true",
"tokenizer.ggml.add_eos_token": "false",
"tokenizer.ggml.bos_token_id": "5",
"tokenizer.ggml.eos_token_id": "255001",
"tokenizer.ggml.merges": "902a060cac8884a5793d2a857dd2e53a259de46c8d08c4deb243c239671e1350",
"tokenizer.ggml.model": "gpt2",
"tokenizer.ggml.padding_token_id": "0",
"tokenizer.ggml.token_type": "b7a352ccd1c99d4413bcf452c2db707b0526d0e1216616b865560fab80296462",
"tokenizer.ggml.tokens": "815ac90ff23565081522d7258f46648c8a0619eb847a9c7c31b238a9b984e4ae",
"blk.0.attn_k.weight": "6fcfdb466f9ceb1229404ce4ec4e480751b8d00da12707a11783dad7256cb864",
"blk.0.attn_norm.weight": "6063317f731371864049c7704a70772f1eb632194201ebdc2ed0f8e483507c72",
"blk.0.attn_output.weight": "920f49716a1e2fc73b6794ec777947f1c122701e63ed302422ac89e90f06e9da",
"blk.0.attn_q.weight": "ddbcd7cde197e632564ac58e4f25d9e3a8ca52917329eeb6081eb41a797932ab",
"blk.0.attn_v.weight": "318fc02a189d87420f0cbf57f47f11e00c21ec1ed472ce0a2a895b44f7fa0fca",
"blk.0.ffn_down.weight": "aa71975b6eb1f4c77b03d2ac4a194cf8d95718efac741bb12f0f3ff79a27f9bc",
"blk.0.ffn_gate.weight": "42967702fa0bc738b88dc50007ace26dbe74a5a9e0978124dd093f818241a9e1",
"blk.0.ffn_up.weight": "5282c8788b086bd30f46525e7995a17464882a72703fd27165491afdd8bfd4af",
"blk.1.attn_k.weight": "cd248882e64fd2c3402c44790ebe12440133dc671b6893fdad0564c461973adc",
"blk.1.attn_norm.weight": "ba84e1c8fd30af6ec94208db4078befac8c921aad3acb887812887f3282ea2be",
"blk.1.attn_output.weight": "2efa3ef7c5666ccceb05e339b83ad680cc0d2c3ec78203f5da5959f23a80e14f",
"blk.1.attn_q.weight": "5106f2e255358a1303c22e8b5f0ec044852bb30a866c52cabefd30017a7a6b7d",
"blk.1.attn_v.weight": "a211a634a1a5df1d5f973645438be0461dd922210f9747c6b04e386c7f1ebe95",
"blk.1.ffn_down.weight": "37093afe48d32c578ec956c9ed85242cd000d6aa979e60526aafa10c822dbb10",
"blk.1.ffn_gate.weight": "469860819e9159caefb1aad0bc66db790f3393f05fd87b08e52256a7ed256543",
"blk.1.ffn_up.weight": "736742c97d35d1a011f9cafd3c0ce947ad559bb2fba6da73c816f6bfd0fa9aeb",
"blk.2.attn_k.weight": "92c219d92804d832ab404bd6dc7339c90877bb7cf405dd030c121f8b27757739",
"blk.2.attn_norm.weight": "61e4466069474b76b6d1e702566420eb669faf3556b00ff7b824784aca13a2d6",
"blk.2.attn_output.weight": "d2fb38a2b2171fd91caf037faa585a62225819aa232d86fd4f7f9d2c3c8a45e9",
"blk.2.attn_q.weight": "f6faf5cc6844e3daa4f9f68d90f5458c64879de68a7728860e38374e30c3429d",
"blk.2.attn_v.weight": "f340ef8f7341d987a6f37c0e9afe0aef5be67be00c0ce5f57612daf73319cce1",
"blk.2.ffn_down.weight": "c7be61a701d779860b621b143fb6365b607bf99ec7c0f153b07908ac8120885a",
"blk.2.ffn_gate.weight": "b64f0878187bd3392abfa4c3e8ad2f8b4c133903e54246747ff8f3b4639ad83e",
"blk.2.ffn_up.weight": "50b11c712652e90ee7428dbb45cffebb80662ac982bc72bd9eafff361b5eb5a8",
"blk.3.attn_k.weight": "2b7bcbe9ee5c9c630c8c8d7483887e78b73581016f4cbb6933db2a147a25f431",
"blk.3.attn_norm.weight": "0181dac7f4eee7252980323e8032cf339bef2046ce0a16c0fd72af7c98a8a37b",
"blk.3.attn_output.weight": "aef8843b636ce231da9e7c9acbee197883cc15df0e2887709324c6a50f16da7b",
"blk.3.attn_q.weight": "55404130fa10e81322d33eb378aa0de31a92990ce7730f1338c0ace0406bb1b1",
"blk.3.attn_v.weight": "76f7fb8040d82b957d689ce34fea2302a6640ad5bbaa0052ad2b7ebce270c33d",
"blk.3.ffn_down.weight": "648628933eff3b357c3729c33c5b1ae51c28e59b9c19acd1601a2ff7c5d5d9a5",
"blk.3.ffn_gate.weight": "6a588885d16e98d5f50ebed05af089154f680085ca9c97691e5b489088630a4a",
"blk.3.ffn_up.weight": "e12455a1d702f4986e1a663493e3d5102b367af74d45557522002a35d63ecac2",
"blk.4.attn_k.weight": "40d943380a8a85e4eab147934bf6e16f23cc8ab753f6636526382c074d182288",
"blk.4.attn_norm.weight": "4ab2c098983d4599fe540eef624c4df954adb7473faebda7471ef0ba4134814c",
"blk.4.attn_output.weight": "d14b91e40f58bf4a3c8c2eca0b12bb541de406574af39027d56f6c588a147082",
"blk.4.attn_q.weight": "e1224960a3562107488589f883fa32414bae41712fa8dbd47c5f3e3a7801452f",
"blk.4.attn_v.weight": "063f297bc4aa6e709fc32c4c32e35af7d07d80e83cb939b76adbba858006c03d",
"blk.4.ffn_down.weight": "f88a18020c5e1caaa29596895eb348e76ee5bfad27ed57651a86cd8cd1f9b5aa",
"blk.4.ffn_gate.weight": "48e7e1eed3fb52e92e61d3557dd0ec002418327090e034ce4322fd68542266f8",
"blk.4.ffn_up.weight": "1ca8a7aa17355b6ce0d9ad5539fdad3899fa47fd359c285fbfb31f19f47bf073",
"blk.5.attn_k.weight": "2bdf15f8e73d068d972380f25d207004cf0bf3b5bfa46946803ba6fba07d9175",
"blk.5.attn_norm.weight": "60448d7cde6e1b6467aa31bdea012e39cdb08c88081cee7d102dca4f93f766ef",
"blk.5.attn_output.weight": "f9f687d7c457537f9fca8a4087a59f1c3bebfaf5537b94e42c831a13224f7799",
"blk.5.attn_q.weight": "987db7a2ad68657a92625e1980effbb1f79697c2183f2b9f3b3a0570c51b0ab9",
"blk.5.attn_v.weight": "cf696891148f3e4783ad1d20f93462ae091eb8651c656bba9b662253b6263e02",
"blk.5.ffn_down.weight": "c0662b0bd0929136005fb9d691fdd9b2c33867d9ce9622339a6a456b720b059a",
"blk.5.ffn_gate.weight": "200bbdfab615d7a3a84719b6ced7751e3ce52757ef212d96f87798bc1de5e987",
"blk.5.ffn_up.weight": "df5d23e7e035fb1b9d163da7ddfdfe38da6a37e86e96534dc02ad20f011b55b3",
"blk.6.attn_k.weight": "c0dae2d272a7c5a2fa004bbb8475dbab362fc1f6d008e73d5a4434a9382ac6ba",
"blk.6.attn_norm.weight": "51c57ac8b55e04354d5dca6bb9c0cf4177639d3b038e80209e33036209688f64",
"blk.6.attn_output.weight": "229d97892c62f85bcdf431675250e01c976ad69ffa450b01fb543bf88f14a2fb",
"blk.6.attn_q.weight": "c20e49621821bd46ed156e6823864a5bda4f317750e71ab8dc54e44eb48cf7c2",
"blk.6.attn_v.weight": "53ceb1a2ee43fce3c7b5b33c58a9fc5ee7f44dc1c6f29bc9dbefc37582102dc9",
"blk.6.ffn_down.weight": "7923c943b7629d560a032d1efa210d1d75c6692140f1be94464ee7ed24f44ed0",
"blk.6.ffn_gate.weight": "57593d350361af753a6a39f53b066282634c0fb44f396f6f2966a574b01d8f8c",
"blk.6.ffn_up.weight": "327b6a7a387098b8899d3ded04a4d4e7c658ca61b80d4e7b17594be232721602",
"blk.7.attn_k.weight": "9ca48b87a10116fd8868e62b76f211d4bb91f166096be9061439ee2e1c3a5c20",
"blk.7.attn_norm.weight": "cd56cfcc4e2ad6b96e23ea7b0d32b4caf236107d99a0b22c56760b62e63c8cfd",
"blk.7.attn_output.weight": "7352b509a03cae2491ffc060e577d189341a0f861233f18c96f9d275dc4234bf",
"blk.7.attn_q.weight": "2b3791c8c008c33ddbe12bedba8191322ceea2dcce5cf0eb7a93d40ad254e672",
"blk.7.attn_v.weight": "3ae721d52466487a3d48150581e57f6d64ea1e83ab929f23b28c3d777422eeb6",
"blk.7.ffn_down.weight": "3b6fa8ececdb3c34af3a5363863d6f94289c1c95bf47fce3a3ddcf184c5f0848",
"blk.7.ffn_gate.weight": "dbd7df6c5ae5eb4adb859f0d36453813a4e289a359a1ba8f72d67fcbf21c3e22",
"blk.7.ffn_up.weight": "de68380a334b4c5cfd4c318b0e9854aec59bd79aa0f0c30af3f56414f83482b0",
"blk.8.attn_k.weight": "7303c4e4480abc72a7ee271811311199245fb5c2ea27a2bd3b8cad3a53a03c27",
"blk.8.attn_norm.weight": "2e3d1921898d1b943ce1a1b6818546c8b471d6d542da24f51a8b514b8c3dd4ef",
"blk.8.attn_output.weight": "30421520887b66bf97a18dbcdc283bc8d0b60590b612fd638a319a6eae923227",
"blk.8.attn_q.weight": "73e064d5433c9b500068a1c31744dbd53f4ade298fb450a0e8c97f62cf1f8a8d",
"blk.8.attn_v.weight": "27e21f8b9a9a8533e8178ca34a72aa1d786393d57302b7806dcdf3e51de511a8",
"blk.8.ffn_down.weight": "bf694bd8e00047982108000e7b3dee7b225db8b19abc595e5697b6bbefd92e7c",
"blk.8.ffn_gate.weight": "d55fdbf8606d9141b774b0500c58944fd1253b9e69d1f765eaa9a680b9f2ca40",
"blk.8.ffn_up.weight": "1ae3f580655e7c8e8dd6c34fa4ac574fdfc5e3f1a8536da0c5442d3a2976f0e7",
"blk.9.attn_k.weight": "b18080626012d8aabcf78542d6c7bf31c712bf55a70172fbfe173fcf34481036",
"blk.9.attn_norm.weight": "2e3620620dc09998c6d3063a7d5de5433fbbae8c11e5b00d13f145d39140e162",
"blk.9.attn_output.weight": "69c3c0e27ef1c0fc933eeb7b612b70909f18cde238873c0d576a2ba9714ef174",
"blk.9.attn_q.weight": "68330e5aa28a28873c9a6e67f032186ef651df2df5844e0f27094ba349fbe4ab",
"blk.9.attn_v.weight": "3df8d45a102be082d0793a51cb82aa62a43cd0e9d047ba4115ca0f2414b39325",
"blk.9.ffn_down.weight": "1d6cc162b73745b135b4f040a0aac3c06d5135a3dc5b2421e7ee2af48662fd7f",
"blk.9.ffn_gate.weight": "034a9d40fb1e32b534b45f4bccd65cbe43c4a6a3f5d01132bd245ca0005de5fc",
"blk.9.ffn_up.weight": "c838c38d0e1a0ac0da17eb2a66023ed31929f07d8fcfe1cc546df26096c91f0c",
"blk.10.attn_k.weight": "a78507cb72f744b86ceaa032596e74e5571c822d0226d334881169addb32cbd5",
"blk.10.attn_norm.weight": "35f48d0b28ee0e6b4cad4e983925737562d64824be5b168b3e26df3d6b260cf1",
"blk.10.attn_output.weight": "53712db06796de39b131323e7abf9a58551b6d52da6db66a471580386d396252",
"blk.10.attn_q.weight": "efe08429ba196026b81cd1c471e1c7418afd9e966659feb3936b674aa0803b58",
"blk.10.attn_v.weight": "7ec6055e134f89da0cbe79ec9f13ef2e442ac584b1f03c3e13e7d0cdad0078bd",
"blk.10.ffn_down.weight": "37e66af4bcd1f3079e841e892255b8255070655901864ea3a8c602a7f681a640",
"blk.10.ffn_gate.weight": "1825282bc34830d371c6edcc3c1e73e6ecc1e10f4aea0122dbb7acc1d6f7b1bc",
"blk.10.ffn_up.weight": "819b3b276a4d4c14a35ed6682d5ef18a5e8ed468e5ce3f12e8c75ec18ac20ec4",
"blk.11.attn_k.weight": "5327e6a2af82dfff0619a14971f5864a15553c36fead84e1af42c7630f2729c6",
"blk.11.attn_norm.weight": "fec363b3c4a43036d2c635fb8aa9e122dd87ee79811839f2f6cd955be3373e7b",
"blk.11.attn_output.weight": "ccf7b38f18ee8798b8a6a35018e2df3eb3e007de62876befb68025dd66c79763",
"blk.11.attn_q.weight": "da8c4a1c824ffe174e39f126cd72f7ef83c56aff1259d452a1212de80f98f5e9",
"blk.11.attn_v.weight": "d17ae6bb77f03982b55d341eb67acb5969e9ad3da5994b96eafc09793dcfe3a0",
"blk.11.ffn_down.weight": "a6bac521e2791345f22c57205fa1c2f2f687794dfd24d0e98d50ae0d0eb6088a",
"blk.11.ffn_gate.weight": "5ed902c488cb51ba5635f3df08258c5f84f31a679a00211ea5f9d8b824ef6d9d",
"blk.11.ffn_up.weight": "ee9f1437eb890d2cf9df2574afa1cecf20aafdd847cd75b152d7eb74419afd34",
"blk.12.attn_k.weight": "5a069c06e1019b0f889088e67458f7a11ec77fa190ada6069e46211f62219947",
"blk.12.attn_norm.weight": "194d7e5fcc8c49aea62daf1940532419cf3c505afdce6be377286b677db5db8f",
"blk.12.attn_output.weight": "6534995fd4d6fecb55e317add4b1723aba4d825e1e9471d0b08813dfdc247176",
"blk.12.attn_q.weight": "4ab51ca519b5995581fa34f846276feca3b907ef2b51f192f6cc0b3263c3f5a2",
"blk.12.attn_v.weight": "5652ca3fa81ef9a1ac1543d71fc6813f8517f8ec54b25c701f6f98061614830f",
"blk.12.ffn_down.weight": "4b2c263f54c88516b8eb273bb8d9615b01c5c8b484dc70358adb91b50b300edd",
"blk.12.ffn_gate.weight": "8f50c3c3e3e8568991d6c1b0e74b500cf4f208e7700bbb8e87c3f6a6d359b6b5",
"blk.12.ffn_up.weight": "1c1a581fec1fbe959e1427fa513f400100b5e1ee9d83932630be9905fb49c231",
"blk.13.attn_k.weight": "efd7a38c46f08d8376d82974f33c644e3a02220e142d63b1704718699a8a884c",
"blk.13.attn_norm.weight": "d28fa4f1bd75abbd063b0e622e08f579c89cd0c0c5ce63c1952ec9f944f8ee13",
"blk.13.attn_output.weight": "71e0068a639288718bdb70a6cfdefd50bc8b3ec3993347a65129e70001ca5827",
"blk.13.attn_q.weight": "b97077adc92cff07a2e07d80ee38f214ad8713571c69cd5c70ebd43dc501ac87",
"blk.13.attn_v.weight": "79b3e2749ab4b459c81e96e322b215f1e8af645eb346e176c326bd00cf6ed2fd",
"blk.13.ffn_down.weight": "9f8687d11effa1db7cfecf7bec5631734bcf2962aad74a9f519144491e08ec85",
"blk.13.ffn_gate.weight": "7d14dfa0543852e7777fe8fff29ca533744cbcf1ebcf10067e5adfc4eb345e65",
"blk.13.ffn_up.weight": "852b9527b97fdab211ff3f832a660ee1d93ccb56906144c50f01319a6e8ee615",
"blk.14.attn_k.weight": "79e926b20f36f66d58226cb358881f2f68ae7b468787d33cafae5110287a14a0",
"blk.14.attn_norm.weight": "97d481b63deb0df6142c2c6cd23043720c62eb609e390f47a7113751c79974ec",
"blk.14.attn_output.weight": "aa6e94d7176d5c79fbb89b96e5f13ce75702ce3dd23ee52986446da436a6c3d6",
"blk.14.attn_q.weight": "214becb6d1bb460da9fb8ace0f99b9a5afa9edf7aa7acc19606c7401b11d6305",
"blk.14.attn_v.weight": "488b0e6d7f1a7a2ed0972aaa6d10ef9c775ee5373460324efcf5b3e3da9311df",
"blk.14.ffn_down.weight": "29c7ad16cf9542e30996a1a01ab95b844533b28051f04cc7949c371afb796471",
"blk.14.ffn_gate.weight": "b7ef208f2b054803665b377f5a5980c122c026841809cf855c6ba06d1c3a885a",
"blk.14.ffn_up.weight": "76a5cc28100748d79c4398ce7b9176aab4d661548b6293a82f99144812e5b70e",
"blk.15.attn_k.weight": "a6b8f9e98ab878fa7ebc5d080978ebf2d050acc2ab2fa8ea9188eb10e27702c8",
"blk.15.attn_norm.weight": "a26d07a9752d6dccb68e3a8a2a49fd0752cdd0a415e05547819bc37d9ba63d5e",
"blk.15.attn_output.weight": "c63616c69048ccbee801e05be4f56d21fda21aa0cc470f41d57c31b4d9283a4d",
"blk.15.attn_q.weight": "fd595a67bf96c6ba16eb148a9d02fa52fa3c1d33ed10be28a08f851409fd6e64",
"blk.15.attn_v.weight": "1c5c9d33fa07c05d5f4ed0032c6c4aa83d863f0d31c94a66109d239dcd03cea3",
"blk.15.ffn_down.weight": "585ea62ab8aff7d7d212ea5c1a03226fda6b68370c890b776834af70c948dcbc",
"blk.15.ffn_gate.weight": "a13c63f86f879b03a573d5dd2a25cfd1f4dc73e8132e6454ecc23e538b4cdf6f",
"blk.15.ffn_up.weight": "f7112450f57c12fcd511f049e0dc0b541625a107a7901c3261ed9e984299f65c",
"blk.16.attn_k.weight": "2d2c8b11dd71fba6d1c106aa1673c113a5448653cca7eab897c8739212ed5003",
"blk.16.attn_norm.weight": "95c2ec7be9469690e18a9a1779684acb3e9da44b13e263a0da840305646fbf8a",
"blk.16.attn_output.weight": "31a65046e677f54dae654ded4e733479fcc0f7283d83076b7dc7cbcae8528230",
"blk.16.attn_q.weight": "bfc6292b9c6d49b7118d08060242a138182eb182d136ba5dfaf469437c16081d",
"blk.16.attn_v.weight": "68f81d037340217d87c7853ff4d6edfbc46d9e827ee6d5bff7c3f6238e3a95ad",
"blk.16.ffn_down.weight": "bbd6629691950cef4d5113e1c6670e91b216a9b872cb92cee02dfda4d6c4f7b8",
"blk.16.ffn_gate.weight": "63cb56f282b7401ed6c76e5bb6fdf1bf68a64f9af0c82c014209b55bcb5191d0",
"blk.16.ffn_up.weight": "b54f39a2541063cbfb6f713aa81c3b69a04100e999aa2ebbeec195dc382eceec",
"blk.17.attn_k.weight": "3d9ba49799cc56664ec30a002bcad61eb651294212a68c3ddb573eb042aef5a4",
"blk.17.attn_norm.weight": "42ee0db4b9d63257bca0012a30b12737ead1caafeb5ed3d93c8f48ffec4b46de",
"blk.17.attn_output.weight": "a38fd100f05c9041c592bc739e287de0b10d08ef2bda41a879225bdca9002f71",
"blk.17.attn_q.weight": "8a3bee285b0180a9eb35662e449ee4cbe16d992bdd48fb3a94bc4a347728cfa2",
"blk.17.attn_v.weight": "d7f8f1b8b863494ed4392a1656775912e9b264ad36016547b12e832a1d6757d6",
"blk.17.ffn_down.weight": "bb7ee58f61da8630972e25b621996fbe8ec06f4dc9ab1e268ab5b120c526ca28",
"blk.17.ffn_gate.weight": "6b652dbf167fee09a45ebfd78d500ff6548fb2756dbe5343ffec3f7e6207179f",
"blk.17.ffn_up.weight": "3b67f727e55e742715de978fab80457781e7a3762bc48f79d13b45dcb8de664c",
"blk.18.attn_k.weight": "ff7fe57c57b90c6fcc0aefc39ec24593c3a7d1ea1c23770480075a015450e0f5",
"blk.18.attn_norm.weight": "1d40faca082d2633ef0ccf19e121870dd6c7c3e2154607c7f3543fa96e99cb2d",
"blk.18.attn_output.weight": "9adfecaaa397a92db4687efd5fcabfa0daef9e6b0493763b7ff5ebc185c43a6c",
"blk.18.attn_q.weight": "ad1803eb9b291948639277afe981e666b07167eb3fcae903ba5b73bf86d8f50b",
"blk.18.attn_v.weight": "308cf23399adccf27401a4ab60d74dac6fb9d4cd4b9c5940d9145118d1881b34",
"blk.18.ffn_down.weight": "7de4ac9a561fb580619b745687dfd7ca8a69ef70471dee978741b80e9ff7bead",
"blk.18.ffn_gate.weight": "0c66970f696b33bd5ee8f1f2fbcb41fd78fa5ccabdc927e11a4d5a4089f19c69",
"blk.18.ffn_up.weight": "66a42e988e8a1f468fabf976c48e9e4bb045eaac6916ef16555ac101cd674abc",
"blk.19.attn_k.weight": "a928ab50390bacbcebe2e4b66922498134ce22d7b93beaa87d6cf4ab52eb7174",
"blk.19.attn_norm.weight": "b4a02c55b46c2a96aec9c64a254087cf48e6c1d4b6f31782c77a46fc4daebad1",
"blk.19.attn_output.weight": "b768319c641dff1eac5d1f8ceb960c9899c795bf2b24c1d6bf70aa24fda45f77",
"blk.19.attn_q.weight": "79ef3f57d187d3954a26362096e1b6c222d76f537dff73e034d6e9999935b8bc",
"blk.19.attn_v.weight": "ce13d6b13e24fcb2d5bc6a2662e5bd295b31b12db10a6d0307f86cf29b8d5001",
"blk.19.ffn_down.weight": "cf90d7e2137482cfd50934a8223ad774621d08554969da80a9712df5e6227eb0",
"blk.19.ffn_gate.weight": "71ce30150f003b6eeb3bf7464e05b6ae615f135110d8e47f0a47fd973e537c0f",
"blk.19.ffn_up.weight": "7f92aca0cc29866633feec701ec01a85a8ee2fd4e2b9630173a6cffb1d9d50ee",
"blk.20.attn_k.weight": "a2df23159d6fb74ef28e14b61028fe8b00a693a2fc9234a980be74f20b958682",
"blk.20.attn_norm.weight": "c6cd5f1b096fc5efa4eb59ca1c8c4bd28730f3dcedd59a63601663eccc6724ed",
"blk.20.attn_output.weight": "896a8a166d0f006d4b09867ae4345426303cbc3fb13a18d3d4e1bde00f16dbdf",
"blk.20.attn_q.weight": "01eb79588fe61baea0da43e99f4dc5939590e1bafd01e12dadb8326f102bfea2",
"blk.20.attn_v.weight": "bd39630fdd5a7c859ac1addaf53e63faf524c3f32f5f4896d86b6e746b1d5c06",
"blk.20.ffn_down.weight": "0304a5d39957a0e3f031c4bcc4549a135d396c8d97c8d276fd1c823ce86560c2",
"blk.20.ffn_gate.weight": "117b79d595b1dca0c8b37586beaecc4d84411507276212dc286cde7fc36c9bef",
"blk.20.ffn_up.weight": "6e799346db145c125f01783539749d3828fcc451cd4f10c5352f047a47e28714",
"blk.21.attn_k.weight": "1c37e4c0664147e775bb006b226b9553e3421140cd96288ea755f81731ab80ba",
"blk.21.attn_norm.weight": "00ae783a29000ccda5e4bdbff03df0752fb82805dc3f9b987500ebd80714476e",
"blk.21.attn_output.weight": "7588b84f9fb19f15095b5265c60b4a4e7ae74bcc47d4607dfa5d0bfab6f136cb",
"blk.21.attn_q.weight": "a65f1c0dd06d45bb97532d3e932689c1eecfe7359089b39174a96a149335cbc1",
"blk.21.attn_v.weight": "4220b77e7d5e8709b4eef33a679b5dad11f297085ef44c9977f9e54ef08f7a2d",
"blk.21.ffn_down.weight": "b8c082a0530d4b5328e67db0df84c5498f2af956de23c639fa0198ffea853950",
"blk.21.ffn_gate.weight": "cd1b656ee72d00e9835ef667c19ef89a88de261eb8eb7c0e936e0f9ddf83ef9f",
"blk.21.ffn_up.weight": "dc445f73e36ec7a3bd86884186b728f8e0187f32848c3b8b69d4d41f8571bf31",
"blk.22.attn_k.weight": "e37cf0b893ec8b9ee8c78dd139b8d9c45cb997a3bc0c3d93a70ca1c3f6af8859",
"blk.22.attn_norm.weight": "248a27838d3c46cc03a5c312facc84e2e0e2c990ef8401e93da25918497f88d1",
"blk.22.attn_output.weight": "fc191a18f6d18332c66761f7ab28008bfe295dd1f5c8741a2488442f9e00d0f5",
"blk.22.attn_q.weight": "4b193a2ab8bc2b085db18f2bf3eeba26e02b537b2cdd738160c8f14b165d0f5a",
"blk.22.attn_v.weight": "7a60ce5ccac7e045e55ba1e1e85bd2a0f93f8c781daee96c5223665e22f0c666",
"blk.22.ffn_down.weight": "e0a34fb4244e2c7168f3dbaa1904c15d339ec39999cdf27128bbaf619ee0a237",
"blk.22.ffn_gate.weight": "8bac872d4b8549c8812f927efa309f1792b524f33601095fff61b826de5a5615",
"blk.22.ffn_up.weight": "b67fa2b94dd901b6ec64c0853ce8ca2d86fe9cb1cc6d2f15fbbbe0e691c0c648",
"blk.23.attn_k.weight": "2c32e66ad01942b819ac09a197c71579fe66f02226a264fdd72ad1e02c67a27e",
"blk.23.attn_norm.weight": "825fdc94deb439cb93c713eeb077c1052b90ed658d6d464fc4ad3d611e911d48",
"blk.23.attn_output.weight": "95ca6707a95b8750b0c7c5d379d368f0f2e7ebef631954e7d4d8ec0f41f13a3a",
"blk.23.attn_q.weight": "6eccc84faca5fac015d1b26e2854501edcfd292a302228fe14cf99f5eb59a34b",
"blk.23.attn_v.weight": "b343ac3d226040f1033ee049668aa1d89b1774bc18431965682e5dbdce78ccdc",
"blk.23.ffn_down.weight": "9fc599befea8d3b1e342d564a110074f66d2542df406c4b90b6bdc5828fbb2b2",
"blk.23.ffn_gate.weight": "488556c1b0c9f0b20b0c99b4bac2e0f4046b81edb601d7b91e7e5b3bab47d667",
"blk.23.ffn_up.weight": "1088e291d7008dd9c7c2dd6830af686a8a84b724d123a016209bd5156d6898f1",
"blk.24.attn_k.weight": "a923fbe35e61e009a53927d7828818e0592bb737d6a1106c4b0b5a1efc367e07",
"blk.24.attn_norm.weight": "9b51aaaa939cefafdd9b13a7e5b74ac7fa2d603427e55a16a909d6f3f353750a",
"blk.24.attn_output.weight": "1beb2baba56f8409466434b037771248c2f620ec5f53e15f44c271d5a2d9ecf4",
"blk.24.attn_q.weight": "4b0194fe5bfae0c6bf6131dcf8cb6e2b994f6ea10b27cb03574f0f4f8cc0c950",
"blk.24.attn_v.weight": "6ac34b1ab0f66226d85bca1194a7c212cd93d384ecbc8b8395de48aec0970a61",
"blk.24.ffn_down.weight": "5508f74cb732a662c2936b32ac5e90742d172b9f961a747b0e5cba0e5906a89d",
"blk.24.ffn_gate.weight": "095e39b8584403835f9bb1ac33e0e81f54175575e4800273d281b845bff381e7",
"blk.24.ffn_up.weight": "2d43ec21637dda12973de367b0113ee9840b0d815bf6fce042f7c3f270b0b530",
"blk.25.attn_k.weight": "9e2aee029f3d2c7f67dfc7926e72c8228fb978382c8e5a4701bbf82c93801419",
"blk.25.attn_norm.weight": "220cd7164fb4cdbe22d26058e4153b26c27c7b5ce2bec8e95bf2c0ea08d23103",
"blk.25.attn_output.weight": "a17f4a5dc6aa51f03dbd75602d98e9491767c205cdc2c3a5f8667fc54bbf7c64",
"blk.25.attn_q.weight": "f60827496835c440c794bf57ce9780704d10a59d8229886bf75ebb18900ba4ef",
"blk.25.attn_v.weight": "9cac217e9e9f4f4c85f14ee51165a77c580165bd4a34b202389169bbe61a1ced",
"blk.25.ffn_down.weight": "a0f36949b663e80849581dfb71e7babcc73580793bbcb0c80ab26d5a6e000359",
"blk.25.ffn_gate.weight": "df4d1be4d50d6afe5ad3ef0d0e0fac76a33e85c963dea769641d612dd53e7d13",
"blk.25.ffn_up.weight": "992da76be762632e25ebc5ef4d03728eece1b43f7c4e31827df19ca724aea694",
"blk.26.attn_k.weight": "34199ff856ac32a500c754539d070258574192a34ecba87a182897cb59fdff52",
"blk.26.attn_norm.weight": "a8e9dfb2dae5d22b5c0aec5f3675991c0e3c3e6a44153db2579136b73f456e00",
"blk.26.attn_output.weight": "1c4f257ffb0d7db0f11cfb275e38b4af736917b43ad82de1badce3f1d227da4d",
"blk.26.attn_q.weight": "33d55786274c2e718cf61e8fbecf3dfa5ee0c208f0b716d42b061f55459acb3c",
"blk.26.attn_v.weight": "684b636939cd4ffcfec5a6238a0790ffa43d853c95783af9b9e8275e74071a7a",
"blk.26.ffn_down.weight": "89d0bf066db154e6d312b5433aed1714f6a28b40f4c52e3e1530ee07703303c8",
"blk.26.ffn_gate.weight": "393d649bebe5e2940e1b043649f6c860b4b8b9f380f30e9da1744a830f358156",
"blk.26.ffn_up.weight": "179edc85ababd9d8440cc6093eecd1004290aa1cb96434b26ecf7585b6cca17b",
"blk.27.attn_k.weight": "334841445a7f1e14731b08f56eb0b1f0938c63823d28bc6d078c4c5f05b36f19",
"blk.27.attn_norm.weight": "57344471bbda2e9deffdfdb2dd05a07aa47f8761e24de53525588639145bf551",
"blk.27.attn_output.weight": "506126af9ee54b535d49f97e36f630e74834f480329f098d6d62e96246d8d65a",
"blk.27.attn_q.weight": "dd984df1acb4783849e25ba7ae378bfd385cd9efc540fb798cd5bdd873f0118f",
"blk.27.attn_v.weight": "b4b3fe9a4455d34c297ff20a2f537b647cef424741d840a747b265f23d320ac0",
"blk.27.ffn_down.weight": "621fdb185ba0d35ba5476dae73d2c81ec1482a0e878d5bfd5c3b29fe837af013",
"blk.27.ffn_gate.weight": "e4fbab45f2ec506fa374103251a0bdb7baa6f576080bdd796f3e9db92098e08f",
"blk.27.ffn_up.weight": "a0c57e463e988002bbd6a6c6792baa21a65e6f89ae303a2c301951b0ae6e4bbe",
"blk.28.attn_k.weight": "bac36cbd52ec5056841663865e1291ddab4b47ef9a2544dd285d4503bfb0e4a0",
"blk.28.attn_norm.weight": "5774a9df2bbb2e86d1f70179c7b92d81e1f401160148b3328fb64db6646a5425",
"blk.28.attn_output.weight": "e8712622d1569557000c75f26c3f55fad267fd300463c2c2cfe3afbfa1c8f908",
"blk.28.attn_q.weight": "11677751fddee52cc739699c02836f7be54d96038be4240be5d4f53d00161608",
"blk.28.attn_v.weight": "e5ee459b8958d65e1445997b9aa1e90e2f5d17761ebcf5357313119a45322507",
"blk.28.ffn_down.weight": "3934518f9f85292da8475fe38a8edcbfc4e24ac56c351b472d6351f98750871e",
"blk.28.ffn_gate.weight": "6ba735d57e98d0847e487f25ffaa25256deaa8abec76f428cb70bd9774279d83",
"blk.28.ffn_up.weight": "977fae6e1e5353114fc645dd98429464749758765cbc6e6457593d596e57850c",
"blk.29.attn_k.weight": "8122a457307d580ad6f1e0acea09a2f593d97f595ba0d6737f5fea16d2433642",
"blk.29.attn_norm.weight": "d626f721e05aa1202439b01027031d4caf1adace61ed37870a277cb6297c77cc",
"blk.29.attn_output.weight": "7fb7122ab1b6b1e6615ca746897da27bc52c92cb70d3147183cdde61795b72b3",
"blk.29.attn_q.weight": "be43e94ff6b6e391024dc824101efa0ddf4005d5b002ac26cb03765c0c73c2fa",
"blk.29.attn_v.weight": "af93c85ebff908f74f9935b81bde0516ca487c84139868a1ce079c3ae20036b1",
"blk.29.ffn_down.weight": "39dae12340ed3120bd19c495fe0872b559613641e41fde69d02d8631900b84c0",
"blk.29.ffn_gate.weight": "36fd482439840ef197c9f3b8905d86acfcea49bcf018544106ca465d4bf8d5c7",
"blk.29.ffn_up.weight": "5243fbdfdc1e2a1dd84b6210a9869d18a014db9088897e345240cdc99990bd5d",
"blk.30.attn_k.weight": "948f263616bd3788b2b968baafd69b9c5bd1b77578665f096c4b7e247b4cea42",
"blk.30.attn_norm.weight": "e168df981e744874ff303faf2eb470e5f6868c2040ba5f383f6c5148669975e7",
"blk.30.attn_output.weight": "4cf0ccca04b792573b756655a24fc89cfb1f272da8305633f0bc66ef14990b93",
"blk.30.attn_q.weight": "21e07d6cba6c50d65350289258209717174a13c42be57e8141d69712cbaf32c1",
"blk.30.attn_v.weight": "65a8ca29c7237b3182ccf03e2fc94e84f9a53d0e160fb679ab401c853170dd9c",
"blk.30.ffn_down.weight": "8b00500a6d00d84058f6658ee1d6f06fb4fcae2f90d4341792259362923b3c13",
"blk.30.ffn_gate.weight": "5bc0e19ab7a31b50ac2118ad1b36e31055271a322cd8ff661d47c3ac0210703c",
"blk.30.ffn_up.weight": "f37a0561955725bd59ee2d064fa9f4e00a12a1b620b624db3bc3add5330bc321",
"blk.31.attn_k.weight": "9a5663edda227f5d87533897146764f8e8a7481b9e71fae197c39204f8463221",
"blk.31.attn_norm.weight": "060a4f438a1ee5e220b5b5278ad2f5c085a428bf38c515766781815597c87529",
"blk.31.attn_output.weight": "6ada5d3cad9dea4780ffbb43302bb6ccc2f24eddd0fc4f5f84c9ce0fc0c6e5dd",
"blk.31.attn_q.weight": "bb5d08c08603907981ad388d5d8b70fcc9b98034ba264b8474c8890cc0297af0",
"blk.31.attn_v.weight": "e01b4252ea9c6a889c32b21144b441a347464d04536ef4f6572425be55759796",
"blk.31.ffn_down.weight": "8ba4d679c36e93ba65ba03180385ef35ea86b3b7cdf2fded9df59369f1c09630",
"blk.31.ffn_gate.weight": "e5b41dc93645f8b5e8eebae3ada3ea43a18f97ce2654228655170b07b463ccb0",
"blk.31.ffn_up.weight": "25b88cdddc8b547af294ed107d3d1312e90b983cae87936fa6062ecd8ea02539",
"blk.32.attn_k.weight": "4bcf86dc0858c8ca2fbdf6aa76674d43eb698f78979fdc1a38f556a7af1facc4",
"blk.32.attn_norm.weight": "cdcc12f3b8b9773c6722736bfb748a2729230b21478cbcc4104859d3148df815",
"blk.32.attn_output.weight": "d43f1196822995ed89a9365c97054753a8b30ce20b6e273c8edcc42673a1e141",
"blk.32.attn_q.weight": "ebf2972bb3865cbc5be4840113a322089752038344beab2a0122c7cb4fb399b6",
"blk.32.attn_v.weight": "714db81704ff34fa137512903c1013acee7877467473e46600728b9240582eb7",
"blk.32.ffn_down.weight": "2cde3da1258bb170a79d5d3cdfe10c86a71eb34b77da46b74c5ed71e7f4fe274",
"blk.32.ffn_gate.weight": "c7e1ed792532613ff9d4e5834b6536e2e0f47df2303bc0fdaa90aac0c1f4e8db",
"blk.32.ffn_up.weight": "d8d6f13fe66a716e28f79101a29817f0c0d6f99969a6f017d51bafd1a16c600c",
"blk.33.attn_k.weight": "a0a28f6cbca88da00cab2ca37094d9b0503bf9defdae77b91895b911c408cbb6",
"blk.33.attn_norm.weight": "0251200c24cc8445607ace6dc8c5aa0566567997262b7cca53a11ac23cc564b2",
"blk.33.attn_output.weight": "b2423205bdf6a1096d43c44d8d12f1a84fcd4e1bb70fcf6dc8542b8b8a71a13c",
"blk.33.attn_q.weight": "00b425c3ef71065ce5e0234e702bf38143b4952da78a85f52ab2c2e3073d97ab",
"blk.33.attn_v.weight": "035edd2335df816c42c765a5e66b9d9b9e15a822a8dc1863508145499c942c14",
"blk.33.ffn_down.weight": "4894a923a3db75bae4496ba3ce5f28796ad31fe33996a066271fb8654964310e",
"blk.33.ffn_gate.weight": "8f6c819b8bbfbe3357fae89e1ac5a3d58be85b3b04be3bacf7b62775869046ff",
"blk.33.ffn_up.weight": "257c3544b5b544fd5d839665bf5caf107a329b59dbc3751efcaa24ae63c56179",
"blk.34.attn_k.weight": "b6cd8bba892e38dac4a2ebc3ba1bce49e71b967fc436fde30c6d76f54a18935f",
"blk.34.attn_norm.weight": "2b3c8e60a064cba9955752bbbbdd92c71ba5c2f1bd721097bdbe88b5abc68787",
"blk.34.attn_output.weight": "8cc272551c9aaca9db5a660c6927bab94a0243d74a30b2bc165f06bd577714ea",
"blk.34.attn_q.weight": "74b561eb4792484e6a94b58fe2583848c3ae28ff2f1bf3d02939a0cfdfa49990",
"blk.34.attn_v.weight": "dba19e24ff05154dc5a1f55c023729303a583d13d68732ce22ea74d4410dc8f0",
"blk.34.ffn_down.weight": "76eca5dfeb274c35774e0bf9f22ee420ed9085c8e99aa2cd5a236e4918b44c61",
"blk.34.ffn_gate.weight": "9af0862d5fcbc24732846488e653db8242a467765c0cdbc00332b3a40256b4a6",
"blk.34.ffn_up.weight": "2a03126bf73587eaba99ece2066103d12e47bcd4ce30ff6c17b2f383b81d40df",
"blk.35.attn_k.weight": "52513fc0cd4e997a842729af7d21dd09399bce0a339558374738be266d0fa2f0",
"blk.35.attn_norm.weight": "e5281fa911964263ccf1630b14762edbd41d0b9472d6ec695fc600fed4892c35",
"blk.35.attn_output.weight": "b391d6705d5dc6f48326b5fd16573f679edf64109d86fb729a498819676590ca",
"blk.35.attn_q.weight": "d16446921966db9b0e0539626ad22a2511ace780e59379d6a4162d8c5441440b",
"blk.35.attn_v.weight": "9d8cdf23ffdb0c5c74106843390b94b24c9f33ef0eb9998d39f78c73390101ea",
"blk.35.ffn_down.weight": "938eb6301f7bbf162d7dd965682a5ed11d0a4a530c6fedd7e5469ce80012fc17",
"blk.35.ffn_gate.weight": "5ad84f5a0c8edcfea1ecf1a3e3d21d85ceda0c4ad9e3c6ca68885eeff8ed3c2f",
"blk.35.ffn_up.weight": "1c4330d9dc71bf4c98812c34356c51f520f47610a534152aa6d29284b758090d",
"blk.36.attn_k.weight": "ef720655e5ca2465f13db2dfc4732fb4ef2c9d53acde52f514fd4f301e974081",
"blk.36.attn_norm.weight": "88f4b9310b3c8c2644e3029160cd35678c79dfa59280430e03f5c29a6fe84a58",
"blk.36.attn_output.weight": "aec6f915fffd7bb72cd783273e871b4f09605950089d45e72059d1316b6c4b01",
"blk.36.attn_q.weight": "72f9408a2405d42f8db6ce5fcf1d26a3660b6f225fc60e77d0277109cfcb82ed",
"blk.36.attn_v.weight": "0f3b3d851dc44b3893ef53f6cca5b4acc9658bacfe1cc2d13c3d704ddd409b67",
"blk.36.ffn_down.weight": "470aec48ce8c5129a6654d9fd26fcae72776f9fc1429a8bb05818072a876475d",
"blk.36.ffn_gate.weight": "7f5f296d09cf55679767b5d15de3eff489c456782119f25204be4b1647f18dcf",
"blk.36.ffn_up.weight": "b7ef74a1f7ffb4982711d93f1787be3a70edc3d2358d5203c41d8900508037d4",
"blk.37.attn_k.weight": "c4ffa5412e4ff2dcfe1aed991c1f54169fd171a4c7638e4b9f21a1ca64c5e1d6",
"blk.37.attn_norm.weight": "4eb6c888d841cccfacf5b963f8611120f6ff24b84af0b5714fd9ab36dcda422f",
"blk.37.attn_output.weight": "db2a7bbf9682f9f6eea672dae8e150738f1bf74dbc80edc7022017a3f040c8ac",
"blk.37.attn_q.weight": "e38c0462aff139afcbab289189823527e453abc9e541154adde5e7af88cacf0b",
"blk.37.attn_v.weight": "952eb2492ed452a72f96bcc12d4b2affad9dfdf46ee39ce4a5d7b57a5dc301e5",
"blk.37.ffn_down.weight": "25f23a8fbc44febf6dc4848fd7fe03a580e2822bd3b3b5a51f4990826bfe3e4e",
"blk.37.ffn_gate.weight": "707da5eb40118b035305d3262444382351f170a20a537386a70e90c5a83a7817",
"blk.37.ffn_up.weight": "d2d2ba5cfc4ef47338dd7384219e22bf030a5a2209e0354d88f5bbaaafd20e87",
"blk.38.attn_k.weight": "abc4bb189dedf7ce661e79028427623a4f91ac091c2cd60e31b58bc62b1cda71",
"blk.38.attn_norm.weight": "9f4803a7d03fd40fcb83d85f84eb1d5682ea4e5bb084f210c02850675d804c3d",
"blk.38.attn_output.weight": "77cb66007f1a41df7135d0e7f900ceb499c2f667dfc3f1a6ac01a3203bbd3ccf",
"blk.38.attn_q.weight": "d94a8b26cd375bf2bcaa76597e314aa8268ee50a479d00931e5e0e021feadb5d",
"blk.38.attn_v.weight": "660c907888bc5016dc69b7d35fe6f55c7ded697c93be0e2d332a2f17aff88758",
"blk.38.ffn_down.weight": "6f06173bae5b00ffaf88ef383619a8b9c6a8d0d5c6494695d17f6c1de1a68a13",
"blk.38.ffn_gate.weight": "89f99be149d03f116527bfcabe073c50001c874de40fb6e817f6619027f3cd05",
"blk.38.ffn_up.weight": "8d57557c8d5e2d2688b73f01dddf1ce8d5194990cda6358153320aea88aac7f8",
"blk.39.attn_k.weight": "21be09c988b46c8393e6c2ec9230f3b5136eb7607dd1953ba92d0811c2f0dd75",
"blk.39.attn_norm.weight": "ba7c1912dd1c4e2d16917201f62396fd0600e4a451137eaddff255548c209abd",
"blk.39.attn_output.weight": "acfaf4abb3fd27fd899b5563c3877f176b597d8f6cdb2f2fd3f3a0bd4da15ed6",
"blk.39.attn_q.weight": "e8adbc140d4c8f0db2a27ca584c5531d5b1e080555fe627e34d80d0814a92bed",
"blk.39.attn_v.weight": "92f96b0e1f724e73a0f90a76c145654418844c04a6d4b14c05eb5af8a62bf8dc",
"blk.39.ffn_down.weight": "4d9ee7c65fc16fe95d10c47b79ac6a525741947600a64b5fcea5d300a82c50de",
"blk.39.ffn_gate.weight": "7e18507989f39b32191133d2657c2ee3b74f42f070579204d727eb72215793d1",
"blk.39.ffn_up.weight": "22cda752269c9757ba918abede1df95bb0f83a5c772dea13c8deea3d5f2723d9",
"output_norm.weight": "2858cf0e39d32caf52b7861378ace076000241e147f10b9eb21d8a5cd149e3cb"
}

View File

@@ -718,23 +718,18 @@ func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
func LibraryDirs() []string {
	// dependencies can exist wherever we found the runners (e.g. build tree for developers) and relative to the executable
	// This can be simplified once we no longer carry runners as payloads
-	paths := []string{}
-	appExe, err := os.Executable()
+	exe, err := os.Executable()
	if err != nil {
		slog.Warn("failed to lookup executable path", "error", err)
-	} else {
-		appRelative := filepath.Join(filepath.Dir(appExe), envconfig.LibRelativeToExe(), "lib", "ollama")
-		if _, err := os.Stat(appRelative); err == nil {
-			paths = append(paths, appRelative)
-		}
+		return nil
	}
-	rDir, err := runners.Locate()
-	if err != nil {
-		slog.Warn("unable to locate gpu dependency libraries", "error", err)
-	} else {
-		paths = append(paths, filepath.Dir(rDir))
-	}
-	return paths
+
+	lib := filepath.Join(filepath.Dir(exe), envconfig.LibRelativeToExe(), "lib", "ollama")
+	if _, err := os.Stat(lib); err != nil {
+		return nil
+	}
+
+	return []string{lib}
}
func GetSystemInfo() SystemInfo { func GetSystemInfo() SystemInfo {

View File

@@ -2,7 +2,7 @@

### Getting Started
* [Quickstart](../README.md#quickstart)
-* [Examples](../examples)
+* [Examples](./examples.md)
* [Importing models](./import.md)
* [Linux Documentation](./linux.md)
* [Windows Documentation](./windows.md)

View File

@@ -38,7 +38,7 @@ Numeric IDs may be used, however ordering may vary, so UUIDs are more reliable.
 You can discover the UUID of your GPUs by running `nvidia-smi -L` If you want to
 ignore the GPUs and force CPU usage, use an invalid GPU ID (e.g., "-1")
 
-### Laptop Suspend Resume
+### Linux Suspend Resume
 
 On linux, after a suspend/resume cycle, sometimes Ollama will fail to discover
 your NVIDIA GPU, and fallback to running on the CPU. You can workaround this

View File

@@ -165,6 +165,8 @@ var (
 	IntelGPU = Bool("OLLAMA_INTEL_GPU")
 	// MultiUserCache optimizes prompt caching for multi-user scenarios
 	MultiUserCache = Bool("OLLAMA_MULTIUSER_CACHE")
+	// Enable the new Ollama engine
+	NewRunners = Bool("OLLAMA_NEW_RUNNERS")
 )
 
 func String(s string) func() string {
@@ -250,6 +252,7 @@ func AsMap() map[string]EnvVar {
 		"OLLAMA_ORIGINS":          {"OLLAMA_ORIGINS", Origins(), "A comma separated list of allowed origins"},
 		"OLLAMA_SCHED_SPREAD":     {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
 		"OLLAMA_MULTIUSER_CACHE":  {"OLLAMA_MULTIUSER_CACHE", MultiUserCache(), "Optimize prompt caching for multi-user scenarios"},
+		"OLLAMA_NEW_RUNNERS":      {"OLLAMA_NEW_RUNNERS", NewRunners(), "Enable the new Ollama engine"},
 
 		// Informational
 		"HTTP_PROXY":  {"HTTP_PROXY", String("HTTP_PROXY")(), "HTTP proxy"},

View File

@@ -1,15 +1,15 @@
-package llm
+package ggml
 
 import (
 	"encoding/binary"
 	"errors"
 	"fmt"
 	"io"
+	"log/slog"
 	"slices"
 	"strings"
-	"sync"
 
-	"github.com/ollama/ollama/util/bufioutil"
+	"github.com/ollama/ollama/fs/util/bufioutil"
 )
 
 type GGML struct {
@@ -19,145 +19,168 @@ type GGML struct {
 type model interface {
 	KV() KV
-	Tensors() *Tensors
+	Tensors() Tensors
 }
 
 type KV map[string]any
 
-func (kv KV) u64(key string) uint64 {
-	switch v := kv[key].(type) {
-	case uint64:
-		return v
-	case uint32:
-		return uint64(v)
-	case float64:
-		return uint64(v)
-	default:
-		return 0
-	}
-}
-
 func (kv KV) Architecture() string {
-	if s, ok := kv["general.architecture"].(string); ok {
-		return s
-	}
-
-	return "unknown"
+	return kv.String("general.architecture", "unknown")
 }
 
 func (kv KV) Kind() string {
-	if s, ok := kv["general.type"].(string); ok {
-		return s
-	}
-
-	return "unknown"
+	return kv.String("general.type", "unknown")
 }
 
 func (kv KV) ParameterCount() uint64 {
-	return kv.u64("general.parameter_count")
+	return keyValue[uint64](kv, "general.parameter_count")
 }
 
 func (kv KV) FileType() fileType {
-	if u64 := kv.u64("general.file_type"); u64 > 0 {
-		return fileType(uint32(u64))
+	if t := kv.Uint("general.file_type"); t > 0 {
+		return fileType(t)
 	}
 
 	return fileTypeUnknown
 }
 
 func (kv KV) BlockCount() uint64 {
-	return kv.u64(fmt.Sprintf("%s.block_count", kv.Architecture()))
+	return uint64(kv.Uint("block_count"))
+}
+
+func (kv KV) EmbeddingLength() uint64 {
+	return uint64(kv.Uint("embedding_length"))
 }
 
 func (kv KV) HeadCount() uint64 {
-	return kv.u64(fmt.Sprintf("%s.attention.head_count", kv.Architecture()))
+	return uint64(kv.Uint("attention.head_count"))
 }
 
 func (kv KV) HeadCountKV() uint64 {
-	if headCountKV := kv.u64(fmt.Sprintf("%s.attention.head_count_kv", kv.Architecture())); headCountKV > 0 {
-		return headCountKV
-	}
-
-	return 1
+	return uint64(kv.Uint("attention.head_count_kv", 1))
 }
 
 func (kv KV) EmbeddingHeadCount() uint64 {
 	if heads := kv.HeadCount(); heads > 0 {
-		return kv.EmbeddingLength() / kv.HeadCount()
+		return kv.EmbeddingLength() / heads
 	}
 
 	return 0
 }
 
 func (kv KV) EmbeddingHeadCountK() uint64 {
-	if k := kv.u64(fmt.Sprintf("%s.attention.key_length", kv.Architecture())); k > 0 {
-		return k
-	}
-
-	return kv.EmbeddingHeadCount()
+	return uint64(kv.Uint("attention.key_length", uint32(kv.EmbeddingHeadCount())))
 }
 
 func (kv KV) EmbeddingHeadCountV() uint64 {
-	if v := kv.u64(fmt.Sprintf("%s.attention.value_length", kv.Architecture())); v > 0 {
-		return v
-	}
-
-	return kv.EmbeddingHeadCount()
+	return uint64(kv.Uint("attention.value_length", uint32(kv.EmbeddingHeadCount())))
 }
 
 func (kv KV) GQA() uint64 {
 	return kv.HeadCount() / kv.HeadCountKV()
 }
 
-func (kv KV) EmbeddingLength() uint64 {
-	return kv.u64(fmt.Sprintf("%s.embedding_length", kv.Architecture()))
-}
-
 func (kv KV) ContextLength() uint64 {
-	return kv.u64(fmt.Sprintf("%s.context_length", kv.Architecture()))
+	return uint64(kv.Uint("context_length"))
 }
 
 func (kv KV) ChatTemplate() string {
-	s, _ := kv["tokenizer.chat_template"].(string)
-	return s
+	return kv.String("tokenizer.chat_template")
+}
+
+func (kv KV) String(key string, defaultValue ...string) string {
+	return keyValue(kv, key, append(defaultValue, "")...)
+}
+
+func (kv KV) Uint(key string, defaultValue ...uint32) uint32 {
+	return keyValue(kv, key, append(defaultValue, 0)...)
+}
+
+func (kv KV) Float(key string, defaultValue ...float32) float32 {
+	return keyValue(kv, key, append(defaultValue, 0)...)
+}
+
+func (kv KV) Strings(key string, defaultValue ...[]string) []string {
+	r := keyValue(kv, key, &array{})
+	s := make([]string, r.size)
+	for i := range r.size {
+		s[i] = r.values[i].(string)
+	}
+
+	return s
+}
+
+func (kv KV) Uints(key string, defaultValue ...[]uint32) []uint32 {
+	r := keyValue(kv, key, &array{})
+	s := make([]uint32, r.size)
+	for i := range r.size {
+		s[i] = uint32(r.values[i].(int32))
+	}
+
+	return s
+}
+
+func keyValue[T string | uint32 | uint64 | float32 | *array](kv KV, key string, defaultValue ...T) T {
+	if !strings.HasPrefix(key, "tokenizer.") && !strings.HasPrefix(key, "general.") {
+		key = kv.Architecture() + "." + key
+	}
+
+	if val, ok := kv[key]; ok {
+		return val.(T)
+	}
+
+	slog.Warn("key not found", "key", key, "default", defaultValue[0])
+	return defaultValue[0]
 }
 
 type Tensors struct {
-	Items  []*Tensor
+	items  []*Tensor
 	Offset uint64
-
-	layers     map[string]Layer
-	layersOnce sync.Once
 }
 
-func (ts *Tensors) Layers() map[string]Layer {
-	ts.layersOnce.Do(func() {
-		ts.layers = make(map[string]Layer)
-		for _, t := range ts.Items {
-			parts := strings.Split(t.Name, ".")
-			if index := slices.IndexFunc(parts, func(s string) bool { return s == "blk" || s == "mm" }); index != -1 {
-				if len(parts) > index+2 {
-					// blk and mm should have a number after them, join it
-					parts = append(
-						[]string{strings.Join(parts[:index+2], ".")},
-						parts[index+2:]...)
-				}
-			}
-
-			if _, ok := ts.layers[parts[0]]; !ok {
-				ts.layers[parts[0]] = make(Layer)
-			}
-
-			ts.layers[parts[0]][strings.Join(parts[1:], ".")] = t
-		}
-	})
-
-	return ts.layers
+func (s Tensors) Items(prefix ...string) []*Tensor {
+	if len(prefix) == 0 {
+		return s.items
+	}
+
+	var items []*Tensor
+	for _, t := range s.items {
+		if strings.HasPrefix(t.Name, prefix[0]) {
+			items = append(items, t)
+		}
+	}
+
+	return items
+}
+
+func (ts Tensors) Layers() map[string]Layer {
+	layers := make(map[string]Layer)
+	for _, t := range ts.items {
+		parts := strings.Split(t.Name, ".")
+		if i := slices.Index(parts, "blk"); i > 0 {
+			parts = append([]string{
+				strings.Join(parts[:i], "."),
+				strings.Join(parts[i:i+2], "."),
+			}, parts[i+2:]...)
+		} else if i == 0 {
+			parts = append([]string{
+				strings.Join(parts[i:i+2], "."),
+			}, parts[i+2:]...)
		}
 
+		if _, ok := layers[parts[0]]; !ok {
+			layers[parts[0]] = make(Layer)
+		}
 
+		layers[parts[0]][strings.Join(parts[1:], ".")] = t
+	}
+
+	return layers
 }
 
 type Layer map[string]*Tensor
 
-func (l Layer) size() (size uint64) {
+func (l Layer) Size() (size uint64) {
 	for _, t := range l {
 		size += t.Size()
 	}
@@ -255,8 +278,6 @@ func (t Tensor) typeSize() uint64 {
 		return 8
 	case 29: // IQ1_M
 		return blockSize/8 + blockSize/16 + blockSize/32
-	case 30: // BF16
-		return 2
 	default:
 		return 0
 	}
@@ -295,7 +316,7 @@ const (
 
 var ErrUnsupportedFormat = errors.New("unsupported model format")
 
-func DetectGGMLType(b []byte) string {
+func DetectContentType(b []byte) string {
 	switch binary.LittleEndian.Uint32(b[:4]) {
 	case FILE_MAGIC_GGML:
 		return "ggml"
@@ -312,12 +333,12 @@ func DetectGGMLType(b []byte) string {
 	}
 }
 
-// DecodeGGML decodes a GGML model from the given reader.
+// Decode decodes a GGML model from the given reader.
 //
 // It collects array values for arrays with a size less than or equal to
 // maxArraySize. If maxArraySize is 0, the default value of 1024 is used. If
 // the maxArraySize is negative, all arrays are collected.
-func DecodeGGML(rs io.ReadSeeker, maxArraySize int) (*GGML, int64, error) {
+func Decode(rs io.ReadSeeker, maxArraySize int) (*GGML, int64, error) {
 	if maxArraySize == 0 {
 		maxArraySize = 1024
 	}
@@ -331,10 +352,6 @@ func DecodeGGML(rs io.ReadSeeker, maxArraySize int) (*GGML, int64, error) {
 
 	var c container
 	switch magic {
-	case FILE_MAGIC_GGML, FILE_MAGIC_GGMF, FILE_MAGIC_GGJT:
-		return nil, 0, ErrUnsupportedFormat
-	case FILE_MAGIC_GGLA:
-		c = &containerGGLA{}
 	case FILE_MAGIC_GGUF_LE:
 		c = &containerGGUF{ByteOrder: binary.LittleEndian, maxArraySize: maxArraySize}
 	case FILE_MAGIC_GGUF_BE:
@@ -530,21 +547,20 @@ func (llm GGML) GraphSize(context, batch uint64, kvCacheType string) (kv, partia
 }
 
 // SupportsKVCacheType checks if the requested cache type is supported
-func (ggml GGML) SupportsKVCacheType(cacheType string) bool {
-	validKVCacheTypes := []string{"f16", "q8_0", "q4_0"}
-	return slices.Contains(validKVCacheTypes, cacheType)
+func (llm GGML) SupportsKVCacheType(cacheType string) bool {
+	return slices.Contains([]string{"f16", "q8_0", "q4_0"}, cacheType)
 }
 
 // SupportsFlashAttention checks if the model supports flash attention
-func (ggml GGML) SupportsFlashAttention() bool {
-	_, isEmbedding := ggml.KV()[fmt.Sprintf("%s.pooling_type", ggml.KV().Architecture())]
+func (llm GGML) SupportsFlashAttention() bool {
+	_, isEmbedding := llm.KV()[fmt.Sprintf("%s.pooling_type", llm.KV().Architecture())]
 	if isEmbedding {
 		return false
 	}
 
 	// Check head counts match and are non-zero
-	headCountK := ggml.KV().EmbeddingHeadCountK()
-	headCountV := ggml.KV().EmbeddingHeadCountV()
+	headCountK := llm.KV().EmbeddingHeadCountK()
+	headCountV := llm.KV().EmbeddingHeadCountV()
 	return headCountK != 0 && headCountV != 0 && headCountK == headCountV
 }
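The change worth calling out in this file: the per-field fmt.Sprintf("%s.block_count", kv.Architecture()) boilerplate is replaced by one generic keyValue lookup that applies the architecture prefix automatically. A self-contained sketch of that resolution rule (a simplified re-implementation for illustration, not the code above):

```go
package main

import (
	"fmt"
	"strings"
)

type KV map[string]any

// Uint resolves a key the way the new fs/ggml accessors do: anything outside
// the "tokenizer." and "general." namespaces gets the architecture prefix,
// and a missing key falls back to the supplied default.
func (kv KV) Uint(key string, defaultValue uint32) uint32 {
	if !strings.HasPrefix(key, "tokenizer.") && !strings.HasPrefix(key, "general.") {
		key = kv["general.architecture"].(string) + "." + key
	}
	if v, ok := kv[key]; ok {
		return v.(uint32)
	}
	return defaultValue
}

func main() {
	kv := KV{
		"general.architecture":       "llama",
		"llama.attention.head_count": uint32(32),
	}
	fmt.Println(kv.Uint("attention.head_count", 0))    // 32, via "llama.attention.head_count"
	fmt.Println(kv.Uint("attention.head_count_kv", 1)) // 1, the default (key absent)
}
```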

View File

@@ -1,4 +1,4 @@
-package llm
+package ggml
 
 import (
 	"bytes"
@@ -8,10 +8,9 @@ import (
 	"fmt"
 	"io"
 	"log/slog"
+	"maps"
 	"slices"
 	"strings"
-
-	"golang.org/x/exp/maps"
 )
 
 type containerGGUF struct {
@@ -110,9 +109,9 @@ func (llm *gguf) KV() KV {
 	return llm.kv
 }
 
-func (llm *gguf) Tensors() *Tensors {
-	return &Tensors{
-		Items:  llm.tensors,
+func (llm *gguf) Tensors() Tensors {
+	return Tensors{
+		items:  llm.tensors,
 		Offset: llm.tensorOffset,
 	}
 }
@@ -523,7 +522,7 @@ func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {
 		return err
 	}
 
-	keys := maps.Keys(kv)
+	keys := slices.Collect(maps.Keys(kv))
 	slices.Sort(keys)
 
 	for _, key := range keys {
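One detail easy to miss in the WriteGGUF hunk: golang.org/x/exp/maps.Keys returned a []K directly, while the standard-library maps.Keys (Go 1.23+) returns an iter.Seq[K], so the keys must now be materialized with slices.Collect before sorting. A minimal demonstration:

```go
package main

import (
	"fmt"
	"maps"
	"slices"
)

func main() {
	kv := map[string]int{"general.architecture": 0, "llama.block_count": 1}

	// maps.Keys yields an iterator; slices.Collect turns it into a slice
	// so the metadata keys can be written in a deterministic order.
	keys := slices.Collect(maps.Keys(kv))
	slices.Sort(keys)

	fmt.Println(keys) // [general.architecture llama.block_count]
}
```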

View File

@@ -1,4 +1,4 @@
-package llm
+package ggml
 
 import "fmt"
 
@@ -32,10 +32,9 @@ const (
 	fileTypeIQ1_S
 	fileTypeIQ4_NL
 	fileTypeIQ3_S
-	fileTypeIQ3_M
 	fileTypeIQ2_S
-	fileTypeIQ2_M
 	fileTypeIQ4_XS
+	fileTypeIQ2_M
 	fileTypeIQ1_M
 	fileTypeBF16
@@ -94,8 +93,6 @@ func ParseFileType(s string) (fileType, error) {
 		return fileTypeIQ4_NL, nil
 	case "IQ3_S":
 		return fileTypeIQ3_S, nil
-	case "IQ3_M":
-		return fileTypeIQ3_M, nil
 	case "IQ2_S":
 		return fileTypeIQ2_S, nil
 	case "IQ4_XS":
@@ -163,8 +160,6 @@ func (t fileType) String() string {
 		return "IQ4_NL"
 	case fileTypeIQ3_S:
 		return "IQ3_S"
-	case fileTypeIQ3_M:
-		return "IQ3_M"
 	case fileTypeIQ2_S:
 		return "IQ2_S"
 	case fileTypeIQ4_XS:
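Worth noting on the constant shuffle above: these fileType values are iota-derived, so deleting fileTypeIQ3_M and moving fileTypeIQ2_M changes the integer every later name maps to. A toy illustration (hypothetical names, not the real enum) of why position is load-bearing here:

```go
package main

import "fmt"

type fileType uint32

// With iota, a constant's value is its position: remove or reorder a name
// and everything after it silently renumbers, which matters when the values
// must match file_type integers stored on disk.
const (
	fileTypeA fileType = iota // 0
	fileTypeB                 // 1
	fileTypeC                 // 2
)

func main() {
	fmt.Println(fileTypeA, fileTypeB, fileTypeC) // 0 1 2
}
```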

3
go.mod
View File

@@ -17,12 +17,14 @@ require (
 require (
 	github.com/agnivade/levenshtein v1.1.1
 	github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1
+	github.com/dlclark/regexp2 v1.11.4
 	github.com/emirpasic/gods/v2 v2.0.0-alpha
 	github.com/google/go-cmp v0.6.0
 	github.com/mattn/go-runewidth v0.0.14
 	github.com/nlpodyssey/gopickle v0.3.0
 	github.com/pdevine/tensor v0.0.0-20240510204454-f88f4562727c
 	golang.org/x/image v0.22.0
+	gonum.org/v1/gonum v0.15.0
 )
 
 require (
@@ -42,7 +44,6 @@ require (
 	github.com/xtgo/set v1.0.0 // indirect
 	go4.org/unsafe/assume-no-moving-gc v0.0.0-20231121144256-b99613f794b6 // indirect
 	golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
-	gonum.org/v1/gonum v0.15.0 // indirect
 	gorgonia.org/vecf32 v0.9.0 // indirect
 	gorgonia.org/vecf64 v0.9.0 // indirect
 )

2
go.sum
View File

@@ -42,6 +42,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/dgryski/trifles v0.0.0-20200323201526-dd97f9abfb48 h1:fRzb/w+pyskVMQ+UbP35JkH8yB7MYb4q/qhBarqZE6g=
 github.com/dgryski/trifles v0.0.0-20200323201526-dd97f9abfb48/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA=
+github.com/dlclark/regexp2 v1.11.4 h1:rPYF9/LECdNymJufQKmri9gV604RvvABwgOA8un7yAo=
+github.com/dlclark/regexp2 v1.11.4/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
 github.com/emirpasic/gods/v2 v2.0.0-alpha h1:dwFlh8pBg1VMOXWGipNMRt8v96dKAIvBehtCt6OtunU=
 github.com/emirpasic/gods/v2 v2.0.0-alpha/go.mod h1:W0y4M2dtBB9U5z3YlghmpuUhiaZT2h6yoeE+C1sCp6A=
 github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=

View File

@@ -37,8 +37,7 @@ go build -tags avx .
 ```shell
 # go doesn't recognize `-mfma` as a valid compiler flag
 # see https://github.com/golang/go/issues/17895
-go env -w "CGO_CFLAGS_ALLOW=-mfma|-mf16c"
-go env -w "CGO_CXXFLAGS_ALLOW=-mfma|-mf16c"
+go env -w "CGO_CPPFLAGS_ALLOW=-mfma|-mf16c"
 go build -tags=avx,avx2 .
 ```

34
llama/amx.h vendored
View File

@@ -1,34 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "ggml-backend.h"
#include "ggml-cpu-impl.h"
// GGML internal header
#if defined(__AMX_INT8__) && defined(__AVX512VNNI__)
ggml_backend_buffer_type_t ggml_backend_amx_buffer_type(void);
#endif

51
llama/ggml-blas.h vendored
View File

@@ -1,51 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#pragma once
#include "ggml.h"
#include "ggml-backend.h"
#ifdef __cplusplus
extern "C" {
#endif
// backend API
GGML_BACKEND_API ggml_backend_t ggml_backend_blas_init(void);
GGML_BACKEND_API bool ggml_backend_is_blas(ggml_backend_t backend);
// number of threads used for conversion to float
// for openblas and blis, this will also set the number of threads used for blas operations
GGML_BACKEND_API void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads);
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_blas_reg(void);
#ifdef __cplusplus
}
#endif

View File

@@ -1,34 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#pragma once
#include "ggml-cpu-traits.h"
#include "ggml.h"
// GGML internal header
ggml_backend_buffer_type_t ggml_backend_cpu_aarch64_buffer_type(void);

View File

@@ -1,64 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#pragma once
#include "ggml-backend-impl.h"
#include "ggml-cpu-impl.h"
#include "ggml.h"
#ifdef __cplusplus
# include <vector>
extern "C" {
#endif
// return true if op part of extra "accelerator"
bool ggml_cpu_extra_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op);
bool ggml_cpu_extra_work_size(int n_threads, const struct ggml_tensor * op, size_t * size);
#ifdef __cplusplus
}
namespace ggml::cpu {
// register in tensor->extra
class tensor_traits {
public:
virtual ~tensor_traits();
virtual bool work_size(int n_threads, const struct ggml_tensor * op, size_t & size) = 0;
virtual bool compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op) = 0;
};
class extra_buffer_type {
public:
virtual ~extra_buffer_type();
virtual bool supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) = 0;
virtual tensor_traits * get_tensor_traits(const struct ggml_tensor * op) = 0;
};
} // namespace ggml::cpu
// implemented in ggml-cpu.cpp.
std::vector<ggml_backend_buffer_type_t> & ggml_backend_cpu_get_extra_buffers_type();
#endif

View File

@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
#define CUDA_ACC_BLOCK_SIZE 256
void ggml_cuda_op_acc(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,60 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "arange.cuh"
static __global__ void arange_f32(float * dst, const int ne0, const float start, const float step) {
// blockIDx.x: idx of ne0 / BLOCK_SIZE
int nidx = threadIdx.x + blockIdx.x * blockDim.x;
if (nidx >= ne0) {
return;
}
dst[nidx] = start + step * nidx;
}
static void arange_f32_cuda(float * dst, const int ne0, const float start, const float step, cudaStream_t stream) {
int num_blocks = (ne0 + CUDA_ARANGE_BLOCK_SIZE - 1) / CUDA_ARANGE_BLOCK_SIZE;
arange_f32<<<num_blocks, CUDA_ARANGE_BLOCK_SIZE, 0, stream>>>(dst, ne0, start, step);
}
void ggml_cuda_op_arange(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
float * dst_d = (float *)dst->data;
cudaStream_t stream = ctx.stream();
GGML_ASSERT(dst->type == GGML_TYPE_F32);
float start;
float stop;
float step;
memcpy(&start, (float *)dst->op_params + 0, sizeof(float));
memcpy(&stop, (float *)dst->op_params + 1, sizeof(float));
memcpy(&step, (float *)dst->op_params + 2, sizeof(float));
int64_t steps = (int64_t)ceil((stop - start) / step);
GGML_ASSERT(ggml_nelements(dst) == steps);
arange_f32_cuda(dst_d, dst->ne[0], start, step, stream);
}

View File

@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
#define CUDA_ARANGE_BLOCK_SIZE 256
void ggml_cuda_op_arange(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,29 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
void ggml_cuda_argmax(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,29 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
void ggml_cuda_op_argsort(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,35 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
void ggml_cuda_op_repeat(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
void ggml_cuda_op_add(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
void ggml_cuda_op_sub(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
void ggml_cuda_op_mul(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
void ggml_cuda_op_div(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
void ggml_cuda_op_repeat_back(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,60 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "clamp.cuh"
static __global__ void clamp_f32(const float * x, float * dst, const float min, const float max, const int k) {
const int i = blockDim.x*blockIdx.x + threadIdx.x;
if (i >= k) {
return;
}
dst[i] = x[i] < min ? min : (x[i] > max ? max : x[i]);
}
static void clamp_f32_cuda(const float * x, float * dst, const float min, const float max, const int k, cudaStream_t stream) {
const int num_blocks = (k + CUDA_CLAMP_BLOCK_SIZE - 1) / CUDA_CLAMP_BLOCK_SIZE;
clamp_f32<<<num_blocks, CUDA_CLAMP_BLOCK_SIZE, 0, stream>>>(x, dst, min, max, k);
}
void ggml_cuda_op_clamp(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
const ggml_tensor * src0 = dst->src[0];
const float * src0_d = (const float *)src0->data;
float * dst_d = (float *)dst->data;
cudaStream_t stream = ctx.stream();
GGML_ASSERT(src0->type == GGML_TYPE_F32);
GGML_ASSERT( dst->type == GGML_TYPE_F32);
float min;
float max;
memcpy(&min, dst->op_params, sizeof(float));
memcpy(&max, (float *) dst->op_params + 1, sizeof(float));
clamp_f32_cuda(src0_d, dst_d, min, max, ggml_nelements(src0), stream);
}

View File

@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
#define CUDA_CLAMP_BLOCK_SIZE 256
void ggml_cuda_op_clamp(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
#define CUDA_CONCAT_BLOCK_SIZE 256
void ggml_cuda_op_concat(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
#define CUDA_CONV_TRANPOSE_1D_BLOCK_SIZE 256
void ggml_cuda_op_conv_transpose_1d(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,39 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
#define CUDA_DEQUANTIZE_BLOCK_SIZE 256
template<typename T>
using to_t_cuda_t = void (*)(const void * __restrict__ x, T * __restrict__ y, int64_t k, cudaStream_t stream);
typedef to_t_cuda_t<float> to_fp32_cuda_t;
typedef to_t_cuda_t<half> to_fp16_cuda_t;
to_fp16_cuda_t ggml_get_to_fp16_cuda(ggml_type type);
to_fp32_cuda_t ggml_get_to_fp32_cuda(ggml_type type);

View File

@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
#define CUDA_COUNT_EQUAL_CHUNK_SIZE 128
void ggml_cuda_count_equal(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,35 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
#define CUDA_CPY_BLOCK_SIZE 64
void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, ggml_tensor * src1);
void ggml_cuda_dup(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
void* ggml_cuda_cpy_fn(const ggml_tensor * src0, ggml_tensor * src1);

View File

@@ -1,33 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
#define CUDA_CROSS_ENTROPY_LOSS_BLOCK_SIZE 256
void ggml_cuda_cross_entropy_loss(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
void ggml_cuda_cross_entropy_loss_back(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
#define CUDA_DIAG_MASK_INF_BLOCK_SIZE 32
void ggml_cuda_op_diag_mask_inf(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,29 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
void ggml_cuda_flash_attn_ext_tile_f16(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,29 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
void ggml_cuda_flash_attn_ext_tile_f32(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,29 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
void ggml_cuda_flash_attn_ext(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
#define CUDA_GET_ROWS_BLOCK_SIZE 256
void ggml_cuda_op_get_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
#define CUDA_IM2COL_BLOCK_SIZE 256
void ggml_cuda_op_im2col(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,38 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
// maximum number of src0 rows with which to use mul_mat_vec over cuBLAS if FP16 tensor cores are available
#define MMV_MAX_ROWS 512
void ggml_cuda_mul_mat_vec(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
void ggml_cuda_op_mul_mat_vec(
ggml_backend_cuda_context & ctx,
const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, const char * src0_dd_i, const float * src1_ddf_i,
const char * src1_ddq_i, float * dst_dd_i, const int64_t row_low, const int64_t row_high, const int64_t src1_ncols,
const int64_t src1_padded_row_size, cudaStream_t stream);
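// How MMV_MAX_ROWS is typically consulted at dispatch time (the real
// check lives in ggml-cuda.cu, which is not part of this file) -- a
// hedged sketch only, using tensor field names as they appear elsewhere
// in ggml:
//
//   // prefer the custom kernel only for skinny matrices; past
//   // MMV_MAX_ROWS, cuBLAS with FP16 tensor cores tends to win
//   const bool use_mul_mat_vec =
//       src1->ne[1] == 1 &&            // a single column in src1
//       src0->ne[1] <= MMV_MAX_ROWS;   // few enough rows in src0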


@@ -1,35 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
#define MMVQ_MAX_BATCH_SIZE 8 // Max. batch size for which to use MMVQ kernels.
void ggml_cuda_op_mul_mat_vec_q(
ggml_backend_cuda_context & ctx,
const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, const char * src0_dd_i, const float * src1_ddf_i,
const char * src1_ddq_i, float * dst_dd_i, const int64_t row_low, const int64_t row_high, const int64_t src1_ncols,
const int64_t src1_padded_row_size, cudaStream_t stream);


@@ -1,33 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
void ggml_cuda_op_norm(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
void ggml_cuda_op_group_norm(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
void ggml_cuda_op_rms_norm(ggml_backend_cuda_context & ctx, ggml_tensor * dst);


@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
#define CUDA_OPT_STEP_ADAMW_BLOCK_SIZE 256
void ggml_cuda_opt_step_adamw(ggml_backend_cuda_context & ctx, ggml_tensor * dst);


@@ -1,29 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
void ggml_cuda_out_prod(ggml_backend_cuda_context & ctx, ggml_tensor * dst);


@@ -1,32 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
#define CUDA_PAD_BLOCK_SIZE 256
void ggml_cuda_op_pad(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
void ggml_cuda_op_unpad(ggml_backend_cuda_context & ctx, ggml_tensor * dst);


@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
#define CUDA_POOL2D_BLOCK_SIZE 256
void ggml_cuda_op_pool2d(ggml_backend_cuda_context & ctx, ggml_tensor * dst);


@@ -1,50 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#pragma once
#include "common.cuh"
#include "mmq.cuh"
#include <cstdint>
#define CUDA_QUANTIZE_BLOCK_SIZE 256
#define CUDA_QUANTIZE_BLOCK_SIZE_MMQ 128
static_assert(MATRIX_ROW_PADDING % CUDA_QUANTIZE_BLOCK_SIZE == 0, "Risk of out-of-bounds access.");
static_assert(MATRIX_ROW_PADDING % (4*CUDA_QUANTIZE_BLOCK_SIZE_MMQ) == 0, "Risk of out-of-bounds access.");
typedef void (*quantize_cuda_t)(
const float * x, void * vy, const int64_t kx0, const int64_t kx1, const int64_t channels, const int64_t kx0_padded,
const ggml_type type_x, cudaStream_t stream);
void quantize_row_q8_1_cuda(
const float * x, void * vy, const int64_t kx0, const int64_t kx1, const int64_t channels, const int64_t kx0_padded,
const ggml_type type_x, cudaStream_t stream);
void quantize_mmq_q8_1_cuda(
const float * x, void * vy, const int64_t kx0, const int64_t kx1, const int64_t channels, const int64_t kx0_padded,
const ggml_type type_x, cudaStream_t stream);
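// The static_asserts above guard against out-of-bounds reads: rows are
// padded to a multiple of MATRIX_ROW_PADDING (defined in common.cuh,
// not shown in this diff), so requiring that padding to divide evenly
// by the quantize block size guarantees that a ceil-division launch
// never steps past the end of a padded row. The MMQ variant asserts
// against 4*CUDA_QUANTIZE_BLOCK_SIZE_MMQ, presumably because that
// kernel consumes four floats per thread (float4 loads).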


@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
#define CUDA_ROPE_BLOCK_SIZE 256
void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst);


@@ -1,57 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "scale.cuh"
static __global__ void scale_f32(const float * x, float * dst, const float scale, const int k) {
const int i = blockDim.x*blockIdx.x + threadIdx.x;
if (i >= k) {
return;
}
dst[i] = scale * x[i];
}
static void scale_f32_cuda(const float * x, float * dst, const float scale, const int k, cudaStream_t stream) {
const int num_blocks = (k + CUDA_SCALE_BLOCK_SIZE - 1) / CUDA_SCALE_BLOCK_SIZE;
scale_f32<<<num_blocks, CUDA_SCALE_BLOCK_SIZE, 0, stream>>>(x, dst, scale, k);
}
void ggml_cuda_op_scale(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
const ggml_tensor * src0 = dst->src[0];
const float * src0_d = (const float *)src0->data;
float * dst_d = (float *)dst->data;
cudaStream_t stream = ctx.stream();
GGML_ASSERT(src0->type == GGML_TYPE_F32);
GGML_ASSERT( dst->type == GGML_TYPE_F32);
float scale;
memcpy(&scale, dst->op_params, sizeof(float));
scale_f32_cuda(src0_d, dst_d, scale, ggml_nelements(src0), stream);
}
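// A minimal host-side sketch of driving scale_f32_cuda directly,
// assuming only the CUDA runtime; the buffer names and sizes here are
// illustrative, not part of the original file:
//
//   const int k = 1024;
//   float *d_x, *d_dst;
//   cudaMalloc(&d_x,   k * sizeof(float));
//   cudaMalloc(&d_dst, k * sizeof(float));
//   // ... upload input into d_x ...
//   scale_f32_cuda(d_x, d_dst, 0.5f, k, /*stream=*/0);
//   cudaStreamSynchronize(0);
//
// The launch rounds the grid up, (k + CUDA_SCALE_BLOCK_SIZE - 1) /
// CUDA_SCALE_BLOCK_SIZE blocks, and the early return in scale_f32
// masks the tail threads when k is not a multiple of the block size.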


@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
#define CUDA_SCALE_BLOCK_SIZE 256
void ggml_cuda_op_scale(ggml_backend_cuda_context & ctx, ggml_tensor * dst);


@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
#define CUDA_SOFT_MAX_BLOCK_SIZE 1024
void ggml_cuda_op_soft_max(ggml_backend_cuda_context & ctx, ggml_tensor * dst);


@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
void sum_f32_cuda(ggml_cuda_pool & pool, const float * x, float * dst, const int64_t ne, cudaStream_t stream);
void ggml_cuda_op_sum(ggml_backend_cuda_context & ctx, ggml_tensor * dst);


@@ -1,65 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "sumrows.cuh"
static __global__ void k_sum_rows_f32(const float * x, float * dst, const int ncols) {
const int row = blockIdx.x;
const int col = threadIdx.x;
float sum = 0.0f;
for (int i = col; i < ncols; i += blockDim.x) {
sum += x[row * ncols + i];
}
sum = warp_reduce_sum(sum);
if (col == 0) {
dst[row] = sum;
}
}
void sum_rows_f32_cuda(const float * x, float * dst, const int ncols, const int nrows, cudaStream_t stream) {
const dim3 block_dims(WARP_SIZE, 1, 1);
const dim3 block_nums(nrows, 1, 1);
k_sum_rows_f32<<<block_nums, block_dims, 0, stream>>>(x, dst, ncols);
}
void ggml_cuda_op_sum_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
const ggml_tensor * src0 = dst->src[0];
const float * src0_d = (const float *)src0->data;
float * dst_d = (float *)dst->data;
cudaStream_t stream = ctx.stream();
GGML_ASSERT(src0->type == GGML_TYPE_F32);
GGML_ASSERT( dst->type == GGML_TYPE_F32);
GGML_ASSERT(ggml_is_contiguous(src0));
const int64_t ncols = src0->ne[0];
const int64_t nrows = ggml_nrows(src0);
sum_rows_f32_cuda(src0_d, dst_d, ncols, nrows, stream);
}
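// warp_reduce_sum is provided by common.cuh (not shown in this diff); a
// typical shuffle-based definition, given here only as a sketch, is:
//
//   static __device__ __forceinline__ float warp_reduce_sum(float x) {
//   #pragma unroll
//       for (int offset = WARP_SIZE / 2; offset > 0; offset >>= 1) {
//           x += __shfl_xor_sync(0xffffffff, x, offset, WARP_SIZE);
//       }
//       return x;
//   }
//
// Each block is exactly one warp wide (block_dims = WARP_SIZE), so the
// strided loop accumulates a partial sum per lane and a single warp
// reduction leaves the full row sum in lane 0, which writes dst[row].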


@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
void sum_rows_f32_cuda(const float * x, float * dst, const int ncols, const int nrows, cudaStream_t stream);
void ggml_cuda_op_sum_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst);


@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_F16);
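// The fattn-vec-f16 instance files below are all emitted by
// generate_cu_files.py: each one expands DECL_FATTN_VEC_F16_CASE for a
// single (head size, K type, V type) combination -- this one covers
// head size 128 with F16 keys and F16 values. Keeping one instantiation
// per translation unit lets the many template cases compile in parallel
// rather than in one enormous file.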


@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q4_0);


@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q4_1);


@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q5_0);


@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q5_1);


@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q8_0);


@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_F16);


@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q4_0);


@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q4_1);


@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q5_0);


@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q5_1);


@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q8_0);


@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_F16);


@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q4_0);


@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q4_1);


@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q5_0);


@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q5_1);


@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q8_0);


@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_F16);


@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q4_0);


@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q4_1);

Some files were not shown because too many files have changed in this diff.