mirror of
https://github.com/ollama/ollama.git
synced 2026-04-23 01:05:47 +02:00
Compare commits
6 Commits
brucemacd/
...
brucemacd/
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
057cc54b66 | ||
|
|
1e438b237c | ||
|
|
d721a02e7d | ||
|
|
778603a818 | ||
|
|
3c874df46e | ||
|
|
5f8c03189e |
@@ -382,6 +382,8 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
|||||||
- [LocalLLM](https://github.com/qusaismael/localllm) (Minimal Web-App to run ollama models on it with a GUI)
|
- [LocalLLM](https://github.com/qusaismael/localllm) (Minimal Web-App to run ollama models on it with a GUI)
|
||||||
- [Ollamazing](https://github.com/buiducnhat/ollamazing) (Web extension to run Ollama models)
|
- [Ollamazing](https://github.com/buiducnhat/ollamazing) (Web extension to run Ollama models)
|
||||||
- [OpenDeepResearcher-via-searxng](https://github.com/benhaotang/OpenDeepResearcher-via-searxng) (A Deep Research equivalent endpoint with Ollama support for running locally)
|
- [OpenDeepResearcher-via-searxng](https://github.com/benhaotang/OpenDeepResearcher-via-searxng) (A Deep Research equivalent endpoint with Ollama support for running locally)
|
||||||
|
- [AntSK](https://github.com/AIDotNet/AntSK) (Out-of-the-box & Adaptable RAG Chatbot)
|
||||||
|
- [MaxKB](https://github.com/1Panel-dev/MaxKB/) (Ready-to-use & flexible RAG Chatbot)
|
||||||
|
|
||||||
### Cloud
|
### Cloud
|
||||||
|
|
||||||
|
|||||||
86
benchmark/ggml_backend_benchmark_test.go
Normal file
86
benchmark/ggml_backend_benchmark_test.go
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
package backend
|
||||||
|
|
||||||
|
import (
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"log"
|
||||||
|
"os"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/ollama/ollama/ml"
|
||||||
|
"github.com/ollama/ollama/model"
|
||||||
|
"github.com/ollama/ollama/server"
|
||||||
|
|
||||||
|
_ "github.com/ollama/ollama/model/models/llama"
|
||||||
|
)
|
||||||
|
|
||||||
|
var modelName = flag.String("m", "", "Name of the model to benchmark")
|
||||||
|
|
||||||
|
// suppressOutput silences Go-level output while a benchmark is being set up
// and run.
//
// It sets os.Stdout and os.Stderr to nil and redirects the standard logger to
// io.Discard. The returned cleanup function restores os.Stdout, os.Stderr and
// the writer the standard logger was using before the call, so a logger that
// had been pointed somewhere other than os.Stderr is not silently rerouted.
func suppressOutput() (cleanup func()) {
	oldStdout, oldStderr := os.Stdout, os.Stderr
	oldLogWriter := log.Writer()

	os.Stdout, os.Stderr = nil, nil
	log.SetOutput(io.Discard)

	return func() {
		os.Stdout, os.Stderr = oldStdout, oldStderr
		log.SetOutput(oldLogWriter)
	}
}
|
||||||
|
|
||||||
|
func setupModel(b *testing.B) model.Model {
|
||||||
|
if *modelName == "" {
|
||||||
|
b.Fatal("Error: -m flag is required for benchmark tests")
|
||||||
|
}
|
||||||
|
|
||||||
|
sm, err := server.GetModel(*modelName)
|
||||||
|
if err != nil {
|
||||||
|
b.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
m, err := model.New(sm.ModelPath)
|
||||||
|
if err != nil {
|
||||||
|
b.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
m.Config().Cache.Init(m.Backend(), ml.DTypeF32, 2048)
|
||||||
|
return m
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkGGMLOperations(b *testing.B) {
|
||||||
|
// loading the GGML back-end logs to standard out and makes the bench output messy
|
||||||
|
cleanup := suppressOutput()
|
||||||
|
defer cleanup()
|
||||||
|
|
||||||
|
b.Setenv("OLLAMA_BENCHMARK", "1")
|
||||||
|
b.Setenv("OLLAMA_BACKEND", "ggml")
|
||||||
|
|
||||||
|
m := setupModel(b)
|
||||||
|
|
||||||
|
// Sample input data
|
||||||
|
inputIDs := []int32{1, 2, 3, 4, 5}
|
||||||
|
options := model.Options{
|
||||||
|
Inputs: inputIDs,
|
||||||
|
Positions: []int32{1, 2, 3, 4, 5},
|
||||||
|
Sequences: []int{1, 1, 1, 1, 1},
|
||||||
|
Outputs: []int32{int32(len(inputIDs) - 1)},
|
||||||
|
}
|
||||||
|
|
||||||
|
b.ResetTimer()
|
||||||
|
|
||||||
|
for range b.N {
|
||||||
|
ctx := m.Backend().NewContext()
|
||||||
|
defer ctx.Close()
|
||||||
|
|
||||||
|
modelOutput, err := model.Forward(ctx, m, options)
|
||||||
|
if err != nil {
|
||||||
|
b.Fatal(fmt.Errorf("forward pass failed: %v", err))
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx.Compute(modelOutput)
|
||||||
|
|
||||||
|
for _, op := range ctx.Timing() {
|
||||||
|
b.ReportMetric(op.Duration, fmt.Sprintf("%s_ms", op.Type))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -10,6 +10,7 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/google/go-cmp/cmp"
|
"github.com/google/go-cmp/cmp"
|
||||||
"github.com/spf13/cobra"
|
"github.com/spf13/cobra"
|
||||||
@@ -490,6 +491,96 @@ func TestPushHandler(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestListHandler(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
args []string
|
||||||
|
serverResponse []api.ListModelResponse
|
||||||
|
expectedError string
|
||||||
|
expectedOutput string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "list all models",
|
||||||
|
args: []string{},
|
||||||
|
serverResponse: []api.ListModelResponse{
|
||||||
|
{Name: "model1", Digest: "sha256:abc123", Size: 1024, ModifiedAt: time.Now().Add(-24 * time.Hour)},
|
||||||
|
{Name: "model2", Digest: "sha256:def456", Size: 2048, ModifiedAt: time.Now().Add(-48 * time.Hour)},
|
||||||
|
},
|
||||||
|
expectedOutput: "NAME ID SIZE MODIFIED \n" +
|
||||||
|
"model1 sha256:abc12 1.0 KB 24 hours ago \n" +
|
||||||
|
"model2 sha256:def45 2.0 KB 2 days ago \n",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "filter models by prefix",
|
||||||
|
args: []string{"model1"},
|
||||||
|
serverResponse: []api.ListModelResponse{
|
||||||
|
{Name: "model1", Digest: "sha256:abc123", Size: 1024, ModifiedAt: time.Now().Add(-24 * time.Hour)},
|
||||||
|
{Name: "model2", Digest: "sha256:def456", Size: 2048, ModifiedAt: time.Now().Add(-24 * time.Hour)},
|
||||||
|
},
|
||||||
|
expectedOutput: "NAME ID SIZE MODIFIED \n" +
|
||||||
|
"model1 sha256:abc12 1.0 KB 24 hours ago \n",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "server error",
|
||||||
|
args: []string{},
|
||||||
|
expectedError: "server error",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if r.URL.Path != "/api/tags" || r.Method != http.MethodGet {
|
||||||
|
t.Errorf("unexpected request to %s %s", r.Method, r.URL.Path)
|
||||||
|
http.Error(w, "not found", http.StatusNotFound)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if tt.expectedError != "" {
|
||||||
|
http.Error(w, tt.expectedError, http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
response := api.ListResponse{Models: tt.serverResponse}
|
||||||
|
if err := json.NewEncoder(w).Encode(response); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
defer mockServer.Close()
|
||||||
|
|
||||||
|
t.Setenv("OLLAMA_HOST", mockServer.URL)
|
||||||
|
|
||||||
|
cmd := &cobra.Command{}
|
||||||
|
cmd.SetContext(context.TODO())
|
||||||
|
|
||||||
|
// Capture stdout
|
||||||
|
oldStdout := os.Stdout
|
||||||
|
r, w, _ := os.Pipe()
|
||||||
|
os.Stdout = w
|
||||||
|
|
||||||
|
err := ListHandler(cmd, tt.args)
|
||||||
|
|
||||||
|
// Restore stdout and get output
|
||||||
|
w.Close()
|
||||||
|
os.Stdout = oldStdout
|
||||||
|
output, _ := io.ReadAll(r)
|
||||||
|
|
||||||
|
if tt.expectedError == "" {
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("expected no error, got %v", err)
|
||||||
|
}
|
||||||
|
if got := string(output); got != tt.expectedOutput {
|
||||||
|
t.Errorf("expected output:\n%s\ngot:\n%s", tt.expectedOutput, got)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if err == nil || !strings.Contains(err.Error(), tt.expectedError) {
|
||||||
|
t.Errorf("expected error containing %q, got %v", tt.expectedError, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestCreateHandler(t *testing.T) {
|
func TestCreateHandler(t *testing.T) {
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
|
|||||||
@@ -167,6 +167,8 @@ var (
|
|||||||
MultiUserCache = Bool("OLLAMA_MULTIUSER_CACHE")
|
MultiUserCache = Bool("OLLAMA_MULTIUSER_CACHE")
|
||||||
// Enable the new Ollama engine
|
// Enable the new Ollama engine
|
||||||
NewEngine = Bool("OLLAMA_NEW_ENGINE")
|
NewEngine = Bool("OLLAMA_NEW_ENGINE")
|
||||||
|
// Ollama is running in a benchmark context, additional timing data will be collected.
|
||||||
|
Benchmark = Bool("OLLAMA_BENCHMARK")
|
||||||
)
|
)
|
||||||
|
|
||||||
func String(s string) func() string {
|
func String(s string) func() string {
|
||||||
|
|||||||
@@ -352,6 +352,10 @@ func (c *testContext) MaxTensors() int {
|
|||||||
return 10
|
return 10
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Timing is a stub for the test context: it always returns an empty,
// non-nil slice of timings.
func (c *testContext) Timing() []ml.OpTiming {
|
||||||
|
return []ml.OpTiming{}
|
||||||
|
}
|
||||||
|
|
||||||
func (c *testContext) Close() {}
|
func (c *testContext) Close() {}
|
||||||
|
|
||||||
type testTensor struct {
|
type testTensor struct {
|
||||||
|
|||||||
24
llama/patches/0018-remove-amx.patch
Normal file
24
llama/patches/0018-remove-amx.patch
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Michael Yang <mxyng@pm.me>
|
||||||
|
Date: Tue, 18 Feb 2025 14:47:21 -0800
|
||||||
|
Subject: [PATCH] remove amx
|
||||||
|
|
||||||
|
---
|
||||||
|
ggml/src/CMakeLists.txt | 4 ----
|
||||||
|
1 file changed, 4 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
|
||||||
|
index 72b488dd..50828717 100644
|
||||||
|
--- a/ggml/src/CMakeLists.txt
|
||||||
|
+++ b/ggml/src/CMakeLists.txt
|
||||||
|
@@ -293,10 +293,6 @@ if (GGML_CPU_ALL_VARIANTS)
|
||||||
|
ggml_add_cpu_backend_variant(skylakex AVX F16C AVX2 FMA AVX512)
|
||||||
|
ggml_add_cpu_backend_variant(icelake AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
|
||||||
|
ggml_add_cpu_backend_variant(alderlake AVX F16C AVX2 FMA AVX_VNNI)
|
||||||
|
- if (NOT MSVC)
|
||||||
|
- # MSVC doesn't support AMX
|
||||||
|
- ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
|
||||||
|
- endif()
|
||||||
|
else ()
|
||||||
|
ggml_add_cpu_backend_variant_impl("")
|
||||||
|
endif()
|
||||||
@@ -2,6 +2,7 @@ package ml
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
|
"cmp"
|
||||||
"encoding/binary"
|
"encoding/binary"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
@@ -37,7 +38,7 @@ func RegisterBackend(name string, f func(*os.File) (Backend, error)) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func NewBackend(f *os.File) (Backend, error) {
|
func NewBackend(f *os.File) (Backend, error) {
|
||||||
if backend, ok := backends["ggml"]; ok {
|
if backend, ok := backends[cmp.Or(os.Getenv("OLLAMA_BACKEND"), "ggml")]; ok {
|
||||||
return backend(f)
|
return backend(f)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -53,6 +54,30 @@ type Context interface {
|
|||||||
Compute(...Tensor)
|
Compute(...Tensor)
|
||||||
MaxTensors() int
|
MaxTensors() int
|
||||||
Close()
|
Close()
|
||||||
|
|
||||||
|
Timing() []OpTiming
|
||||||
|
}
|
||||||
|
|
||||||
|
// OpType is the type of operation performed during a forward pass.
|
||||||
|
type OpType string
|
||||||
|
|
||||||
|
// Operation categories that can appear in OpTiming.Type. Each constant's
// value is the category's display string.
const (
|
||||||
|
View OpType = "View"
|
||||||
|
Copy OpType = "Copy"
|
||||||
|
Reshape OpType = "Reshape"
|
||||||
|
Permute OpType = "Permute"
|
||||||
|
Contiguous OpType = "Contiguous"
|
||||||
|
Input OpType = "Input"
|
||||||
|
ComputeOp OpType = "Compute"
|
||||||
|
Transpose OpType = "Transpose"
|
||||||
|
)
|
||||||
|
|
||||||
|
// OpTiming stores the timing information for a single operation.
// It is the element type returned by Context.Timing.
|
||||||
|
type OpTiming struct {
|
||||||
|
Type OpType // category of the operation
|
||||||
|
Operation string // human-readable description of the operation
|
||||||
|
Duration float64 // elapsed time; the ggml backend fills this from a millisecond value
|
||||||
|
Order int // position of the operation in the recorded sequence
|
||||||
}
|
}
|
||||||
|
|
||||||
type Tensor interface {
|
type Tensor interface {
|
||||||
|
|||||||
@@ -4,6 +4,8 @@ package ggml
|
|||||||
#cgo CPPFLAGS: -I${SRCDIR}/ggml/include
|
#cgo CPPFLAGS: -I${SRCDIR}/ggml/include
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include <string.h>
|
||||||
#include "ggml.h"
|
#include "ggml.h"
|
||||||
#include "ggml-cpu.h"
|
#include "ggml-cpu.h"
|
||||||
#include "ggml-backend.h"
|
#include "ggml-backend.h"
|
||||||
@@ -21,6 +23,54 @@ COMPILER inline get_compiler() {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Define a fixed-size struct to store timing data
|
||||||
|
#define MAX_TENSOR_NAME 256
|
||||||
|
#define MAX_TIMINGS 1000
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
char tensor_name[MAX_TENSOR_NAME];
|
||||||
|
double duration_ms;
|
||||||
|
} timing_entry;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
timing_entry entries[MAX_TIMINGS];
|
||||||
|
int count;
|
||||||
|
} timing_data;
|
||||||
|
|
||||||
|
// Global timing data structure
|
||||||
|
timing_data g_timings = {0};
|
||||||
|
|
||||||
|
double get_time_ms() {
|
||||||
|
struct timespec ts;
|
||||||
|
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||||
|
return ts.tv_sec * 1000.0 + ts.tv_nsec / 1000000.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool debug_callback(struct ggml_tensor * t, bool ask, void * user_data) {
|
||||||
|
static double start_time;
|
||||||
|
static char current_tensor[MAX_TENSOR_NAME];
|
||||||
|
|
||||||
|
if (ask) {
|
||||||
|
start_time = get_time_ms();
|
||||||
|
strncpy(current_tensor, t->name, MAX_TENSOR_NAME - 1);
|
||||||
|
current_tensor[MAX_TENSOR_NAME - 1] = '\0';
|
||||||
|
} else {
|
||||||
|
double end_time = get_time_ms();
|
||||||
|
double duration = end_time - start_time;
|
||||||
|
|
||||||
|
if (g_timings.count < MAX_TIMINGS) {
|
||||||
|
strncpy(g_timings.entries[g_timings.count].tensor_name, current_tensor, MAX_TENSOR_NAME - 1);
|
||||||
|
g_timings.entries[g_timings.count].duration_ms = duration;
|
||||||
|
g_timings.count++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void clear_timings() {
|
||||||
|
g_timings.count = 0;
|
||||||
|
}
|
||||||
|
|
||||||
*/
|
*/
|
||||||
import "C"
|
import "C"
|
||||||
|
|
||||||
@@ -29,9 +79,11 @@ import (
|
|||||||
"io"
|
"io"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"unsafe"
|
"unsafe"
|
||||||
|
|
||||||
|
"github.com/ollama/ollama/envconfig"
|
||||||
"github.com/ollama/ollama/format"
|
"github.com/ollama/ollama/format"
|
||||||
fs "github.com/ollama/ollama/fs/ggml"
|
fs "github.com/ollama/ollama/fs/ggml"
|
||||||
"github.com/ollama/ollama/ml"
|
"github.com/ollama/ollama/ml"
|
||||||
@@ -256,7 +308,62 @@ func (c *Context) Forward(t ml.Tensor) {
|
|||||||
C.ggml_build_forward_expand(c.graph, t.(*Tensor).t)
|
C.ggml_build_forward_expand(c.graph, t.(*Tensor).t)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Timing retrieves the collected timing data
|
||||||
|
func (c *Context) Timing() []ml.OpTiming {
|
||||||
|
sequence := make([]ml.OpTiming, C.g_timings.count)
|
||||||
|
|
||||||
|
for i := range int(C.g_timings.count) {
|
||||||
|
entry := C.g_timings.entries[i]
|
||||||
|
tensorName := C.GoString(&entry.tensor_name[0])
|
||||||
|
|
||||||
|
// Determine operation type and description based on tensor name
|
||||||
|
var opType ml.OpType
|
||||||
|
var opDesc string
|
||||||
|
|
||||||
|
switch {
|
||||||
|
case strings.Contains(tensorName, "(view)"):
|
||||||
|
opType, opDesc = ml.View, "Memory view"
|
||||||
|
case strings.Contains(tensorName, "(copy)") || strings.Contains(tensorName, "(copy of"):
|
||||||
|
opType, opDesc = ml.Copy, "Memory copy"
|
||||||
|
case strings.Contains(tensorName, "(reshaped)"):
|
||||||
|
opType, opDesc = ml.Reshape, "Reshape"
|
||||||
|
case strings.Contains(tensorName, "(permuted)"):
|
||||||
|
opType, opDesc = ml.Permute, "Permute dimensions"
|
||||||
|
case strings.Contains(tensorName, "(cont)"):
|
||||||
|
opType, opDesc = ml.Contiguous, "Make contiguous"
|
||||||
|
case strings.Contains(tensorName, "(transposed)"):
|
||||||
|
opType, opDesc = ml.Transpose, "Transpose"
|
||||||
|
case strings.HasPrefix(tensorName, "leaf_"):
|
||||||
|
opType, opDesc = ml.Input, fmt.Sprintf("Input tensor %s", tensorName)
|
||||||
|
case strings.HasPrefix(tensorName, "node_"):
|
||||||
|
opType, opDesc = ml.ComputeOp, fmt.Sprintf("Computation %s", tensorName)
|
||||||
|
default:
|
||||||
|
opType, opDesc = "Unknown", tensorName
|
||||||
|
}
|
||||||
|
|
||||||
|
sequence[i] = ml.OpTiming{
|
||||||
|
Type: opType,
|
||||||
|
Operation: opDesc,
|
||||||
|
Duration: float64(entry.duration_ms),
|
||||||
|
Order: i,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return sequence
|
||||||
|
}
|
||||||
|
|
||||||
func (c *Context) Compute(tensors ...ml.Tensor) {
|
func (c *Context) Compute(tensors ...ml.Tensor) {
|
||||||
|
if envconfig.Benchmark() {
|
||||||
|
// Clear previous timings before new computation
|
||||||
|
C.clear_timings()
|
||||||
|
|
||||||
|
C.ggml_backend_sched_set_eval_callback(
|
||||||
|
c.sched,
|
||||||
|
C.ggml_backend_eval_callback(C.debug_callback),
|
||||||
|
nil,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
C.ggml_backend_sched_graph_compute_async(c.sched, c.graph)
|
C.ggml_backend_sched_graph_compute_async(c.sched, c.graph)
|
||||||
|
|
||||||
needSync := true
|
needSync := true
|
||||||
|
|||||||
4
ml/backend/ggml/ggml/src/CMakeLists.txt
vendored
4
ml/backend/ggml/ggml/src/CMakeLists.txt
vendored
@@ -293,10 +293,6 @@ if (GGML_CPU_ALL_VARIANTS)
|
|||||||
ggml_add_cpu_backend_variant(skylakex AVX F16C AVX2 FMA AVX512)
|
ggml_add_cpu_backend_variant(skylakex AVX F16C AVX2 FMA AVX512)
|
||||||
ggml_add_cpu_backend_variant(icelake AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
|
ggml_add_cpu_backend_variant(icelake AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
|
||||||
ggml_add_cpu_backend_variant(alderlake AVX F16C AVX2 FMA AVX_VNNI)
|
ggml_add_cpu_backend_variant(alderlake AVX F16C AVX2 FMA AVX_VNNI)
|
||||||
if (NOT MSVC)
|
|
||||||
# MSVC doesn't support AMX
|
|
||||||
ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
|
|
||||||
endif()
|
|
||||||
else ()
|
else ()
|
||||||
ggml_add_cpu_backend_variant_impl("")
|
ggml_add_cpu_backend_variant_impl("")
|
||||||
endif()
|
endif()
|
||||||
|
|||||||
Reference in New Issue
Block a user