mirror of
https://github.com/ollama/ollama.git
synced 2026-04-19 06:55:04 +02:00
Update vendor ggml code to a5bb8ba4 (#13832)
Co-authored-by: Daniel Hiltgen <daniel@ollama.com> Co-authored-by: Gabe Goodhart <ghart@us.ibm.com> Co-authored-by: Shalini Salomi Bodapati <Shalini.Salomi.Bodapati@ibm.com>
This commit is contained in:
@@ -1,31 +1,32 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Daniel Hiltgen <daniel@ollama.com>
|
||||
Date: Sun, 30 Nov 2025 11:05:56 -0800
|
||||
From: nobody <>
|
||||
Date: Sat, 10 Jan 2026 15:27:57 -0800
|
||||
Subject: [PATCH] ggml: Export GPU UUIDs
|
||||
|
||||
---
|
||||
ggml/include/ggml-backend.h | 1 +
|
||||
ggml/src/ggml-cuda/ggml-cuda.cu | 67 +++++++++++++++++++++++++++---
|
||||
ggml/include/ggml-backend.h | 2 +
|
||||
ggml/src/ggml-cuda/ggml-cuda.cu | 72 +++++++++++++++++++++++++++---
|
||||
ggml/src/ggml-metal/ggml-metal.cpp | 1 +
|
||||
3 files changed, 63 insertions(+), 6 deletions(-)
|
||||
3 files changed, 69 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
|
||||
index a7ebe5dcd..03557bb31 100644
|
||||
index 393c329be..99412fe56 100644
|
||||
--- a/ggml/include/ggml-backend.h
|
||||
+++ b/ggml/include/ggml-backend.h
|
||||
@@ -158,6 +158,7 @@ extern "C" {
|
||||
@@ -158,6 +158,8 @@ extern "C" {
|
||||
const char * description;
|
||||
// device free memory in bytes
|
||||
size_t memory_free;
|
||||
+ // device UUID
|
||||
+ const char * id;
|
||||
// device total memory in bytes
|
||||
size_t memory_total;
|
||||
// device type
|
||||
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
|
||||
index 6519af435..c9d3a2b03 100644
|
||||
index 290d762ad..9b9e053f0 100644
|
||||
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
|
||||
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
|
||||
@@ -189,6 +189,51 @@ static int ggml_cuda_parse_id(char devName[]) {
|
||||
@@ -191,6 +191,51 @@ static int ggml_cuda_parse_id(char devName[]) {
|
||||
}
|
||||
#endif // defined(GGML_USE_HIP)
|
||||
|
||||
@@ -77,7 +78,7 @@ index 6519af435..c9d3a2b03 100644
|
||||
static ggml_cuda_device_info ggml_cuda_init() {
|
||||
ggml_cuda_device_info info = {};
|
||||
|
||||
@@ -255,22 +300,24 @@ static ggml_cuda_device_info ggml_cuda_init() {
|
||||
@@ -255,22 +300,29 @@ static ggml_cuda_device_info ggml_cuda_init() {
|
||||
info.devices[id].cc += prop.minor * 0x10;
|
||||
}
|
||||
}
|
||||
@@ -102,21 +103,26 @@ index 6519af435..c9d3a2b03 100644
|
||||
info.devices[id].cc = 100*prop.major + 10*prop.minor;
|
||||
- GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s\n",
|
||||
- id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no");
|
||||
+#ifdef __CUDA_ARCH_LIST__
|
||||
+ if (std::getenv("GGML_CUDA_INIT") != NULL) {
|
||||
+ GGML_ASSERT(ggml_cuda_has_arch(info.devices[id].cc) && "ggml was not compiled with support for this arch");
|
||||
+ }
|
||||
+#endif // defined(__CUDA_ARCH_LIST__)
|
||||
+ GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n",
|
||||
+ id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
|
||||
+ ggml_cuda_parse_uuid(prop, id).c_str());
|
||||
std::string device_name(prop.name);
|
||||
if (device_name == "NVIDIA GeForce MX450") {
|
||||
turing_devices_without_mma.push_back({ id, device_name });
|
||||
@@ -4110,6 +4157,7 @@ struct ggml_backend_cuda_device_context {
|
||||
@@ -4155,6 +4207,7 @@ struct ggml_backend_cuda_device_context {
|
||||
std::string name;
|
||||
std::string description;
|
||||
std::string pci_bus_id;
|
||||
+ std::string id;
|
||||
int op_offload_min_batch_size;
|
||||
};
|
||||
|
||||
static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) {
|
||||
@@ -4198,6 +4246,11 @@ static bool ggml_backend_cuda_get_available_uma_memory(long * available_memory_k
|
||||
@@ -4244,6 +4297,11 @@ static bool ggml_backend_cuda_get_available_uma_memory(long * available_memory_k
|
||||
}
|
||||
#endif // defined(__linux__)
|
||||
|
||||
@@ -128,7 +134,7 @@ index 6519af435..c9d3a2b03 100644
|
||||
static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
|
||||
ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
|
||||
ggml_cuda_set_device(ctx->device);
|
||||
@@ -4238,6 +4291,7 @@ static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_back
|
||||
@@ -4284,6 +4342,7 @@ static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_back
|
||||
|
||||
props->name = ggml_backend_cuda_device_get_name(dev);
|
||||
props->description = ggml_backend_cuda_device_get_description(dev);
|
||||
@@ -136,7 +142,7 @@ index 6519af435..c9d3a2b03 100644
|
||||
props->type = ggml_backend_cuda_device_get_type(dev);
|
||||
props->device_id = ctx->pci_bus_id.empty() ? nullptr : ctx->pci_bus_id.c_str();
|
||||
ggml_backend_cuda_device_get_memory(dev, &props->memory_free, &props->memory_total);
|
||||
@@ -4834,6 +4888,7 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
|
||||
@@ -4900,6 +4959,7 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
|
||||
cudaDeviceProp prop;
|
||||
CUDA_CHECK(cudaGetDeviceProperties(&prop, i));
|
||||
dev_ctx->description = prop.name;
|
||||
@@ -145,7 +151,7 @@ index 6519af435..c9d3a2b03 100644
|
||||
char pci_bus_id[16] = {};
|
||||
snprintf(pci_bus_id, sizeof(pci_bus_id), "%04x:%02x:%02x.0", prop.pciDomainID, prop.pciBusID, prop.pciDeviceID);
|
||||
diff --git a/ggml/src/ggml-metal/ggml-metal.cpp b/ggml/src/ggml-metal/ggml-metal.cpp
|
||||
index f2b7fe692..8fc1c2fb5 100644
|
||||
index 790cabca0..516d74064 100644
|
||||
--- a/ggml/src/ggml-metal/ggml-metal.cpp
|
||||
+++ b/ggml/src/ggml-metal/ggml-metal.cpp
|
||||
@@ -547,6 +547,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen
|
||||
|
||||
Reference in New Issue
Block a user