mirror of https://github.com/ollama/ollama.git (synced 2026-04-22 16:55:44 +02:00)

Compare commits: pdevine/ge ... brucemacd/ (12 commits)
Commits:

- 5f62064e2f
- e3f3043f5b
- b5fc84c930
- 827b6b5d16
- 0d15036d82
- d2eb226c91
- e13e7c8d94
- 78f403ff45
- 08a299e1d0
- f9c7ead160
- 5930aaeb1a
- faf67db089
@@ -24,7 +24,7 @@ set(GGML_LLAMAFILE ON)
 set(GGML_CUDA_PEER_MAX_BATCH_SIZE 128)
 set(GGML_CUDA_GRAPHS ON)
 
-if((NOT CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
+if((CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
     OR (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm|aarch64|ARM64|ARMv[0-9]+"))
     set(GGML_CPU_ALL_VARIANTS ON)
 endif()
docs/add-a-model.md (new file, 338 lines)

@@ -0,0 +1,338 @@
# Guide: Implementing Models in Ollama's Go Inference Engine

> **Note**: This guide and the Go inference engine are in early development and will be updated as implementation details evolve.

This guide outlines the process of implementing a new model in Ollama's inference engine. It covers everything from initial setup to publishing your model to ollama.com.

## Architecture Overview

Below is a diagram showing Ollama's inference engine architecture layers and how they interact:

```mermaid
graph TB
    subgraph Models["Model Layer: LLM Implementations"]
        direction TB
        llama["model/models/llama"]
        mllama["model/models/mllama"]
        qwen["model/models/qwen2"]
        etc["...etc"]

        note1[" Each model implements a<br>specific architecture:<br>- Defines model parameters<br>- Implements forward pass"]
    end

    subgraph ML_Ops["Neural Network Operations"]
        direction TB
        nn_ops[" nn/<br>linear.go: Matrix multiplication<br>embedding.go: Token embedding lookups<br>normalization.go: Layer norm operations<br>convolution.go: Convolutional operations "]

        backend[" ml/backend.go<br>Hardware Abstraction Layer:<br>- Defines tensor operations<br>- Manages computation graphs<br>- Handles memory allocation "]

        note2[" Common neural net operations:<br>- Abstracts hardware details<br>- Provides unified API<br>- Manages computation flow "]
    end

    subgraph Hardware["Backend Execution Layer"]
        direction TB
        backend_impl[" The backend package provides:<br>- Unified computation interface<br>- Automatic hardware selection<br>- Optimized kernels<br>- Efficient memory management "]

        subgraph Backends["Backend Implementations"]
            direction LR
            cpu["backend/cpu<br>- Pure Go implementation<br>- Fallback for all platforms"]

            metal["backend/metal<br>- Apple Silicon (M1/M2/M3)<br>- MLX integration<br>- Leverages Apple Neural Engine"]

            onnx["backend/onnx<br>- Cross-platform compatibility<br>- ONNX Runtime integration<br>- Pre-compiled graph execution"]

            ggml["backend/ggml<br>- CPU/GPU quantized compute<br>- Low-precision operations<br>- Memory-efficient inferencing"]
        end
    end

    Models --> |" Makes high-level calls<br>(e.g., self-attention) "| ML_Ops
    ML_Ops --> |" Translates to tensor operations<br>(e.g., matmul, softmax) "| Hardware
    backend_impl --> Backends
```

When implementing a new model, you'll primarily work in the model layer, interfacing with the neural network operations layer.

## Implementation Process Overview

Here's the high-level process for implementing a new model in Ollama:

1. **Environment Setup**: Clone the repository and set up your development environment
2. **Research Implementation**: Understand the original model architecture
3. **Project Structure Setup**: Set up the necessary file structure
4. **Create Basic Modelfile**: Create a simple Modelfile for testing
5. **Implement Weight Conversion**: Map from original format to GGUF
6. **Open a Draft PR**: Create a draft pull request to establish communication with maintainers
7. **Implement Model Logic**: Create the model architecture and forward pass
8. **Quality Check and Final Steps**: Create a Modelfile, add tests, and ensure functionality
9. **Finalize PR and Publish**: Complete the PR and publish to ollama.com
## Implementation Steps in Detail

### 1. Environment Setup

First, clone the Ollama repository and get it running locally. Follow the development setup guide at:
https://github.com/ollama/ollama/blob/main/docs/development.md

### 2. Research Implementation

Get the original model implementation running. This typically involves:

- Cloning the research code repository (usually Python-based)
- Setting up the required environment
- Running inference with sample inputs
- Understanding the model architecture and forward pass

### 3. Project Structure Setup

Create the necessary file structure by referencing previous model implementations. You'll need:

```
convert/
└── convert_your-model.go    # Weight conversion logic (PyTorch/SafeTensors to GGML)
model/
└── your-model/
    └── model.go             # Architecture and forward pass implementation
```

Add your model to the main paths in [model/models/models.go](https://github.com/ollama/ollama/blob/main/model/models/models.go):

```go
package models

import (
	_ "github.com/ollama/ollama/model/models/llama"
	_ "github.com/ollama/ollama/model/models/mllama"
	_ "github.com/ollama/ollama/model/models/your-model" // Add your model here
)
```

### 4. Create a Basic Modelfile

Create a simple Modelfile early in the process to facilitate testing:

```
FROM /path/to/model
TEMPLATE "{{.Prompt}}" # Use a static prompt format for initial testing
```

This allows you to test your implementation with consistent inputs before finalizing the proper prompt template.

### 5. Implement Weight Conversion

- Work on `convert/convert_your-model.go`
- Reference existing conversion implementations
- Conversion involves mapping tensor names from the PyTorch/SafeTensors checkpoint to their GGUF equivalents
- Understand the typical GGUF layout and structure:

**Typical GGUF Layout:**

```
GGUF
├── Metadata Section
│   ├── Model Parameters
│   │   ├── General architecture parameters
│   │   │   ├── "{arch}.vocab_size" (e.g., "llama.vocab_size")
│   │   │   ├── "{arch}.context_length" (e.g., "llama.context_length")
│   │   │   ├── "{arch}.embedding_length" (e.g., "llama.embedding_length")
│   │   │   └── "{arch}.block_count" (e.g., "llama.block_count")
│   │   │
│   │   └── Architecture-specific parameters
│   │       ├── "{arch}.attention.head_count" (e.g., "llama.attention.head_count")
│   │       ├── "{arch}.attention.head_count_kv" (e.g., "llama.attention.head_count_kv")
│   │       ├── "{arch}.rope.dimension_count" (e.g., "llama.rope.dimension_count")
│   │       └── "{arch}.attention.layer_norm_rms_epsilon" (e.g., "llama.attention.layer_norm_rms_epsilon")
│   │
│   ├── Tokenizer parameters
│   │   ├── "tokenizer.ggml.model" (e.g., "llama")
│   │   ├── "tokenizer.ggml.tokens" (vocabulary tokens)
│   │   ├── "tokenizer.ggml.bos_id" (beginning of sequence token ID)
│   │   └── "tokenizer.ggml.eos_id" (end of sequence token ID)
│   │
│   └── General metadata
│       └── "general.architecture" (e.g., "llama", "qwen2", "phi")
│
└── Tensor Data Section
    ├── Common tensors:
    │   ├── "token_embd.weight" (token embedding matrix)
    │   ├── "rope_freqs.weight" (RoPE frequency weights)
    │   ├── "output_norm.weight" (final layer normalization)
    │   └── "output.weight" (output projection)
    │
    └── Layer-specific tensors:
        ├── "blk.{i}.attn_q.weight" (query projection)
        ├── "blk.{i}.attn_k.weight" (key projection)
        ├── "blk.{i}.attn_v.weight" (value projection)
        ├── "blk.{i}.attn_output.weight" (attention output)
        ├── "blk.{i}.attn_norm.weight" (attention normalization)
        ├── "blk.{i}.ffn_norm.weight" (feed-forward normalization)
        ├── "blk.{i}.ffn_up.weight" (FFN up projection)
        ├── "blk.{i}.ffn_down.weight" (FFN down projection)
        └── "blk.{i}.ffn_gate.weight" (FFN gate projection)
```

- Key conversion details include (a name-mapping sketch follows this list):
  - Linear weight matrices (sometimes need transposition)
  - Layer normalization weights (might need reshaping)
  - **Note: In GGML, FFN values are for the MLP (Multi-Layer Perceptron) part of the architecture**
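Ollama's converters express most of this mapping as ordered old→new substring replacement pairs. The sketch below shows what such a table typically looks like for a llama-style checkpoint, using the GGUF tensor names from the layout above; the `Replacements()` method name mirrors the existing converters in `convert/`, but verify the exact interface against your checkout:

```go
// Replacements maps PyTorch/SafeTensors tensor-name fragments to their
// GGUF equivalents. Entries are (source substring, GGUF substring) pairs.
func (*yourModel) Replacements() []string {
	return []string{
		"model.embed_tokens", "token_embd",
		"model.norm", "output_norm",
		"lm_head", "output",
		"model.layers", "blk",
		"input_layernorm", "attn_norm",
		"self_attn.q_proj", "attn_q",
		"self_attn.k_proj", "attn_k",
		"self_attn.v_proj", "attn_v",
		"self_attn.o_proj", "attn_output",
		"post_attention_layernorm", "ffn_norm",
		"mlp.gate_proj", "ffn_gate",
		"mlp.up_proj", "ffn_up",
		"mlp.down_proj", "ffn_down",
	}
}
```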
- Test conversion:

```bash
go run . create <my-model> -f /path/to/Modelfile
```

### 6. Open a Draft PR

After implementing the initial weight conversion, creating a draft pull request is recommended, as it:

- Establishes a communication channel with Ollama maintainers
- Allows for early feedback on your approach
- Makes it easier to track progress and changes

To open a draft PR:

1. Fork the repository
2. Create a new branch for your model implementation
3. Make initial commits with your weight conversion implementation
4. Open a PR in the `ollama/ollama` repository and mark it as draft
5. Include a clear description of the model you're implementing

### 7. Implement Model Logic

- Reference existing model implementations
- Implement the `New()` and `Forward()` functions in `model.go`:

**The `New()` function:**

- Creates and initializes your model structure
- Loads configuration parameters (embedding size, attention heads, etc.)
- Sets up the tokenizer with vocabulary and special tokens
- Initializes all model layers and weights
- **Important**: Sets up the KV cache for efficient inference
- Example:

```go
func New(c ml.Config) (model.Model, error) {
	m := &Model{
		// Initialize tokenizer
		BytePairEncoding: model.NewBytePairEncoding(...),
		// Create layer arrays
		Layers: make([]Layer, c.Uint("block_count")),
		// Set model parameters
		Options: &Options{...},
	}
	// Initialize KV cache for efficient inference
	m.Cache = kvcache.NewCausalCache(m.Shift)
	return m, nil
}
```

**The `Forward()` function:**

- **What it does**: Defines the computational graph of your model
- **Important**: The graph is NOT executed immediately; it is built first, then executed later when predictions are needed
- Takes input tokens and converts them to embeddings
- Processes inputs through transformer layers (attention and feed-forward networks)
- Creates the path for data flow through your model's components
- Example:

```go
func (m *Model) Forward(ctx ml.Context, opts model.Options) (ml.Tensor, error) {
	// Convert inputs to tensors
	inputTensor, _ := ctx.FromIntSlice(opts.Inputs, len(opts.Inputs))
	positionsTensor, _ := ctx.FromIntSlice(opts.Positions, len(opts.Positions))

	// Initial token embedding
	hiddenStates := m.TokenEmbedding.Forward(ctx, inputTensor)

	// Process through transformer layers
	for i, layer := range m.Layers {
		m.Cache.SetLayer(i)
		hiddenStates = layer.Forward(ctx, hiddenStates, positionsTensor, m.Cache, m.Options)
	}

	// Final processing and output
	normalizedOutput := m.OutputNorm.Forward(ctx, hiddenStates, m.modelEpsilon)
	logits := m.Output.Forward(ctx, normalizedOutput)

	// Return logits for requested positions
	outputsTensor, _ := ctx.FromIntSlice(opts.Outputs, len(opts.Outputs))
	return logits.Rows(ctx, outputsTensor), nil
}
```

**Key Components to Implement:**

1. **KV Cache**:
   - Improves inference performance for text generation
   - How it works: Stores previously computed key and value tensors from self-attention, avoiding redundant computations
   - Implementation: Use `kvcache.NewCausalCache()` for autoregressive models
   - Important: Must implement the `Shift()` function to handle rotary position embeddings with the cache; a sketch follows below
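The `Shift()` callback is how the cache asks the model to re-position cached keys when the context window slides; for RoPE models this means re-applying the rotation with a position delta. A minimal sketch, modeled on the llama implementation and assuming the `RoPE` tensor method and `Options` fields shown here; check the exact signatures in your checkout:

```go
// Shift re-applies rotary position embeddings to cached keys so they
// remain valid after their absolute positions change.
func (m *Model) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error) {
	// ropeDim, ropeBase and ropeScale are Options fields loaded from GGUF
	// metadata (e.g. "{arch}.rope.dimension_count") in New().
	return key.RoPE(ctx, shift, m.Options.RopeFactors, m.Options.ropeDim, m.Options.ropeBase, m.Options.ropeScale), nil
}
```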
2. **Self-Attention**:
   - Core component that learns contextual relationships between tokens
   - Implements query, key, value projections and their interactions
   - Must handle positional encoding (usually Rotary Position Embeddings)
   - Uses the KV cache to make generation efficient; see the sketch below
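Putting those pieces together, a self-attention block typically looks like the following. This is a sketch loosely modeled on the llama implementation: the struct tags map onto the GGUF tensor names from step 5, and tensor helpers such as `Reshape`, `RoPE`, `Mulmat`, `Scale` and `Softmax` are assumptions about the `ml.Tensor` API, so verify them against `ml/backend.go`:

```go
type SelfAttention struct {
	Query  *nn.Linear `gguf:"attn_q"`
	Key    *nn.Linear `gguf:"attn_k"`
	Value  *nn.Linear `gguf:"attn_v"`
	Output *nn.Linear `gguf:"attn_output"`
}

func (sa *SelfAttention) Forward(ctx ml.Context, hiddenState, positions ml.Tensor, cache kvcache.Cache, opts *Options) ml.Tensor {
	batchSize := hiddenState.Dim(1)
	headDim := opts.hiddenSize / opts.numHeads

	// Project into per-head query/key/value spaces.
	q := sa.Query.Forward(ctx, hiddenState).Reshape(ctx, headDim, opts.numHeads, batchSize)
	k := sa.Key.Forward(ctx, hiddenState).Reshape(ctx, headDim, opts.numKVHeads, batchSize)
	v := sa.Value.Forward(ctx, hiddenState).Reshape(ctx, headDim, opts.numKVHeads, batchSize)

	// Rotary position embeddings are applied to queries and keys only.
	q = q.RoPE(ctx, positions, opts.RopeFactors, opts.ropeDim, opts.ropeBase, opts.ropeScale)
	k = k.RoPE(ctx, positions, opts.RopeFactors, opts.ropeDim, opts.ropeBase, opts.ropeScale)

	// Append this step's keys/values, then read back the full history plus
	// the causal mask, so earlier tokens are not recomputed.
	cache.Put(ctx, k, v)
	k, v, mask := cache.Get(ctx)

	// Scaled dot-product attention over the cached sequence (head
	// permutation bookkeeping omitted for brevity).
	scores := k.Mulmat(ctx, q).Scale(ctx, 1/math.Sqrt(float64(headDim))).Add(ctx, mask).Softmax(ctx)
	attention := v.Mulmat(ctx, scores).Reshape(ctx, opts.hiddenSize, batchSize)

	return sa.Output.Forward(ctx, attention)
}
```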
3. **Normalization Layers**:
   - Purpose: Stabilizes training and maintains consistent activation distributions
   - Types: RMSNorm, LayerNorm, etc., depending on the model architecture
   - Implementation: Apply before attention and feed-forward networks
   - Example: `normalizedOutput := m.OutputNorm.Forward(ctx, hiddenStates, m.modelEpsilon)`

4. **Activation Functions**:
   - Purpose: Introduces non-linearity into the model
   - Common types: SiLU (Sigmoid Linear Unit), GELU, ReLU
   - Found in feed-forward/MLP blocks
   - Example:

```go
// SwiGLU activation in MLP
gateActivation := mlp.Gate.Forward(ctx, hiddenState).SILU(ctx)
upProjection := mlp.Up.Forward(ctx, hiddenState)
intermediateStates := gateActivation.Mul(ctx, upProjection)
```

- Run your forward pass:

```bash
# in the root of the ollama directory
go build .
OLLAMA_DEBUG=1 ./ollama serve
OLLAMA_DEBUG=1 ./ollama run <my-model>
```

- Compare output with the research implementation

### 8. Quality Check and Final Steps

1. Add comprehensive tests to:
   - `model_test.go`
   - `convert_test.go`

2. Ensure tests cover (see the sketch after this list):
   - Weight conversion
   - Model initialization
   - Text generation
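A conversion test typically converts a small fixture and asserts on a handful of GGUF metadata keys and tensor properties. A minimal table-driven sketch for `convert_test.go`, where `convertFixture` and `loadConvertedKV` are hypothetical helpers standing in for the setup you'll find in the existing tests:

```go
func TestConvertYourModel(t *testing.T) {
	// convertFixture and loadConvertedKV are placeholders: convert a small
	// checkpoint fixture, then read back the resulting GGUF metadata.
	kv := loadConvertedKV(t, convertFixture(t, "your-model"))

	cases := map[string]any{
		"general.architecture":   "your-model",
		"your-model.block_count": uint32(32),
		"tokenizer.ggml.model":   "llama",
	}
	for key, want := range cases {
		if got := kv[key]; got != want {
			t.Errorf("%s = %v, want %v", key, got, want)
		}
	}
}
```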
3. **Create Final Modelfile**
   - Replace the static prompt with the proper Go template for your model:

```
FROM <converted-gguf>
TEMPLATE <prompt-template> # Add the proper Go template for your model, including tools if needed
LICENSE <license-info>     # Add appropriate license information
# Add additional parameters if needed
```

4. **End-to-end Testing**
   - Run your model with your local Ollama build to ensure that it functions as expected

5. **Benchmark**
   - Run performance benchmarks on your model implementation:

```bash
# from the root of the Ollama directory, while a server is running locally
go build .
OLLAMA_DEBUG=1 ./ollama serve
go test -bench=. -m <your-model-name> ./...
```

### 9. Finalize PR and Publish to ollama.com

1. **Finalize Pull Request**
   - Move the PR out of draft state
   - Address reviewer feedback

2. **Publish to ollama.com**
   - Push to ollama.com:

```bash
ollama create <your-namespace>/<your-model> -f /path/to/Modelfile
ollama push <your-namespace>/<your-model>
```
@@ -0,0 +1,285 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: jmorganca <jmorganca@gmail.com>
Date: Sun, 16 Feb 2025 20:00:22 -0500
Subject: [PATCH] use std::filesystem::path instead of wstring

---
 ggml/src/ggml-backend-reg.cpp | 116 ++++++++++++----------------------
 1 file changed, 40 insertions(+), 76 deletions(-)

diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp
index 84b21dd8..de78feae 100644
--- a/ggml/src/ggml-backend-reg.cpp
+++ b/ggml/src/ggml-backend-reg.cpp
@@ -72,16 +72,6 @@
 # pragma clang diagnostic ignored "-Wdeprecated-declarations"
 #endif
 
-static std::wstring utf8_to_utf16(const std::string & str) {
-    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
-    return converter.from_bytes(str);
-}
-
-static std::string utf16_to_utf8(const std::wstring & str) {
-    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
-    return converter.to_bytes(str);
-}
-
 #if defined(__clang__)
 # pragma clang diagnostic pop
 #endif
@@ -96,12 +86,12 @@ struct dl_handle_deleter {
     }
 };
 
-static dl_handle * dl_load_library(const std::wstring & path) {
+static dl_handle * dl_load_library(const std::filesystem::path & path) {
     // suppress error dialogs for missing DLLs
     DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
     SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
 
-    HMODULE handle = LoadLibraryW(path.c_str());
+    HMODULE handle = LoadLibraryW(path.wstring().c_str());
 
     SetErrorMode(old_mode);
 
@@ -129,8 +119,8 @@ struct dl_handle_deleter {
     }
 };
 
-static void * dl_load_library(const std::wstring & path) {
-    dl_handle * handle = dlopen(utf16_to_utf8(path).c_str(), RTLD_NOW | RTLD_LOCAL);
+static void * dl_load_library(const std::filesystem::path & path) {
+    dl_handle * handle = dlopen(path.string().c_str(), RTLD_NOW | RTLD_LOCAL);
 
     return handle;
 }
@@ -222,11 +212,11 @@ struct ggml_backend_registry {
         );
     }
 
-    ggml_backend_reg_t load_backend(const std::wstring & path, bool silent) {
+    ggml_backend_reg_t load_backend(const std::filesystem::path & path, bool silent) {
         dl_handle_ptr handle { dl_load_library(path) };
         if (!handle) {
             if (!silent) {
-                GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(path).c_str());
+                GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path.string().c_str());
             }
             return nullptr;
         }
@@ -234,7 +224,7 @@ struct ggml_backend_registry {
         auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
         if (score_fn && score_fn() == 0) {
             if (!silent) {
-                GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, utf16_to_utf8(path).c_str());
+                GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path.string().c_str());
             }
             return nullptr;
         }
@@ -242,7 +232,7 @@ struct ggml_backend_registry {
         auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init");
         if (!backend_init_fn) {
             if (!silent) {
-                GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, utf16_to_utf8(path).c_str());
+                GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, path.string().c_str());
             }
             return nullptr;
         }
@@ -251,16 +241,16 @@ struct ggml_backend_registry {
         if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
             if (!silent) {
                 if (!reg) {
-                    GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, utf16_to_utf8(path).c_str());
+                    GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, path.string().c_str());
                 } else {
                     GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
-                        __func__, utf16_to_utf8(path).c_str(), reg->api_version, GGML_BACKEND_API_VERSION);
+                        __func__, path.string().c_str(), reg->api_version, GGML_BACKEND_API_VERSION);
                 }
             }
             return nullptr;
         }
 
-        GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), utf16_to_utf8(path).c_str());
+        GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path.string().c_str());
 
         register_backend(reg, score_fn ? score_fn() : -1, std::move(handle));
 
@@ -396,14 +386,14 @@ ggml_backend_t ggml_backend_init_best(void) {
 
 // Dynamic loading
 ggml_backend_reg_t ggml_backend_load(const char * path) {
-    return get_reg().load_backend(utf8_to_utf16(path), false);
+    return get_reg().load_backend(path, false);
 }
 
 void ggml_backend_unload(ggml_backend_reg_t reg) {
     get_reg().unload_backend(reg, true);
 }
 
-static std::wstring get_executable_path() {
+static std::filesystem::path get_executable_path() {
 #if defined(__APPLE__)
     // get executable path
     std::vector<char> path;
@@ -415,15 +405,9 @@ static std::wstring get_executable_path() {
         }
         path.resize(size);
     }
-    std::string base_path(path.data(), size);
-    // remove executable name
-    auto last_slash = base_path.find_last_of('/');
-    if (last_slash != std::string::npos) {
-        base_path = base_path.substr(0, last_slash);
-    }
-    return utf8_to_utf16(base_path + "/");
+
+    return std::filesystem::path(path.data()).parent_path();
 #elif defined(__linux__) || defined(__FreeBSD__)
-    std::string base_path = ".";
     std::vector<char> path(1024);
     while (true) {
         // get executable path
@@ -436,76 +420,56 @@ static std::wstring get_executable_path() {
             break;
         }
         if (len < (ssize_t) path.size()) {
-            base_path = std::string(path.data(), len);
-            // remove executable name
-            auto last_slash = base_path.find_last_of('/');
-            if (last_slash != std::string::npos) {
-                base_path = base_path.substr(0, last_slash);
-            }
-            break;
+            return std::filesystem::path(path.data()).parent_path();
         }
         path.resize(path.size() * 2);
     }
-
-    return utf8_to_utf16(base_path + "/");
 #elif defined(_WIN32)
     std::vector<wchar_t> path(MAX_PATH);
     DWORD len = GetModuleFileNameW(NULL, path.data(), path.size());
     if (len == 0) {
         return {};
     }
-    std::wstring base_path(path.data(), len);
-    // remove executable name
-    auto last_slash = base_path.find_last_of('\\');
-    if (last_slash != std::string::npos) {
-        base_path = base_path.substr(0, last_slash);
-    }
-    return base_path + L"\\";
-#else
-    return {};
-#endif
-}
 
-static std::wstring backend_filename_prefix() {
-#ifdef _WIN32
-    return L"ggml-";
+    return std::filesystem::path(path.data()).parent_path();
 #else
-    return L"libggml-";
+    return {};
 #endif
 }
 
-static std::wstring backend_filename_suffix() {
+static std::string backend_filename_prefix() {
 #ifdef _WIN32
-    return L".dll";
+    return "ggml-";
 #else
-    return L".so";
+    return "libggml-";
 #endif
 }
 
-static std::wstring path_separator() {
+static std::string backend_filename_suffix() {
 #ifdef _WIN32
-    return L"\\";
+    return ".dll";
 #else
-    return L"/";
+    return ".so";
 #endif
 }
 
 static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
     // enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
     // TODO: search system paths
-    std::wstring file_prefix = backend_filename_prefix() + utf8_to_utf16(name) + L"-";
-    std::vector<std::wstring> search_paths;
+    namespace fs = std::filesystem;
+    std::string file_prefix = backend_filename_prefix() + name + "-";
+    std::vector<fs::path> search_paths;
+
     if (user_search_path == nullptr) {
-        search_paths.push_back(L"." + path_separator());
+        search_paths.push_back(fs::current_path());
        search_paths.push_back(get_executable_path());
     } else {
-        search_paths.push_back(utf8_to_utf16(user_search_path) + path_separator());
+        search_paths.push_back(fs::u8path(user_search_path));
     }
 
     int best_score = 0;
-    std::wstring best_path;
+    fs::path best_path;
 
-    namespace fs = std::filesystem;
     for (const auto & search_path : search_paths) {
         if (!fs::exists(search_path)) {
             continue;
@@ -514,31 +478,31 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
         for (const auto & entry : dir_it) {
             try {
                 if (entry.is_regular_file()) {
-                    std::wstring filename = entry.path().filename().wstring();
-                    std::wstring ext = entry.path().extension().wstring();
+                    std::string filename = entry.path().filename().string();
+                    std::string ext = entry.path().extension().string();
                     if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
-                        dl_handle_ptr handle { dl_load_library(entry.path().wstring()) };
+                        dl_handle_ptr handle { dl_load_library(entry.path()) };
                         if (!handle) {
-                            GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
+                            GGML_LOG_ERROR("%s: failed to load %s\n", __func__, entry.path().string().c_str());
                             continue;
                         }
 
                         auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
                         if (!score_fn) {
-                            GGML_LOG_DEBUG("%s: failed to find ggml_backend_score in %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
+                            GGML_LOG_DEBUG("%s: failed to find ggml_backend_score in %s\n", __func__, entry.path().string().c_str());
                             continue;
                         }
 
                         int s = score_fn();
-                        GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), s);
+                        GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, entry.path().string().c_str(), s);
                         if (s > best_score) {
                             best_score = s;
-                            best_path = entry.path().wstring();
+                            best_path = entry.path();
                         }
                     }
                 }
             } catch (const std::exception & e) {
-                GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), e.what());
+                GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, entry.path().string().c_str(), e.what());
             }
         }
     }
@@ -546,7 +510,7 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
     if (best_score == 0) {
         // try to load the base backend
         for (const auto & search_path : search_paths) {
-            std::wstring path = search_path + backend_filename_prefix() + utf8_to_utf16(name) + backend_filename_suffix();
+            fs::path path = fs::path(search_path) / (backend_filename_prefix() + name + backend_filename_suffix());
             if (fs::exists(path)) {
                 return get_reg().load_backend(path, silent);
             }
ml/backend/ggml/ggml/src/ggml-backend-reg.cpp (vendored): 116 changed lines, 40 insertions and 76 deletions, applying the same hunks as the patch above to the vendored copy.
@@ -1,6 +1,7 @@
 package progress
 
 import (
+	"bufio"
 	"fmt"
 	"io"
 	"sync"
@@ -13,7 +14,8 @@ type State interface {
 
 type Progress struct {
 	mu sync.Mutex
-	w  io.Writer
+	// buffer output to minimize flickering on all terminals
+	w *bufio.Writer
 
 	pos int
 
@@ -22,7 +24,7 @@ type Progress struct {
 }
 
 func NewProgress(w io.Writer) *Progress {
-	p := &Progress{w: w}
+	p := &Progress{w: bufio.NewWriter(w)}
 	go p.start()
 	return p
 }
@@ -48,11 +50,14 @@ func (p *Progress) Stop() bool {
 	stopped := p.stop()
 	if stopped {
 		fmt.Fprint(p.w, "\n")
+		p.w.Flush()
 	}
 	return stopped
 }
 
 func (p *Progress) StopAndClear() bool {
+	defer p.w.Flush()
+
 	fmt.Fprint(p.w, "\033[?25l")
 	defer fmt.Fprint(p.w, "\033[?25h")
 
@@ -81,20 +86,24 @@ func (p *Progress) render() {
 	p.mu.Lock()
 	defer p.mu.Unlock()
 
+	defer p.w.Flush()
+
+	// eliminate flickering on terminals that support synchronized output
+	fmt.Fprint(p.w, "\033[?2026h")
+	defer fmt.Fprint(p.w, "\033[?2026l")
+
 	fmt.Fprint(p.w, "\033[?25l")
 	defer fmt.Fprint(p.w, "\033[?25h")
 
-	// clear already rendered progress lines
-	for i := range p.pos {
-		if i > 0 {
-			fmt.Fprint(p.w, "\033[A")
-		}
-		fmt.Fprint(p.w, "\033[2K\033[1G")
+	// move the cursor back to the beginning
+	for range p.pos - 1 {
+		fmt.Fprint(p.w, "\033[A")
 	}
+	fmt.Fprint(p.w, "\033[1G")
 
 	// render progress lines
 	for i, state := range p.states {
-		fmt.Fprint(p.w, state.String())
+		fmt.Fprint(p.w, state.String(), "\033[K")
 		if i < len(p.states)-1 {
 			fmt.Fprint(p.w, "\n")
 		}
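The render changes above combine two flicker-avoidance techniques: buffering frames with bufio so each repaint reaches the terminal as a single write, and bracketing the repaint in the DEC private mode 2026 "synchronized output" escapes so supporting terminals apply it atomically. A standalone sketch of the same pattern (illustrative, not ollama code):

```go
package main

import (
	"bufio"
	"fmt"
	"os"
	"time"
)

func main() {
	w := bufio.NewWriter(os.Stdout)
	for pct := 0; pct <= 100; pct += 10 {
		fmt.Fprint(w, "\033[?2026h")                   // begin synchronized update
		fmt.Fprintf(w, "\033[1G%3d%% done\033[K", pct) // redraw the line in place
		fmt.Fprint(w, "\033[?2026l")                   // end synchronized update
		w.Flush()                                      // emit the frame as one write
		time.Sleep(100 * time.Millisecond)
	}
	fmt.Fprintln(w)
	w.Flush()
}
```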