mirror of
https://github.com/ollama/ollama.git
synced 2026-04-25 18:25:42 +02:00
GGML update to ec98e2002 (#13451)
* Revert "add support for NVIDIA Nemotron 3 Nano"
This reverts commit e7d2ae9d69.
* GGML update to 380b4c984
Remove MaskBatchPadding as GGML_KQ_MASK_PAD is no longer present (no
padding required)
* update to c45f89d55
* update to ec98e2002
Solar Pro needed further adjustment; this still needs verification
* review comments
This commit is contained in:
10
llama/llama.cpp/src/llama-context.h
vendored
10
llama/llama.cpp/src/llama-context.h
vendored
@@ -26,6 +26,10 @@ struct llama_memory_breakdown_data {
|
||||
size_t model = 0; // memory allocated for the model
|
||||
size_t context = 0; // memory allocated for the context
|
||||
size_t compute = 0; // memory allocated for temporary compute buffers
|
||||
|
||||
// sum of the model, context and compute sizes tracked by this struct
size_t total() const {
|
||||
return model + context + compute;
|
||||
}
|
||||
};
|
||||
|
||||
struct llama_context {
|
||||
@@ -206,7 +210,8 @@ public:
|
||||
ggml_status graph_compute(ggml_cgraph * gf, bool batched);
|
||||
|
||||
// reserve a graph with a dummy ubatch of the specified size
|
||||
ggml_cgraph * graph_reserve(uint32_t n_tokens, uint32_t n_seqs, uint32_t n_outputs, const llama_memory_context_i * mctx, bool split_only = false);
|
||||
ggml_cgraph * graph_reserve(
|
||||
uint32_t n_tokens, uint32_t n_seqs, uint32_t n_outputs, const llama_memory_context_i * mctx, bool split_only = false, size_t * sizes = nullptr);
|
||||
|
||||
private:
|
||||
llm_graph_params graph_params(
|
||||
@@ -281,9 +286,10 @@ private:
|
||||
|
||||
std::vector<std::pair<ggml_backend_t, ggml_backend_set_n_threads_t>> set_n_threads_fns;
|
||||
|
||||
// buffer types used for the compute buffer of each backend
|
||||
// pointers and buffer types used for the compute buffer of each backend
|
||||
std::vector<ggml_backend_t> backend_ptrs;
|
||||
std::vector<ggml_backend_buffer_type_t> backend_buft;
|
||||
std::vector<size_t> backend_buf_exp_size; // expected buffer sizes
|
||||
|
||||
llm_graph_result_ptr gf_res_prev;
|
||||
llm_graph_result_ptr gf_res_reserve;
|
||||
|
||||
Reference in New Issue
Block a user