mirror of
https://github.com/ollama/ollama.git
synced 2026-04-21 00:05:40 +02:00
server: decompress zstd request bodies in cloud passthrough middleware (#14827)
When a zstd-compressed request (e.g. from Codex CLI) hit /v1/responses with a cloud model, the request failed. Fix this by decompressing zstd bodies before model extraction, so that cloud models are detected and proxied directly without the writer being wrapped.
This commit is contained in:
@@ -16,6 +16,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/klauspost/compress/zstd"
|
||||
|
||||
"github.com/ollama/ollama/auth"
|
||||
"github.com/ollama/ollama/envconfig"
|
||||
@@ -29,6 +30,9 @@ const (
|
||||
cloudProxyBaseURLEnv = "OLLAMA_CLOUD_BASE_URL"
|
||||
legacyCloudAnthropicKey = "legacy_cloud_anthropic_web_search"
|
||||
cloudProxyClientVersionHeader = "X-Ollama-Client-Version"
|
||||
|
||||
// maxDecompressedBodySize limits the size of a decompressed request body
|
||||
maxDecompressedBodySize = 20 << 20
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -73,6 +77,19 @@ func cloudPassthroughMiddleware(disabledOperation string) gin.HandlerFunc {
|
||||
return
|
||||
}
|
||||
|
||||
// Decompress zstd-encoded request bodies so we can inspect the model
|
||||
if c.GetHeader("Content-Encoding") == "zstd" {
|
||||
reader, err := zstd.NewReader(c.Request.Body, zstd.WithDecoderMaxMemory(8<<20))
|
||||
if err != nil {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "failed to decompress request body"})
|
||||
c.Abort()
|
||||
return
|
||||
}
|
||||
defer reader.Close()
|
||||
c.Request.Body = http.MaxBytesReader(c.Writer, io.NopCloser(reader), maxDecompressedBodySize)
|
||||
c.Request.Header.Del("Content-Encoding")
|
||||
}
|
||||
|
||||
// TODO(drifkin): Avoid full-body buffering here for model detection.
|
||||
// A future optimization can parse just enough JSON to read "model" (and
|
||||
// optionally short-circuit cloud-disabled explicit-cloud requests) while
|
||||
|
||||
Reference in New Issue
Block a user