From 1a636fb47af789da25dd13f1cac2d4eb3b674c75 Mon Sep 17 00:00:00 2001 From: Bruce MacDonald Date: Wed, 18 Feb 2026 17:19:36 -0800 Subject: [PATCH] cmd: set codex env vars on launch and handle zstd request bodies (#14122) The Codex runner was not setting OPENAI_BASE_URL or OPENAI_API_KEY. Setting them on launch prevents Codex from sending requests to api.openai.com instead of the local Ollama server. This mirrors the approach used by the Claude runner. Codex v0.98.0 sends zstd-compressed request bodies to the /v1/responses endpoint. Add decompression support in ResponsesMiddleware with an 8MB max decompressed size limit to prevent resource exhaustion. --- cmd/config/codex.go | 5 ++ go.mod | 1 + go.sum | 4 +- middleware/openai.go | 12 +++++ middleware/openai_test.go | 100 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 120 insertions(+), 2 deletions(-) diff --git a/cmd/config/codex.go b/cmd/config/codex.go index f9c52f61d..ee2c70542 100644 --- a/cmd/config/codex.go +++ b/cmd/config/codex.go @@ -6,6 +6,7 @@ import ( "os/exec" "strings" + "github.com/ollama/ollama/envconfig" "golang.org/x/mod/semver" ) @@ -32,6 +33,10 @@ func (c *Codex) Run(model string, args []string) error { cmd.Stdin = os.Stdin cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr + cmd.Env = append(os.Environ(), + "OPENAI_BASE_URL="+envconfig.Host().String()+"/v1/", + "OPENAI_API_KEY=ollama", + ) return cmd.Run() } diff --git a/go.mod b/go.mod index a4d6ccfed..a5bac3028 100644 --- a/go.mod +++ b/go.mod @@ -26,6 +26,7 @@ require ( github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1 github.com/dlclark/regexp2 v1.11.4 github.com/emirpasic/gods/v2 v2.0.0-alpha + github.com/klauspost/compress v1.18.3 github.com/mattn/go-runewidth v0.0.16 github.com/nlpodyssey/gopickle v0.3.0 github.com/pdevine/tensor v0.0.0-20240510204454-f88f4562727c diff --git a/go.sum b/go.sum index 13df21902..13dd35638 100644 --- a/go.sum +++ b/go.sum @@ -122,7 +122,6 @@ github.com/golang/protobuf v1.5.0/go.mod
h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaS github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= -github.com/golang/snappy v0.0.3 h1:fHPg5GQYlCeLIPB9BZqMVR5nR9A+IM5zcgeTdjMYmLA= github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/flatbuffers v2.0.0+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= github.com/google/flatbuffers v24.3.25+incompatible h1:CX395cjN9Kke9mmalRoL3d81AtFUxJM+yDthflgJGkI= @@ -150,8 +149,9 @@ github.com/jung-kurt/gofpdf v1.0.0/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+ github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/klauspost/compress v1.13.1 h1:wXr2uRxZTJXHLly6qhJabee5JqIhTRoLBhDOA74hDEQ= github.com/klauspost/compress v1.13.1/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg= +github.com/klauspost/compress v1.18.3 h1:9PJRvfbmTabkOX8moIpXPbMMbYN60bWImDDU7L+/6zw= +github.com/klauspost/compress v1.18.3/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM= github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= diff --git a/middleware/openai.go b/middleware/openai.go index e2db8f965..dc40fa351 100644 --- a/middleware/openai.go +++ b/middleware/openai.go @@ -11,6 +11,7 @@ import ( "time" "github.com/gin-gonic/gin" + "github.com/klauspost/compress/zstd" "github.com/ollama/ollama/api"
"github.com/ollama/ollama/openai" @@ -496,6 +497,17 @@ func (w *ResponsesWriter) Write(data []byte) (int, error) { func ResponsesMiddleware() gin.HandlerFunc { return func(c *gin.Context) { + if c.GetHeader("Content-Encoding") == "zstd" { + reader, err := zstd.NewReader(c.Request.Body, zstd.WithDecoderMaxMemory(8<<20)) + if err != nil { + c.AbortWithStatusJSON(http.StatusBadRequest, openai.NewError(http.StatusBadRequest, "failed to decompress zstd body")) + return + } + defer reader.Close() + c.Request.Body = http.MaxBytesReader(c.Writer, io.NopCloser(reader), 8<<20) // WithDecoderMaxMemory only bounds the window for streaming reads; cap the decompressed size explicitly + c.Request.Header.Del("Content-Encoding") + } + var req openai.ResponsesRequest if err := c.ShouldBindJSON(&req); err != nil { c.AbortWithStatusJSON(http.StatusBadRequest, openai.NewError(http.StatusBadRequest, err.Error())) diff --git a/middleware/openai_test.go b/middleware/openai_test.go index 0cf2558ae..79b595a72 100644 --- a/middleware/openai_test.go +++ b/middleware/openai_test.go @@ -14,6 +14,7 @@ import ( "github.com/gin-gonic/gin" "github.com/google/go-cmp/cmp" + "github.com/klauspost/compress/zstd" "github.com/ollama/ollama/api" "github.com/ollama/ollama/openai" @@ -1238,3 +1239,102 @@ func TestImageEditsMiddleware(t *testing.T) { }) } } + +func zstdCompress(t *testing.T, data []byte) []byte { + t.Helper() + var buf bytes.Buffer + w, err := zstd.NewWriter(&buf) + if err != nil { + t.Fatal(err) + } + if _, err := w.Write(data); err != nil { + t.Fatal(err) + } + if err := w.Close(); err != nil { + t.Fatal(err) + } + return buf.Bytes() +} + +func TestResponsesMiddlewareZstd(t *testing.T) { + tests := []struct { + name string + body string + useZstd bool + oversized bool + wantCode int + wantModel string + wantMessage string + }{ + { + name: "plain JSON", + body: `{"model": "test-model", "input": "Hello"}`, + wantCode: http.StatusOK, + wantModel: "test-model", + wantMessage: "Hello", + }, + { + name: "zstd compressed", + body: `{"model": "test-model", "input": "Hello"}`, + useZstd: true, + wantCode: http.StatusOK, + wantModel:
"test-model", + wantMessage: "Hello", + }, + { + name: "zstd over max decompressed size", + oversized: true, + useZstd: true, + wantCode: http.StatusBadRequest, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var capturedRequest *api.ChatRequest + + gin.SetMode(gin.TestMode) + router := gin.New() + router.Use(ResponsesMiddleware(), captureRequestMiddleware(&capturedRequest)) + router.Handle(http.MethodPost, "/v1/responses", func(c *gin.Context) { + c.Status(http.StatusOK) + }) + + var bodyReader io.Reader + if tt.oversized { + bodyReader = bytes.NewReader(zstdCompress(t, []byte(`{"model": "test-model", "input": "`+strings.Repeat("A", 9<<20)+`"}`))) // valid JSON, so only the decompressed-size cap can reject it + } else if tt.useZstd { + bodyReader = bytes.NewReader(zstdCompress(t, []byte(tt.body))) + } else { + bodyReader = strings.NewReader(tt.body) + } + + req, _ := http.NewRequest(http.MethodPost, "/v1/responses", bodyReader) + req.Header.Set("Content-Type", "application/json") + if tt.useZstd || tt.oversized { + req.Header.Set("Content-Encoding", "zstd") + } + + resp := httptest.NewRecorder() + router.ServeHTTP(resp, req) + + if resp.Code != tt.wantCode { + t.Fatalf("expected status %d, got %d: %s", tt.wantCode, resp.Code, resp.Body.String()) + } + + if tt.wantCode != http.StatusOK { + return + } + + if capturedRequest == nil { + t.Fatal("expected captured request, got nil") + } + if capturedRequest.Model != tt.wantModel { + t.Fatalf("expected model %q, got %q", tt.wantModel, capturedRequest.Model) + } + if len(capturedRequest.Messages) != 1 || capturedRequest.Messages[0].Content != tt.wantMessage { + t.Fatalf("expected single user message %q, got %+v", tt.wantMessage, capturedRequest.Messages) + } + }) + } +}