mirror of
https://github.com/ollama/ollama.git
synced 2026-04-19 12:54:19 +02:00
cloud_proxy: for the web_search legacy path, flush on newlines (#14897)
`WebSearchAnthropicWriter` expects a single object per write. The new transparent proxy will instead send it whatever bytes it sees. This cloud-model + local-orchestration + cloud-search is a temporary code path, so instead of making the web search code more robust to this, I put an adapter in the middle that will flush line-by-line to preserve the old behavior.
This commit is contained in:
@@ -226,7 +226,24 @@ func proxyCloudRequestWithPath(c *gin.Context, body []byte, path string, disable
|
||||
copyProxyResponseHeaders(c.Writer.Header(), resp.Header)
|
||||
c.Status(resp.StatusCode)
|
||||
|
||||
if err := copyProxyResponseBody(c.Writer, resp.Body); err != nil {
|
||||
var bodyWriter http.ResponseWriter = c.Writer
|
||||
var framedWriter *jsonlFramingResponseWriter
|
||||
// TEMP(drifkin): only needed on the cloud-proxied first leg of Anthropic
|
||||
// web_search fallback (which is a path we're removing soon). Local
|
||||
// /v1/messages writes one JSON value per streamResponse callback directly
|
||||
// into WebSearchAnthropicWriter, but this proxy copy loop may coalesce
|
||||
// multiple jsonl records into one Write. WebSearchAnthropicWriter currently
|
||||
// unmarshals one JSON value per Write.
|
||||
if path == "/api/chat" && resp.StatusCode == http.StatusOK && c.GetBool(legacyCloudAnthropicKey) {
|
||||
framedWriter = &jsonlFramingResponseWriter{ResponseWriter: c.Writer}
|
||||
bodyWriter = framedWriter
|
||||
}
|
||||
|
||||
err = copyProxyResponseBody(bodyWriter, resp.Body)
|
||||
if err == nil && framedWriter != nil {
|
||||
err = framedWriter.FlushPending()
|
||||
}
|
||||
if err != nil {
|
||||
ctxErr := c.Request.Context().Err()
|
||||
if errors.Is(err, context.Canceled) && errors.Is(ctxErr, context.Canceled) {
|
||||
slog.Debug(
|
||||
@@ -240,6 +257,7 @@ func proxyCloudRequestWithPath(c *gin.Context, body []byte, path string, disable
|
||||
slog.Warn(
|
||||
"cloud proxy response copy failed",
|
||||
"path", c.Request.URL.Path,
|
||||
"upstream_path", path,
|
||||
"status", resp.StatusCode,
|
||||
"request_context_canceled", ctxErr != nil,
|
||||
"request_context_err", ctxErr,
|
||||
@@ -473,6 +491,55 @@ func copyProxyResponseBody(dst http.ResponseWriter, src io.Reader) error {
|
||||
}
|
||||
}
|
||||
|
||||
// jsonlFramingResponseWriter wraps an http.ResponseWriter and re-frames the
// bytes written to it on newline boundaries: the wrapped writer receives one
// Write call per non-blank line, with surrounding whitespace (including the
// newline itself) removed. Bytes that do not yet form a complete line are
// buffered until a newline arrives or FlushPending is called.
type jsonlFramingResponseWriter struct {
	http.ResponseWriter

	// pending holds the bytes received since the last newline that was
	// consumed, i.e. the (possibly empty) start of the next line.
	pending []byte
}

// Flush forwards to the wrapped writer's Flush when it implements
// http.Flusher and does nothing otherwise. It never emits the buffered
// partial line; FlushPending is responsible for that.
func (w *jsonlFramingResponseWriter) Flush() {
	flusher, ok := w.ResponseWriter.(http.Flusher)
	if !ok {
		return
	}
	flusher.Flush()
}

// Write appends p to the internal buffer and forwards every complete line
// now available to the wrapped writer. The returned count is always len(p),
// because the bytes have been taken into the buffer even if forwarding a
// line fails; the error is the first one reported by the wrapped writer.
func (w *jsonlFramingResponseWriter) Write(p []byte) (int, error) {
	w.pending = append(w.pending, p...)
	return len(p), w.flushCompleteLines()
}

// FlushPending forwards whatever is left in the buffer as a final record and
// clears the buffer. Nothing is written when the remainder is empty or
// consists only of whitespace. Call it once the source stream has ended so a
// last line without a trailing newline is not lost.
func (w *jsonlFramingResponseWriter) FlushPending() error {
	rest := bytes.TrimSpace(w.pending)
	w.pending = nil
	if len(rest) > 0 {
		_, err := w.ResponseWriter.Write(rest)
		return err
	}
	return nil
}

// flushCompleteLines drains every newline-terminated line from the buffer,
// forwarding each non-blank, whitespace-trimmed line in its own Write call.
// It stops at the first write error; the line that failed has already been
// removed from the buffer at that point.
func (w *jsonlFramingResponseWriter) flushCompleteLines() error {
	for end := bytes.IndexByte(w.pending, '\n'); end >= 0; end = bytes.IndexByte(w.pending, '\n') {
		record := bytes.TrimSpace(w.pending[:end])
		// Advance past the newline before writing so a failed write does
		// not leave the same line queued for a retry.
		w.pending = w.pending[end+1:]
		if len(record) > 0 {
			if _, err := w.ResponseWriter.Write(record); err != nil {
				return err
			}
		}
	}
	return nil
}
|
||||
|
||||
func isHopByHopHeader(name string) bool {
|
||||
_, ok := hopByHopHeaders[strings.ToLower(name)]
|
||||
return ok
|
||||
|
||||
Reference in New Issue
Block a user