adding test

add test
launch: add thinking capability detection to opencode
2026-04-19 03:54:21 +02:00 · 2026-04-14 15:28:40 -07:00 · 2026-04-14 15:28:40 -07:00 · 2026-04-14 15:28:40 -07:00 · 2026-04-14 15:08:42 -07:00 · 2026-04-14 13:57:35 -07:00
658 changed files with 113000 additions and 16498 deletions
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -27,7 +27,7 @@ jobs:
          echo vendorsha=$(make -f Makefile.sync print-base) | tee -a $GITHUB_OUTPUT
  darwin-build:
-    runs-on: macos-14-xlarge
+    runs-on: macos-26-xlarge
    environment: release
    needs: setup-environment
    env:
@@ -117,6 +117,25 @@ jobs:
            install: https://sdk.lunarg.com/sdk/download/1.4.321.1/windows/vulkansdk-windows-X64-1.4.321.1.exe
            flags: ''
            runner_dir: 'vulkan'
          - os: windows
            arch: amd64
            preset: 'MLX CUDA 13'
            install: https://developer.download.nvidia.com/compute/cuda/13.0.0/local_installers/cuda_13.0.0_windows.exe
            cudnn-install: https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/windows-x86_64/cudnn-windows-x86_64-9.18.1.3_cuda13-archive.zip
            cuda-components:
              - '"cudart"'
              - '"nvcc"'
              - '"cublas"'
              - '"cublas_dev"'
              - '"cufft"'
              - '"cufft_dev"'
              - '"nvrtc"'
              - '"nvrtc_dev"'
              - '"crt"'
              - '"nvvm"'
              - '"nvptxcompiler"'
            cuda-version: '13.0'
            flags: ''
    runs-on: ${{ matrix.arch == 'arm64' && format('{0}-{1}', matrix.os, matrix.arch) || matrix.os }}
    environment: release
    env:
@@ -125,8 +144,10 @@ jobs:
      - name: Install system dependencies
        run: |
          choco install -y --no-progress ccache ninja
-          ccache -o cache_dir=${{ github.workspace }}\.ccache
+          if (Get-Command ccache -ErrorAction SilentlyContinue) {
-      - if: startsWith(matrix.preset, 'CUDA ') || startsWith(matrix.preset, 'ROCm ') || startsWith(matrix.preset, 'Vulkan')
+            ccache -o cache_dir=${{ github.workspace }}\.ccache
          }
      - if: startsWith(matrix.preset, 'CUDA ') || startsWith(matrix.preset, 'ROCm ') || startsWith(matrix.preset, 'Vulkan') || startsWith(matrix.preset, 'MLX ')
        id: cache-install
        uses: actions/cache/restore@v4
        with:
@@ -134,8 +155,9 @@ jobs:
            C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA
            C:\Program Files\AMD\ROCm
            C:\VulkanSDK
-          key: ${{ matrix.install }}
+            C:\Program Files\NVIDIA\CUDNN
-      - if: startsWith(matrix.preset, 'CUDA ')
+          key: ${{ matrix.install }}-${{ matrix.cudnn-install }}
      - if: startsWith(matrix.preset, 'CUDA ') || startsWith(matrix.preset, 'MLX ')
        name: Install CUDA ${{ matrix.cuda-version }}
        run: |
          $ErrorActionPreference = "Stop"
@@ -179,6 +201,23 @@ jobs:
        run: |
          echo "CC=clang.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
          echo "CXX=clang++.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
      - if: startsWith(matrix.preset, 'MLX ')
        name: Install cuDNN for MLX
        run: |
          $ErrorActionPreference = "Stop"
          $cudnnRoot = "C:\Program Files\NVIDIA\CUDNN"
          if ("${{ steps.cache-install.outputs.cache-hit }}" -ne 'true') {
            Invoke-WebRequest -Uri "${{ matrix.cudnn-install }}" -OutFile "cudnn.zip"
            Expand-Archive -Path cudnn.zip -DestinationPath cudnn-extracted
            $cudnnDir = (Get-ChildItem -Path cudnn-extracted -Directory)[0].FullName
            New-Item -ItemType Directory -Force -Path $cudnnRoot
            Copy-Item -Path "$cudnnDir\*" -Destination "$cudnnRoot\" -Recurse
          }
          echo "CUDNN_ROOT_DIR=$cudnnRoot" | Out-File -FilePath $env:GITHUB_ENV -Append
          echo "CUDNN_INCLUDE_PATH=$cudnnRoot\include" | Out-File -FilePath $env:GITHUB_ENV -Append
          echo "CUDNN_LIBRARY_PATH=$cudnnRoot\lib\x64" | Out-File -FilePath $env:GITHUB_ENV -Append
          echo "$cudnnRoot\bin\x64" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
      - if: ${{ !cancelled() && steps.cache-install.outputs.cache-hit != 'true' }}
        uses: actions/cache/save@v4
        with:
@@ -186,7 +225,8 @@ jobs:
            C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA
            C:\Program Files\AMD\ROCm
            C:\VulkanSDK
-          key: ${{ matrix.install }}
+            C:\Program Files\NVIDIA\CUDNN
          key: ${{ matrix.install }}-${{ matrix.cudnn-install }}
      - uses: actions/checkout@v4
      - uses: actions/cache@v4
        with:
@@ -198,7 +238,7 @@ jobs:
          Enter-VsDevShell -VsInstallPath 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise' -SkipAutomaticLocation  -DevCmdArguments '-arch=x64 -no_logo'
          cmake --preset "${{ matrix.preset }}" ${{ matrix.flags }} --install-prefix "$((pwd).Path)\dist\${{ matrix.os }}-${{ matrix.arch }}"
          cmake --build --parallel ([Environment]::ProcessorCount) --preset "${{ matrix.preset }}"
-          cmake --install build --component "${{ startsWith(matrix.preset, 'CUDA ') && 'CUDA' || startsWith(matrix.preset, 'ROCm ') && 'HIP' || startsWith(matrix.preset, 'Vulkan') && 'Vulkan' || 'CPU' }}" --strip
+          cmake --install build --component "${{ startsWith(matrix.preset, 'MLX ') && 'MLX' || startsWith(matrix.preset, 'CUDA ') && 'CUDA' || startsWith(matrix.preset, 'ROCm ') && 'HIP' || startsWith(matrix.preset, 'Vulkan') && 'Vulkan' || 'CPU' }}" --strip
          Remove-Item -Path dist\lib\ollama\rocm\rocblas\library\*gfx906* -ErrorAction SilentlyContinue
        env:
          CMAKE_GENERATOR: Ninja
@@ -337,6 +377,7 @@ jobs:
          name: bundles-windows
          path: |
            dist/*.zip
            dist/*.ps1
            dist/OllamaSetup.exe
  linux-build:
@@ -383,6 +424,7 @@ jobs:
              lib/ollama/cuda_v*)        echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
              lib/ollama/vulkan*)        echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
              lib/ollama/mlx*)           echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
              lib/ollama/include*)       echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
              lib/ollama/cuda_jetpack5)  echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack5.tar.in ;;
              lib/ollama/cuda_jetpack6)  echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack6.tar.in ;;
              lib/ollama/rocm)           echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-rocm.tar.in ;;
@@ -514,6 +556,9 @@ jobs:
      - name: Log dist contents
        run: |
          ls -l dist/
      - name: Copy install scripts to dist
        run: |
          cp scripts/install.sh dist/install.sh
      - name: Generate checksum file
        run: find . -type f -not -name 'sha256sum.txt' | xargs sha256sum | tee sha256sum.txt
        working-directory: dist
@@ -536,14 +581,22 @@ jobs:
      - name: Upload release artifacts
        run: |
          pids=()
-          for payload in dist/*.txt dist/*.zip dist/*.tgz dist/*.tar.zst dist/*.exe dist/*.dmg ; do
+          for payload in dist/*.txt dist/*.zip dist/*.tgz dist/*.tar.zst dist/*.exe dist/*.dmg dist/*.ps1 dist/*.sh ; do
            echo "Uploading $payload"
            gh release upload ${GITHUB_REF_NAME} $payload --clobber &
-            pids[$!]=$!
+            pids+=($!)
            sleep 1
          done
          echo "Waiting for uploads to complete"
-          for pid in "${pids[*]}"; do
+          failed=0
-            wait $pid
+          for pid in "${pids[@]}"; do
            if ! wait $pid; then
              echo "::error::Upload failed (pid $pid)"
              failed=1
            fi
          done
          if [ $failed -ne 0 ]; then
            echo "One or more uploads failed"
            exit 1
          fi
          echo "done"
--- a/.github/workflows/test-install.yaml
+++ b/.github/workflows/test-install.yaml
@@ -0,0 +1,22 @@
 name: test-install  # PR smoke test: run scripts/install.sh, then check the ollama CLI
 on:
  pull_request:
    paths:  # trigger only when the installer or this workflow file changes
      - 'scripts/install.sh'
      - '.github/workflows/test-install.yaml'
 jobs:
  test:
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest]  # one job per runner OS
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v4
      - name: Run install script
        run: sh ./scripts/install.sh
        env:
          OLLAMA_NO_START: 1 # do not start app
      - name: Verify ollama is available
        run: ollama --version  # fails the job if ollama is not on PATH after install
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -37,7 +37,7 @@ jobs:
              | xargs python3 -c "import sys; from pathlib import Path; print(any(Path(x).match(glob) for x in sys.argv[1:] for glob in '$*'.split(' ')))"
          }
-          echo changed=$(changed 'llama/llama.cpp/**/*' 'ml/backend/ggml/ggml/**/*') | tee -a $GITHUB_OUTPUT
+          echo changed=$(changed 'llama/llama.cpp/**/*' 'ml/backend/ggml/ggml/**/*' '.github/**/*') | tee -a $GITHUB_OUTPUT
          echo vendorsha=$(make -f Makefile.sync print-base) | tee -a $GITHUB_OUTPUT
  linux:
@@ -51,7 +51,7 @@ jobs:
            container: nvidia/cuda:13.0.0-devel-ubuntu22.04
            flags: '-DCMAKE_CUDA_ARCHITECTURES=87'
          - preset: ROCm
-            container: rocm/dev-ubuntu-22.04:6.1.2
+            container: rocm/dev-ubuntu-22.04:7.2.1
            extra-packages: rocm-libs
            flags: '-DAMDGPU_TARGETS=gfx1010 -DCMAKE_PREFIX_PATH=/opt/rocm'
          - preset: Vulkan
@@ -60,6 +60,11 @@ jobs:
              mesa-vulkan-drivers vulkan-tools
              libvulkan1 libvulkan-dev
              vulkan-sdk cmake ccache g++ make
          - preset: 'MLX CUDA 13'
            container: nvidia/cuda:13.0.0-devel-ubuntu22.04
            extra-packages: libcudnn9-dev-cuda-13 libopenblas-dev liblapack-dev liblapacke-dev git curl
            flags: '-DCMAKE_CUDA_ARCHITECTURES=87 -DBLAS_INCLUDE_DIRS=/usr/include/x86_64-linux-gnu -DLAPACK_INCLUDE_DIRS=/usr/include/x86_64-linux-gnu'
            install-go: true
    runs-on: linux
    container: ${{ matrix.container }}
    steps:
@@ -76,19 +81,29 @@ jobs:
            $sudo apt-get update
          fi
          $sudo apt-get install -y cmake ccache ${{ matrix.extra-packages }}
          # MLX requires CMake 3.25+, so install a newer CMake from the official releases
          if [ "${{ matrix.preset }}" = "MLX CUDA 13" ]; then
            curl -fsSL https://github.com/Kitware/CMake/releases/download/v3.31.2/cmake-3.31.2-linux-$(uname -m).tar.gz | $sudo tar xz -C /usr/local --strip-components 1
          fi
          # Export VULKAN_SDK if provided by LunarG package (defensive)
          if [ -d "/usr/lib/x86_64-linux-gnu/vulkan" ] && [ "${{ matrix.preset }}" = "Vulkan" ]; then
            echo "VULKAN_SDK=/usr" >> $GITHUB_ENV
          fi
        env:
          DEBIAN_FRONTEND: noninteractive
      - if: matrix.install-go
        name: Install Go
        run: |
          GO_VERSION=$(awk '/^go / { print $2 }' go.mod)
          curl -fsSL "https://golang.org/dl/go${GO_VERSION}.linux-$(dpkg --print-architecture).tar.gz" | tar xz -C /usr/local
          echo "/usr/local/go/bin" >> $GITHUB_PATH
      - uses: actions/cache@v4
        with:
          path: /github/home/.cache/ccache
          key: ccache-${{ runner.os }}-${{ runner.arch }}-${{ matrix.preset }}-${{ needs.changes.outputs.vendorsha }}
      - run: |
-          cmake --preset ${{ matrix.preset }} ${{ matrix.flags }}
+          cmake --preset "${{ matrix.preset }}" ${{ matrix.flags }}
-          cmake --build --preset ${{ matrix.preset }} --parallel
+          cmake --build --preset "${{ matrix.preset }}" --parallel
  windows:
    needs: [changes]
@@ -114,12 +129,31 @@ jobs:
            flags: '-DAMDGPU_TARGETS=gfx1010 -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_FLAGS="-parallel-jobs=4 -Wno-ignored-attributes -Wno-deprecated-pragma" -DCMAKE_CXX_FLAGS="-parallel-jobs=4 -Wno-ignored-attributes -Wno-deprecated-pragma"'
          - preset: Vulkan
            install: https://sdk.lunarg.com/sdk/download/1.4.321.1/windows/vulkansdk-windows-X64-1.4.321.1.exe
          - preset: 'MLX CUDA 13'
            install: https://developer.download.nvidia.com/compute/cuda/13.0.0/local_installers/cuda_13.0.0_windows.exe
            cudnn-install: https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/windows-x86_64/cudnn-windows-x86_64-9.18.1.3_cuda13-archive.zip
            flags: '-DCMAKE_CUDA_ARCHITECTURES=80'
            cuda-components:
              - '"cudart"'
              - '"nvcc"'
              - '"cublas"'
              - '"cublas_dev"'
              - '"cufft"'
              - '"cufft_dev"'
              - '"nvrtc"'
              - '"nvrtc_dev"'
              - '"crt"'
              - '"nvvm"'
              - '"nvptxcompiler"'
            cuda-version: '13.0'
    runs-on: windows
    steps:
      - run: |
          choco install -y --no-progress ccache ninja
-          ccache -o cache_dir=${{ github.workspace }}\.ccache
+          if (Get-Command ccache -ErrorAction SilentlyContinue) {
-      - if: matrix.preset == 'CUDA' || matrix.preset == 'ROCm' || matrix.preset == 'Vulkan'
+            ccache -o cache_dir=${{ github.workspace }}\.ccache
          }
      - if: matrix.preset == 'CUDA' || matrix.preset == 'ROCm' || matrix.preset == 'Vulkan' || matrix.preset == 'MLX CUDA 13'
        id: cache-install
        uses: actions/cache/restore@v4
        with:
@@ -127,8 +161,9 @@ jobs:
            C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA
            C:\Program Files\AMD\ROCm
            C:\VulkanSDK
-          key: ${{ matrix.install }}
+            C:\Program Files\NVIDIA\CUDNN
-      - if: matrix.preset == 'CUDA'
+          key: ${{ matrix.install }}-${{ matrix.cudnn-install }}
      - if: matrix.preset == 'CUDA' || matrix.preset == 'MLX CUDA 13'
        name: Install CUDA ${{ matrix.cuda-version }}
        run: |
          $ErrorActionPreference = "Stop"
@@ -168,6 +203,23 @@ jobs:
          $vulkanPath = (Resolve-Path "C:\VulkanSDK\*").path
          echo "$vulkanPath\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
          echo "VULKAN_SDK=$vulkanPath" >> $env:GITHUB_ENV
      - if: matrix.preset == 'MLX CUDA 13'
        name: Install cuDNN for MLX
        run: |
          $ErrorActionPreference = "Stop"
          $cudnnRoot = "C:\Program Files\NVIDIA\CUDNN"
          if ("${{ steps.cache-install.outputs.cache-hit }}" -ne 'true') {
            Invoke-WebRequest -Uri "${{ matrix.cudnn-install }}" -OutFile "cudnn.zip"
            Expand-Archive -Path cudnn.zip -DestinationPath cudnn-extracted
            $cudnnDir = (Get-ChildItem -Path cudnn-extracted -Directory)[0].FullName
            New-Item -ItemType Directory -Force -Path $cudnnRoot
            Copy-Item -Path "$cudnnDir\*" -Destination "$cudnnRoot\" -Recurse
          }
          echo "CUDNN_ROOT_DIR=$cudnnRoot" | Out-File -FilePath $env:GITHUB_ENV -Append
          echo "CUDNN_INCLUDE_PATH=$cudnnRoot\include" | Out-File -FilePath $env:GITHUB_ENV -Append
          echo "CUDNN_LIBRARY_PATH=$cudnnRoot\lib\x64" | Out-File -FilePath $env:GITHUB_ENV -Append
          echo "$cudnnRoot\bin\x64" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
      - if: ${{ !cancelled() && steps.cache-install.outputs.cache-hit != 'true' }}
        uses: actions/cache/save@v4
        with:
@@ -175,7 +227,8 @@ jobs:
            C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA
            C:\Program Files\AMD\ROCm
            C:\VulkanSDK
-          key: ${{ matrix.install }}
+            C:\Program Files\NVIDIA\CUDNN
          key: ${{ matrix.install }}-${{ matrix.cudnn-install }}
      - uses: actions/checkout@v4
      - uses: actions/cache@v4
        with:
--- a/.gitignore
+++ b/.gitignore
@@ -15,3 +15,4 @@ __debug_bin*
 llama/build
 llama/vendor
 /ollama
 integration/testdata/models/
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -64,10 +64,15 @@ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY         ${OLLAMA_BUILD_DIR})
 set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG   ${OLLAMA_BUILD_DIR})
 set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE ${OLLAMA_BUILD_DIR})
-include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src)
+# Store ggml include paths for use with target_include_directories later.
-include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/include)
+# We avoid global include_directories() to prevent polluting the include path
-include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cpu)
+# for other projects like MLX (whose openblas dependency has its own common.h).
-include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cpu/amx)
+set(GGML_INCLUDE_DIRS
    ${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src
    ${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/include
    ${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cpu
    ${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cpu/amx
 )
 add_compile_definitions(NDEBUG GGML_VERSION=0x0 GGML_COMMIT=0x0)
@@ -87,6 +92,14 @@ if(NOT CPU_VARIANTS)
    set(CPU_VARIANTS "ggml-cpu")
 endif()
 # Apply ggml include directories to ggml targets only (not globally)
 target_include_directories(ggml-base PRIVATE ${GGML_INCLUDE_DIRS})
 foreach(variant ${CPU_VARIANTS})
    if(TARGET ${variant})
        target_include_directories(${variant} PRIVATE ${GGML_INCLUDE_DIRS})
    endif()
 endforeach()
 install(TARGETS ggml-base ${CPU_VARIANTS}
    RUNTIME_DEPENDENCIES
        PRE_EXCLUDE_REGEXES ".*"
@@ -103,6 +116,7 @@ if(CMAKE_CUDA_COMPILER)
    find_package(CUDAToolkit)
    add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cuda)
    target_include_directories(ggml-cuda PRIVATE ${GGML_INCLUDE_DIRS})
    install(TARGETS ggml-cuda
        RUNTIME_DEPENDENCIES
            DIRECTORIES ${CUDAToolkit_BIN_DIR} ${CUDAToolkit_BIN_DIR}/x64 ${CUDAToolkit_LIBRARY_DIR}
@@ -134,6 +148,7 @@ if(CMAKE_HIP_COMPILER)
    if(AMDGPU_TARGETS)
        find_package(hip REQUIRED)
        add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-hip)
        target_include_directories(ggml-hip PRIVATE ${GGML_INCLUDE_DIRS})
        if (WIN32)
            target_compile_definitions(ggml-hip PRIVATE GGML_CUDA_NO_PEER_COPY)
@@ -148,7 +163,7 @@ if(CMAKE_HIP_COMPILER)
        )
        install(RUNTIME_DEPENDENCY_SET rocm
                DIRECTORIES ${HIP_BIN_INSTALL_DIR} ${HIP_LIB_INSTALL_DIR}
-                PRE_INCLUDE_REGEXES hipblas rocblas amdhip64 rocsolver amd_comgr hsa-runtime64 rocsparse tinfo rocprofiler-register drm drm_amdgpu numa elf
+                PRE_INCLUDE_REGEXES hipblas rocblas amdhip64 rocsolver amd_comgr hsa-runtime64 rocsparse tinfo rocprofiler-register roctx64 rocroller drm drm_amdgpu numa elf
                PRE_EXCLUDE_REGEXES ".*"
                POST_EXCLUDE_REGEXES "system32"
            RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT HIP
@@ -168,6 +183,7 @@ if(NOT APPLE)
    find_package(Vulkan)
    if(Vulkan_FOUND)
        add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-vulkan)
        target_include_directories(ggml-vulkan PRIVATE ${GGML_INCLUDE_DIRS})
        install(TARGETS ggml-vulkan
            RUNTIME_DEPENDENCIES
                PRE_INCLUDE_REGEXES vulkan
@@ -179,18 +195,43 @@ if(NOT APPLE)
 endif()
 option(MLX_ENGINE "Enable MLX backend" OFF)
 if(MLX_ENGINE)
    message(STATUS "Setting up MLX (this takes a while...)")
-    add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/x/ml/backend/mlx)
+    add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/x/imagegen/mlx)
    # Find CUDA toolkit if MLX is built with CUDA support
    find_package(CUDAToolkit)
    # Build list of directories for runtime dependency resolution
    set(MLX_RUNTIME_DIRS ${CUDAToolkit_BIN_DIR} ${CUDAToolkit_BIN_DIR}/x64 ${CUDAToolkit_LIBRARY_DIR})
    # Add cuDNN bin paths for DLLs (Windows MLX CUDA builds)
    # CUDNN_ROOT_DIR is the standard CMake variable for cuDNN location
    if(DEFINED ENV{CUDNN_ROOT_DIR})
        # cuDNN 9.x has versioned subdirectories under bin/ (e.g., bin/13.0/)
        file(GLOB CUDNN_BIN_SUBDIRS "$ENV{CUDNN_ROOT_DIR}/bin/*")
        list(APPEND MLX_RUNTIME_DIRS ${CUDNN_BIN_SUBDIRS})
    endif()
    # Add build output directory and MLX dependency build directories
    list(APPEND MLX_RUNTIME_DIRS ${OLLAMA_BUILD_DIR})
    # OpenBLAS DLL location (pre-built zip extracts into openblas-src/bin/)
    list(APPEND MLX_RUNTIME_DIRS ${CMAKE_BINARY_DIR}/_deps/openblas-src/bin)
    # NCCL: on Linux, if real NCCL is found, CMake bundles libnccl.so via the
    # regex below. If NCCL is not found, MLX links a static stub (OBJECT lib)
    # so there is no runtime dependency. This path covers the stub build dir
    # on Windows so that the DLL is included in our dependencies.
    list(APPEND MLX_RUNTIME_DIRS ${CMAKE_BINARY_DIR}/_deps/mlx-build/mlx/distributed/nccl/nccl_stub-prefix/src/nccl_stub-build/Release)
    # Base regexes for runtime dependencies (cross-platform)
    set(MLX_INCLUDE_REGEXES cublas cublasLt cudart cufft nvrtc nvrtc-builtins cudnn nccl openblas gfortran)
    # On Windows, also include dl.dll (dlfcn-win32 POSIX emulation layer)
    if(WIN32)
        list(APPEND MLX_INCLUDE_REGEXES "^dl\\.dll$")
    endif()
    install(TARGETS mlx mlxc
        RUNTIME_DEPENDENCIES
-            DIRECTORIES ${CUDAToolkit_BIN_DIR} ${CUDAToolkit_BIN_DIR}/x64 ${CUDAToolkit_LIBRARY_DIR}
+            DIRECTORIES ${MLX_RUNTIME_DIRS}
-            PRE_INCLUDE_REGEXES cublas cublasLt cudart nvrtc nvrtc-builtins cudnn nccl openblas gfortran
+            PRE_INCLUDE_REGEXES ${MLX_INCLUDE_REGEXES}
            PRE_EXCLUDE_REGEXES ".*"
        RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT MLX
        LIBRARY DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT MLX
@@ -205,13 +246,117 @@ if(MLX_ENGINE)
            COMPONENT MLX)
    endif()
-    # Manually install cudart and cublas since they might not be picked up as direct dependencies
+    # Install headers for NVRTC JIT compilation at runtime.
    # MLX's own install rules use the default component so they get skipped by
    # --component MLX. Headers are installed alongside libmlx in OLLAMA_INSTALL_DIR.
    #
    # Layout:
    #   ${OLLAMA_INSTALL_DIR}/include/cccl/{cuda,nv}/  — CCCL headers
    #   ${OLLAMA_INSTALL_DIR}/include/*.h               — CUDA toolkit headers
    #
    # MLX's jit_module.cpp resolves CCCL relative to current_binary_dir():
    #   current_binary_dir()[.parent_path()] / "include" / "cccl"
    # On Linux it uses the parent_path() variant,
    #   current_binary_dir().parent_path() / "include" / "cccl", so we create a
    #   symlink from lib/ollama/include -> ${OLLAMA_RUNNER_DIR}/include.
    #   This will need refinement if we add multiple CUDA versions for MLX in the future.
    # CUDA runtime headers are found via CUDA_PATH env var (set by mlxrunner).
    if(EXISTS ${CMAKE_BINARY_DIR}/_deps/cccl-src/include/cuda)
        install(DIRECTORY ${CMAKE_BINARY_DIR}/_deps/cccl-src/include/cuda
            DESTINATION ${OLLAMA_INSTALL_DIR}/include/cccl
            COMPONENT MLX)
        install(DIRECTORY ${CMAKE_BINARY_DIR}/_deps/cccl-src/include/nv
            DESTINATION ${OLLAMA_INSTALL_DIR}/include/cccl
            COMPONENT MLX)
        if(NOT WIN32 AND NOT APPLE)
            install(CODE "
                set(_link \"${CMAKE_INSTALL_PREFIX}/lib/ollama/include\")
                set(_target \"${OLLAMA_RUNNER_DIR}/include\")
                if(NOT EXISTS \${_link})
                    execute_process(COMMAND \${CMAKE_COMMAND} -E create_symlink \${_target} \${_link})
                endif()
            " COMPONENT MLX)
        endif()
    endif()
    # Install minimal CUDA toolkit headers needed by MLX JIT kernels.
    # These are the transitive closure of includes from mlx/backend/cuda/device/*.cuh.
    # The Go mlxrunner sets CUDA_PATH to OLLAMA_INSTALL_DIR so MLX finds them at
    # $CUDA_PATH/include/*.h via NVRTC --include-path.
    if(CUDAToolkit_FOUND)
-        file(GLOB CUDART_LIBS
+        # CUDAToolkit_INCLUDE_DIRS may be a semicolon-separated list
        # (e.g. ".../include;.../include/cccl"). Find the entry that
        # contains the CUDA runtime headers we need.
        set(_cuda_inc "")
        foreach(_dir ${CUDAToolkit_INCLUDE_DIRS})
            if(EXISTS "${_dir}/cuda_runtime_api.h")
                set(_cuda_inc "${_dir}")
                break()
            endif()
        endforeach()
        if(NOT _cuda_inc)
            message(WARNING "Could not find cuda_runtime_api.h in CUDAToolkit_INCLUDE_DIRS: ${CUDAToolkit_INCLUDE_DIRS}")
        else()
            set(_dst "${OLLAMA_INSTALL_DIR}/include")
            set(_MLX_JIT_CUDA_HEADERS
                builtin_types.h
                cooperative_groups.h
                cuda_bf16.h
                cuda_bf16.hpp
                cuda_device_runtime_api.h
                cuda_fp16.h
                cuda_fp16.hpp
                cuda_fp8.h
                cuda_fp8.hpp
                cuda_runtime_api.h
                device_types.h
                driver_types.h
                math_constants.h
                surface_types.h
                texture_types.h
                vector_functions.h
                vector_functions.hpp
                vector_types.h
            )
            foreach(_hdr ${_MLX_JIT_CUDA_HEADERS})
                install(FILES "${_cuda_inc}/${_hdr}"
                    DESTINATION ${_dst}
                    COMPONENT MLX)
            endforeach()
            # Subdirectory headers
            install(DIRECTORY "${_cuda_inc}/cooperative_groups"
                DESTINATION ${_dst}
                COMPONENT MLX
                FILES_MATCHING PATTERN "*.h")
            install(FILES "${_cuda_inc}/crt/host_defines.h"
                DESTINATION "${_dst}/crt"
                COMPONENT MLX)
        endif()
    endif()
    # On Windows, explicitly install dl.dll (dlfcn-win32 POSIX dlopen emulation).
    # RUNTIME_DEPENDENCIES auto-excludes it via POST_EXCLUDE_FILES_STRICT because
    # dlfcn-win32 is a known CMake target with its own install rules (which install
    # to the wrong destination). We must install it explicitly here.
    if(WIN32)
        install(FILES ${OLLAMA_BUILD_DIR}/dl.dll
            DESTINATION ${OLLAMA_INSTALL_DIR}
            COMPONENT MLX)
    endif()
    # Manually install CUDA runtime libraries that MLX loads via dlopen
    # (not detected by RUNTIME_DEPENDENCIES since they aren't link-time deps)
    if(CUDAToolkit_FOUND)
        file(GLOB MLX_CUDA_LIBS
            "${CUDAToolkit_LIBRARY_DIR}/libcudart.so*"
-            "${CUDAToolkit_LIBRARY_DIR}/libcublas.so*")
+            "${CUDAToolkit_LIBRARY_DIR}/libcublas.so*"
-        if(CUDART_LIBS)
+            "${CUDAToolkit_LIBRARY_DIR}/libcublasLt.so*"
-            install(FILES ${CUDART_LIBS}
+            "${CUDAToolkit_LIBRARY_DIR}/libnvrtc.so*"
            "${CUDAToolkit_LIBRARY_DIR}/libnvrtc-builtins.so*"
            "${CUDAToolkit_LIBRARY_DIR}/libcufft.so*"
            "${CUDAToolkit_LIBRARY_DIR}/libcudnn.so*")
        if(MLX_CUDA_LIBS)
            install(FILES ${MLX_CUDA_LIBS}
                DESTINATION ${OLLAMA_INSTALL_DIR}
                COMPONENT MLX)
        endif()
--- a/CMakePresets.json
+++ b/CMakePresets.json
@@ -77,6 +77,15 @@
        "OLLAMA_RUNNER_DIR": "rocm"
      }
    },
    {
      "name": "ROCm 7",
      "inherits": [ "ROCm" ],
      "cacheVariables": {
        "CMAKE_HIP_FLAGS": "-parallel-jobs=4",
        "AMDGPU_TARGETS": "gfx942;gfx950;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102;gfx1103;gfx1150;gfx1151;gfx1200;gfx1201;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-",
        "OLLAMA_RUNNER_DIR": "rocm"
      }
    },
    {
      "name": "Vulkan",
      "inherits": [ "Default" ],
@@ -103,6 +112,7 @@
      "name": "MLX CUDA 13",
      "inherits": [ "MLX", "CUDA 13" ],
      "cacheVariables": {
        "MLX_CUDA_ARCHITECTURES": "86;89;90;90a;100;103;75-virtual;80-virtual;110-virtual;120-virtual;121-virtual",
        "OLLAMA_RUNNER_DIR": "mlx_cuda_v13"
      }
    }
@@ -158,6 +168,11 @@
      "inherits": [ "ROCm" ],
      "configurePreset": "ROCm 6"
    },
    {
      "name": "ROCm 7",
      "inherits": [ "ROCm" ],
      "configurePreset": "ROCm 7"
    },
    {
      "name": "Vulkan",
      "targets": [ "ggml-vulkan" ],
--- a/135
+++ b/135
@@ -1,33 +1,23 @@
 # vim: filetype=dockerfile
 ARG FLAVOR=${TARGETARCH}
 ARG PARALLEL=8
-ARG ROCMVERSION=6.3.3
+ARG ROCMVERSION=7.2.1
 ARG JETPACK5VERSION=r35.4.1
 ARG JETPACK6VERSION=r36.4.0
 ARG CMAKEVERSION=3.31.2
 ARG NINJAVERSION=1.12.1
 ARG VULKANVERSION=1.4.321.1
-# We require gcc v10 minimum.  v10.3 has regressions, so the rockylinux 8.5 AppStream has the latest compatible version
+# Default empty stages for local MLX source overrides.
 # Override with: docker build --build-context local-mlx=../mlx --build-context local-mlx-c=../mlx-c
 FROM scratch AS local-mlx
 FROM scratch AS local-mlx-c
 FROM --platform=linux/amd64 rocm/dev-almalinux-8:${ROCMVERSION}-complete AS base-amd64
-RUN yum install -y yum-utils \
+RUN dnf install -y yum-utils ccache gcc-toolset-11-gcc gcc-toolset-11-gcc-c++ gcc-toolset-11-binutils \
    && yum-config-manager --add-repo https://dl.rockylinux.org/vault/rocky/8.5/AppStream/\$basearch/os/ \
    && rpm --import https://dl.rockylinux.org/pub/rocky/RPM-GPG-KEY-Rocky-8 \
    && dnf install -y yum-utils ccache gcc-toolset-10-gcc-10.2.1-8.2.el8 gcc-toolset-10-gcc-c++-10.2.1-8.2.el8 gcc-toolset-10-binutils-2.35-11.el8 \
    && dnf install -y ccache \
    && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo
-ENV PATH=/opt/rh/gcc-toolset-10/root/usr/bin:$PATH
+ENV PATH=/opt/rh/gcc-toolset-11/root/usr/bin:$PATH
 ARG VULKANVERSION
 RUN wget https://sdk.lunarg.com/sdk/download/${VULKANVERSION}/linux/vulkansdk-linux-x86_64-${VULKANVERSION}.tar.xz -O /tmp/vulkansdk-linux-x86_64-${VULKANVERSION}.tar.xz \
    && tar xvf /tmp/vulkansdk-linux-x86_64-${VULKANVERSION}.tar.xz \
    && dnf -y install ninja-build \
    && ln -s /usr/bin/python3 /usr/bin/python \  
    && /${VULKANVERSION}/vulkansdk -j 8 vulkan-headers \
    && /${VULKANVERSION}/vulkansdk -j 8 shaderc
 RUN cp -r /${VULKANVERSION}/x86_64/include/* /usr/local/include/ \
    && cp -r /${VULKANVERSION}/x86_64/lib/* /usr/local/lib
 ENV PATH=/${VULKANVERSION}/x86_64/bin:$PATH
 FROM --platform=linux/arm64 almalinux:8 AS base-arm64
 # install epel-release for ccache
@@ -38,100 +28,119 @@ ENV CC=clang CXX=clang++
 # Shared base stage: starts from base-amd64 or base-arm64, selected by TARGETARCH,
 # and adds the pinned CMake and Ninja releases on top.
 FROM base-${TARGETARCH} AS base
 ARG CMAKEVERSION
 ARG NINJAVERSION
 # Unpack the CMake release for this machine architecture (uname -m) into /usr/local.
 RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1
 # Install the Ninja release binary; the asset name gets an "-aarch64" suffix
 # only when uname -m reports aarch64, otherwise it is plain ninja-linux.zip.
 RUN dnf install -y unzip \
    && curl -fsSL -o /tmp/ninja.zip https://github.com/ninja-build/ninja/releases/download/v${NINJAVERSION}/ninja-linux$([ "$(uname -m)" = "aarch64" ] && echo "-aarch64").zip \
    && unzip /tmp/ninja.zip -d /usr/local/bin \
    && rm /tmp/ninja.zip
 # CMAKE_GENERATOR sets CMake's default generator for stages derived from this one.
 ENV CMAKE_GENERATOR=Ninja
 ENV LDFLAGS=-s
 # CPU backend stage: configures, builds and installs the 'CPU' CMake preset
 # with the gcc-toolset-11 compilers placed first on PATH.
 FROM base AS cpu
 RUN dnf install -y gcc-toolset-11-gcc gcc-toolset-11-gcc-c++
 ENV PATH=/opt/rh/gcc-toolset-11/root/usr/bin:$PATH
 # PARALLEL is still declared, but the added (+) commands below no longer reference it.
 ARG PARALLEL
 COPY CMakeLists.txt CMakePresets.json .
 COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
 # /root/.ccache is a BuildKit cache mount (presumably the ccache directory).
 # Arguments after `--` go to the native build tool (presumably Ninja, per
 # CMAKE_GENERATOR in the base stage): `-l $(nproc)` stops it from starting
 # new jobs while the load average is above the CPU count.
 RUN --mount=type=cache,target=/root/.ccache \
    cmake --preset 'CPU' \
-        && cmake --build --parallel ${PARALLEL} --preset 'CPU' \
+        && cmake --build --preset 'CPU' -- -l $(nproc) \
-        && cmake --install build --component CPU --strip --parallel ${PARALLEL}
+        && cmake --install build --component CPU --strip
 # CUDA 11 backend stage: installs the CUDA toolkit, then configures, builds
 # and installs the 'CUDA 11' CMake preset (install component: CUDA).
 FROM base AS cuda-11
 ARG CUDA11VERSION=11.8
 # ${CUDA11VERSION//./-} replaces dots with dashes (11.8 -> 11-8) for the package name.
 RUN dnf install -y cuda-toolkit-${CUDA11VERSION//./-}
 ENV PATH=/usr/local/cuda-11/bin:$PATH
 ARG PARALLEL
 COPY CMakeLists.txt CMakePresets.json .
 COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
 # Arguments after `--` go to the native build tool (presumably Ninja):
 # `-l $(nproc)` holds back new jobs while the load average exceeds the CPU count.
 RUN --mount=type=cache,target=/root/.ccache \
    cmake --preset 'CUDA 11' \
-        && cmake --build --parallel ${PARALLEL} --preset 'CUDA 11' \
+        && cmake --build --preset 'CUDA 11' -- -l $(nproc) \
-        && cmake --install build --component CUDA --strip --parallel ${PARALLEL}
+        && cmake --install build --component CUDA --strip
 # CUDA 12 backend stage: installs the CUDA toolkit, then configures, builds
 # and installs the 'CUDA 12' CMake preset (install component: CUDA).
 FROM base AS cuda-12
 ARG CUDA12VERSION=12.8
 # ${CUDA12VERSION//./-} replaces dots with dashes (12.8 -> 12-8) for the package name.
 RUN dnf install -y cuda-toolkit-${CUDA12VERSION//./-}
 ENV PATH=/usr/local/cuda-12/bin:$PATH
 ARG PARALLEL
 COPY CMakeLists.txt CMakePresets.json .
 COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
 # Arguments after `--` go to the native build tool (presumably Ninja):
 # `-l $(nproc)` holds back new jobs while the load average exceeds the CPU count.
 RUN --mount=type=cache,target=/root/.ccache \
    cmake --preset 'CUDA 12' \
-        && cmake --build --parallel ${PARALLEL} --preset 'CUDA 12' \
+        && cmake --build --preset 'CUDA 12' -- -l $(nproc) \
-        && cmake --install build --component CUDA --strip --parallel ${PARALLEL}
+        && cmake --install build --component CUDA --strip
 # CUDA 13 backend stage: installs the CUDA toolkit, then configures, builds
 # and installs the 'CUDA 13' CMake preset (install component: CUDA).
 FROM base AS cuda-13
 ARG CUDA13VERSION=13.0
 # ${CUDA13VERSION//./-} replaces dots with dashes (13.0 -> 13-0) for the package name.
 RUN dnf install -y cuda-toolkit-${CUDA13VERSION//./-}
 ENV PATH=/usr/local/cuda-13/bin:$PATH
 ARG PARALLEL
 COPY CMakeLists.txt CMakePresets.json .
 COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
 # Arguments after `--` go to the native build tool (presumably Ninja):
 # `-l $(nproc)` holds back new jobs while the load average exceeds the CPU count.
 RUN --mount=type=cache,target=/root/.ccache \
    cmake --preset 'CUDA 13' \
-        && cmake --build --parallel ${PARALLEL} --preset 'CUDA 13' \
+        && cmake --build --preset 'CUDA 13' -- -l $(nproc) \
-        && cmake --install build --component CUDA --strip --parallel ${PARALLEL}
+        && cmake --install build --component CUDA --strip
-FROM base AS rocm-6
+FROM base AS rocm-7
 # ROCm backend stage (renamed from rocm-6 to rocm-7 by this change): puts the
 # ROCm tool directories on PATH, then builds the ROCm preset and installs the
 # HIP component.
 ENV PATH=/opt/rocm/hcc/bin:/opt/rocm/hip/bin:/opt/rocm/bin:/opt/rocm/hcc/bin:$PATH
 ARG PARALLEL
 COPY CMakeLists.txt CMakePresets.json .
 COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
 # Arguments after `--` go to the native build tool (presumably Ninja):
 # `-l $(nproc)` holds back new jobs while the load average exceeds the CPU count.
 RUN --mount=type=cache,target=/root/.ccache \
-    cmake --preset 'ROCm 6' \
+    cmake --preset 'ROCm 7' \
-        && cmake --build --parallel ${PARALLEL} --preset 'ROCm 6' \
+        && cmake --build --preset 'ROCm 7' -- -l $(nproc) \
-        && cmake --install build --component HIP --strip --parallel ${PARALLEL}
+        && cmake --install build --component HIP --strip
 # Delete installed rocBLAS library files whose names contain gfx900 or gfx906.
 RUN rm -f dist/lib/ollama/rocm/rocblas/library/*gfx90[06]*
 # JetPack 5 backend stage (arm64), built on NVIDIA's l4t-jetpack image rather
 # than the shared base stage, so it installs its own CMake and Ninja before
 # building the 'JetPack 5' preset and installing the CUDA component.
 FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK5VERSION} AS jetpack-5
 ARG CMAKEVERSION
-RUN apt-get update && apt-get install -y curl ccache \
+ARG NINJAVERSION
-    && curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1
+RUN apt-get update && apt-get install -y curl ccache unzip \
    && curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1 \
    && curl -fsSL -o /tmp/ninja.zip https://github.com/ninja-build/ninja/releases/download/v${NINJAVERSION}/ninja-linux-aarch64.zip \
    && unzip /tmp/ninja.zip -d /usr/local/bin \
    && rm /tmp/ninja.zip
 # Make Ninja the default CMake generator for the build below.
 ENV CMAKE_GENERATOR=Ninja
 COPY CMakeLists.txt CMakePresets.json .
 COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
 ARG PARALLEL
 # Arguments after `--` go to the native build tool (presumably Ninja):
 # `-l $(nproc)` holds back new jobs while the load average exceeds the CPU count.
 RUN --mount=type=cache,target=/root/.ccache \
    cmake --preset 'JetPack 5' \
-        && cmake --build --parallel ${PARALLEL} --preset 'JetPack 5' \
+        && cmake --build --preset 'JetPack 5' -- -l $(nproc) \
-        && cmake --install build --component CUDA --strip --parallel ${PARALLEL}
+        && cmake --install build --component CUDA --strip
 # JetPack 6 backend stage (arm64), built on NVIDIA's l4t-jetpack image rather
 # than the shared base stage, so it installs its own CMake and Ninja before
 # building the 'JetPack 6' preset and installing the CUDA component.
 FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK6VERSION} AS jetpack-6
 ARG CMAKEVERSION
-RUN apt-get update && apt-get install -y curl ccache \
+ARG NINJAVERSION
-    && curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1
+RUN apt-get update && apt-get install -y curl ccache unzip \
    && curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1 \
    && curl -fsSL -o /tmp/ninja.zip https://github.com/ninja-build/ninja/releases/download/v${NINJAVERSION}/ninja-linux-aarch64.zip \
    && unzip /tmp/ninja.zip -d /usr/local/bin \
    && rm /tmp/ninja.zip
 # Make Ninja the default CMake generator for the build below.
 ENV CMAKE_GENERATOR=Ninja
 COPY CMakeLists.txt CMakePresets.json .
 COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
 ARG PARALLEL
 # Arguments after `--` go to the native build tool (presumably Ninja):
 # `-l $(nproc)` holds back new jobs while the load average exceeds the CPU count.
 RUN --mount=type=cache,target=/root/.ccache \
    cmake --preset 'JetPack 6' \
-        && cmake --build --parallel ${PARALLEL} --preset 'JetPack 6' \
+        && cmake --build --preset 'JetPack 6' -- -l $(nproc) \
-        && cmake --install build --component CUDA --strip --parallel ${PARALLEL}
+        && cmake --install build --component CUDA --strip
 # Vulkan backend stage: installs the Vulkan SDK into /usr/local, then
 # configures, builds and installs the 'Vulkan' CMake preset.
 FROM base AS vulkan
 ARG VULKANVERSION
 # One layer: download and unpack the SDK under /tmp, build vulkan-headers and
 # shaderc with the SDK's `vulkansdk` script, copy include/lib/bin into
 # /usr/local, then delete the unpacked SDK and the tarball.
 RUN ln -s /usr/bin/python3 /usr/bin/python \
    && wget https://sdk.lunarg.com/sdk/download/${VULKANVERSION}/linux/vulkansdk-linux-x86_64-${VULKANVERSION}.tar.xz -O /tmp/vulkansdk.tar.xz \
    && tar xvf /tmp/vulkansdk.tar.xz -C /tmp \
    && /tmp/${VULKANVERSION}/vulkansdk -j 8 vulkan-headers \
    && /tmp/${VULKANVERSION}/vulkansdk -j 8 shaderc \
    && cp -r /tmp/${VULKANVERSION}/x86_64/include/* /usr/local/include/ \
    && cp -r /tmp/${VULKANVERSION}/x86_64/lib/* /usr/local/lib \
    && cp -r /tmp/${VULKANVERSION}/x86_64/bin/* /usr/local/bin/ \
    && rm -rf /tmp/${VULKANVERSION} /tmp/vulkansdk.tar.xz
 COPY CMakeLists.txt CMakePresets.json .
 COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
 # Arguments after `--` go to the native build tool (presumably Ninja):
 # `-l $(nproc)` holds back new jobs while the load average exceeds the CPU count.
 RUN --mount=type=cache,target=/root/.ccache \
    cmake --preset 'Vulkan' \
-        && cmake --build --parallel --preset 'Vulkan' \
+        && cmake --build --preset 'Vulkan' -- -l $(nproc) \
-        && cmake --install build --component Vulkan --strip --parallel 8
+        && cmake --install build --component Vulkan --strip
 FROM base AS mlx
 ARG CUDA13VERSION=13.0
@@ -143,20 +152,27 @@ ENV PATH=/usr/local/cuda-13/bin:$PATH
 ENV BLAS_INCLUDE_DIRS=/usr/include/openblas
 ENV LAPACK_INCLUDE_DIRS=/usr/include/openblas
 ENV CGO_LDFLAGS="-L/usr/local/cuda-13/lib64 -L/usr/local/cuda-13/targets/x86_64-linux/lib/stubs"
 ARG PARALLEL
 WORKDIR /go/src/github.com/ollama/ollama
 COPY CMakeLists.txt CMakePresets.json .
 COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
-COPY x/ml/backend/mlx x/ml/backend/mlx
+COPY x/imagegen/mlx x/imagegen/mlx
 COPY go.mod go.sum .
-COPY MLX_VERSION .
+COPY MLX_VERSION MLX_C_VERSION .
 RUN curl -fsSL https://golang.org/dl/go$(awk '/^go/ { print $2 }' go.mod).linux-$(case $(uname -m) in x86_64) echo amd64 ;; aarch64) echo arm64 ;; esac).tar.gz | tar xz -C /usr/local
 ENV PATH=/usr/local/go/bin:$PATH
 RUN go mod download
 RUN --mount=type=cache,target=/root/.ccache \
-    cmake --preset 'MLX CUDA 13' -DBLAS_INCLUDE_DIRS=/usr/include/openblas -DLAPACK_INCLUDE_DIRS=/usr/include/openblas \
+    --mount=type=bind,from=local-mlx,target=/tmp/local-mlx \
-        && cmake --build --parallel ${PARALLEL} --preset 'MLX CUDA 13' \
+    --mount=type=bind,from=local-mlx-c,target=/tmp/local-mlx-c \
-        && cmake --install build --component MLX --strip --parallel ${PARALLEL}
+    if [ -f /tmp/local-mlx/CMakeLists.txt ]; then \
        export OLLAMA_MLX_SOURCE=/tmp/local-mlx; \
    fi \
    && if [ -f /tmp/local-mlx-c/CMakeLists.txt ]; then \
        export OLLAMA_MLX_C_SOURCE=/tmp/local-mlx-c; \
    fi \
    && cmake --preset 'MLX CUDA 13' -DBLAS_INCLUDE_DIRS=/usr/include/openblas -DLAPACK_INCLUDE_DIRS=/usr/include/openblas \
        && cmake --build --preset 'MLX CUDA 13' -- -l $(nproc) \
        && cmake --install build --component MLX --strip
 FROM base AS build
 WORKDIR /go/src/github.com/ollama/ollama
@@ -165,14 +181,14 @@ RUN curl -fsSL https://golang.org/dl/go$(awk '/^go/ { print $2 }' go.mod).linux-
 ENV PATH=/usr/local/go/bin:$PATH
 RUN go mod download
 COPY . .
 # Clone mlx-c headers for CGO (version from MLX_VERSION file)
 RUN git clone --depth 1 --branch "$(cat MLX_VERSION)" https://github.com/ml-explore/mlx-c.git build/_deps/mlx-c-src
 ARG GOFLAGS="'-ldflags=-w -s'"
 ENV CGO_ENABLED=1
-ENV CGO_CFLAGS="-I/go/src/github.com/ollama/ollama/build/_deps/mlx-c-src"
+ARG CGO_CFLAGS
 ARG CGO_CXXFLAGS
 ENV CGO_CFLAGS="${CGO_CFLAGS}"
 ENV CGO_CXXFLAGS="${CGO_CXXFLAGS}"
 RUN --mount=type=cache,target=/root/.cache/go-build \
-    go build -tags mlx -trimpath -buildmode=pie -o /bin/ollama .
+    go build -trimpath -buildmode=pie -o /bin/ollama .
 FROM --platform=linux/amd64 scratch AS amd64
 # COPY --from=cuda-11 dist/lib/ollama/ /lib/ollama/
@@ -189,10 +205,9 @@ COPY --from=jetpack-5 dist/lib/ollama/ /lib/ollama/
 COPY --from=jetpack-6 dist/lib/ollama/ /lib/ollama/
 # Empty (scratch) stage that holds only the libraries installed by the ROCm
 # build stage; the source stage is renamed from rocm-6 to rocm-7 here.
 FROM scratch AS rocm
-COPY --from=rocm-6 dist/lib/ollama /lib/ollama
+COPY --from=rocm-7 dist/lib/ollama /lib/ollama
 FROM ${FLAVOR} AS archive
 ARG VULKANVERSION
 COPY --from=cpu dist/lib/ollama /lib/ollama
 COPY --from=build /bin/ollama /bin/ollama
--- a/1
+++ b/1
@@ -0,0 +1 @@
 0726ca922fc902c4c61ef9c27d94132be418e945
--- a/2
+++ b/2
@@ -1 +1 @@
-v0.4.1
+38ad257088fb2193ad47e527cf6534a689f30943
--- a/README.md
+++ b/README.md
@@ -1,20 +1,30 @@
-<div align="center">
+<p align="center">
-  <a href="https://ollama.com">
+  <a href="https://ollama.com">
-    <img alt="ollama" width="240" src="https://github.com/ollama/ollama/assets/3325447/0d0b44e2-8f4a-4e99-9b52-a5c1c741c8f7">
+    <img src="https://github.com/ollama/ollama/assets/3325447/0d0b44e2-8f4a-4e99-9b52-a5c1c741c8f7" alt="ollama" width="200"/>
  </a>
-</div>
+</p>
 # Ollama
-Get up and running with large language models.
+Start building with open models.
 ## Download
 ### macOS
-[Download](https://ollama.com/download/Ollama.dmg)
+```shell
 curl -fsSL https://ollama.com/install.sh | sh
 ```
 or [download manually](https://ollama.com/download/Ollama.dmg)
 ### Windows
-[Download](https://ollama.com/download/OllamaSetup.exe)
+```shell
 irm https://ollama.com/install.ps1 | iex
 ```
 or [download manually](https://ollama.com/download/OllamaSetup.exe)
 ### Linux
@@ -36,647 +46,311 @@ The official [Ollama Docker image](https://hub.docker.com/r/ollama/ollama) `olla
 ### Community
 - [Discord](https://discord.gg/ollama)
 - [𝕏 (Twitter)](https://x.com/ollama)
 - [Reddit](https://reddit.com/r/ollama)
-## Quickstart
+## Get started
-To run and chat with [Gemma 3](https://ollama.com/library/gemma3):
+```
 ollama
 ```
-```shell
+You'll be prompted to run a model or connect Ollama to your existing agents or applications such as `claude`, `codex`, `openclaw` and more.
 ### Coding
 To launch a specific integration:
 ```
 ollama launch claude
 ```
 Supported integrations include [Claude Code](https://docs.ollama.com/integrations/claude-code), [Codex](https://docs.ollama.com/integrations/codex), [Droid](https://docs.ollama.com/integrations/droid), and [OpenCode](https://docs.ollama.com/integrations/opencode).
 ### AI assistant
 Use [OpenClaw](https://docs.ollama.com/integrations/openclaw) to turn Ollama into a personal AI assistant across WhatsApp, Telegram, Slack, Discord, and more:
 ```
 ollama launch openclaw
 ```
 ### Chat with a model
 Run and chat with [Gemma 3](https://ollama.com/library/gemma3):
 ```
 ollama run gemma3
 ```
-## Model library
+See [ollama.com/library](https://ollama.com/library) for the full list.
-Ollama supports a list of models available on [ollama.com/library](https://ollama.com/library "ollama model library")
+See the [quickstart guide](https://docs.ollama.com/quickstart) for more details.
 Here are some example models that can be downloaded:
 | Model              | Parameters | Size  | Download                         |
 | ------------------ | ---------- | ----- | -------------------------------- |
 | Gemma 3            | 1B         | 815MB | `ollama run gemma3:1b`           |
 | Gemma 3            | 4B         | 3.3GB | `ollama run gemma3`              |
 | Gemma 3            | 12B        | 8.1GB | `ollama run gemma3:12b`          |
 | Gemma 3            | 27B        | 17GB  | `ollama run gemma3:27b`          |
 | QwQ                | 32B        | 20GB  | `ollama run qwq`                 |
 | DeepSeek-R1        | 7B         | 4.7GB | `ollama run deepseek-r1`         |
 | DeepSeek-R1        | 671B       | 404GB | `ollama run deepseek-r1:671b`    |
 | Llama 4            | 109B       | 67GB  | `ollama run llama4:scout`        |
 | Llama 4            | 400B       | 245GB | `ollama run llama4:maverick`     |
 | Llama 3.3          | 70B        | 43GB  | `ollama run llama3.3`            |
 | Llama 3.2          | 3B         | 2.0GB | `ollama run llama3.2`            |
 | Llama 3.2          | 1B         | 1.3GB | `ollama run llama3.2:1b`         |
 | Llama 3.2 Vision   | 11B        | 7.9GB | `ollama run llama3.2-vision`     |
 | Llama 3.2 Vision   | 90B        | 55GB  | `ollama run llama3.2-vision:90b` |
 | Llama 3.1          | 8B         | 4.7GB | `ollama run llama3.1`            |
 | Llama 3.1          | 405B       | 231GB | `ollama run llama3.1:405b`       |
 | Phi 4              | 14B        | 9.1GB | `ollama run phi4`                |
 | Phi 4 Mini         | 3.8B       | 2.5GB | `ollama run phi4-mini`           |
 | Mistral            | 7B         | 4.1GB | `ollama run mistral`             |
 | Moondream 2        | 1.4B       | 829MB | `ollama run moondream`           |
 | Neural Chat        | 7B         | 4.1GB | `ollama run neural-chat`         |
 | Starling           | 7B         | 4.1GB | `ollama run starling-lm`         |
 | Code Llama         | 7B         | 3.8GB | `ollama run codellama`           |
 | Llama 2 Uncensored | 7B         | 3.8GB | `ollama run llama2-uncensored`   |
 | LLaVA              | 7B         | 4.5GB | `ollama run llava`               |
 | Granite-3.3        | 8B         | 4.9GB | `ollama run granite3.3`          |
 > [!NOTE]
 > You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
 ## Customize a model
 ### Import from GGUF
 Ollama supports importing GGUF models in the Modelfile:
 1. Create a file named `Modelfile`, with a `FROM` instruction with the local filepath to the model you want to import.
   ```
   FROM ./vicuna-33b.Q4_0.gguf
   ```
 2. Create the model in Ollama
   ```shell
   ollama create example -f Modelfile
   ```
 3. Run the model
   ```shell
   ollama run example
   ```
 ### Import from Safetensors
 See the [guide](https://docs.ollama.com/import) on importing models for more information.
 ### Customize a prompt
 Models from the Ollama library can be customized with a prompt. For example, to customize the `llama3.2` model:
 ```shell
 ollama pull llama3.2
 ```
 Create a `Modelfile`:
 ```
 FROM llama3.2
 # set the temperature to 1 [higher is more creative, lower is more coherent]
 PARAMETER temperature 1
 # set the system message
 SYSTEM """
 You are Mario from Super Mario Bros. Answer as Mario, the assistant, only.
 """
 ```
 Next, create and run the model:
 ```
 ollama create mario -f ./Modelfile
 ollama run mario
 >>> hi
 Hello! It's your friend Mario.
 ```
 For more information on working with a Modelfile, see the [Modelfile](https://docs.ollama.com/modelfile) documentation.
 ## CLI Reference
 ### Create a model
 `ollama create` is used to create a model from a Modelfile.
 ```shell
 ollama create mymodel -f ./Modelfile
 ```
 ### Pull a model
 ```shell
 ollama pull llama3.2
 ```
 > This command can also be used to update a local model. Only the diff will be pulled.
 ### Remove a model
 ```shell
 ollama rm llama3.2
 ```
 ### Copy a model
 ```shell
 ollama cp llama3.2 my-model
 ```
 ### Multiline input
 For multiline input, you can wrap text with `"""`:
 ```
 >>> """Hello,
 ... world!
 ... """
 I'm a basic program that prints the famous "Hello, world!" message to the console.
 ```
 ### Multimodal models
 ```
 ollama run llava "What's in this image? /Users/jmorgan/Desktop/smile.png"
 ```
 > **Output**: The image features a yellow smiley face, which is likely the central focus of the picture.
 ### Pass the prompt as an argument
 ```shell
 ollama run llama3.2 "Summarize this file: $(cat README.md)"
 ```
 > **Output**: Ollama is a lightweight, extensible framework for building and running language models on the local machine. It provides a simple API for creating, running, and managing models, as well as a library of pre-built models that can be easily used in a variety of applications.
 ### Show model information
 ```shell
 ollama show llama3.2
 ```
 ### List models on your computer
 ```shell
 ollama list
 ```
 ### List which models are currently loaded
 ```shell
 ollama ps
 ```
 ### Stop a model which is currently running
 ```shell
 ollama stop llama3.2
 ```
 ### Generate embeddings from the CLI
 ```shell
 ollama run embeddinggemma "Your text to embed"
 ```
 You can also pipe text for scripted workflows:
 ```shell
 echo "Your text to embed" | ollama run embeddinggemma
 ```
 ### Start Ollama
 `ollama serve` starts Ollama without running the desktop application.
 ## Building
 See the [developer guide](https://github.com/ollama/ollama/blob/main/docs/development.md)
 ### Running local builds
 After building Ollama, start the server:
 ```shell
 ./ollama serve
 ```
 Finally, in a separate shell, run a model:
 ```shell
 ./ollama run llama3.2
 ```
 ## Building with MLX (experimental)
 First build the MLX libraries:
 ```shell
 cmake --preset MLX
 cmake --build --preset MLX --parallel
 cmake --install build --component MLX
 ```
 When building with the `-tags mlx` flag, the main `ollama` binary includes MLX support for experimental features like image generation:
 ```shell
 go build -tags mlx .
 ```
 Finally, start the server:
 ```
 ./ollama serve
 ```
 ### Building MLX with CUDA
 When building with CUDA, use the preset "MLX CUDA 13" or "MLX CUDA 12" to enable CUDA with default architectures:
 ```shell
 cmake --preset 'MLX CUDA 13'
 cmake --build --preset 'MLX CUDA 13' --parallel
 cmake --install build --component MLX
 ```
 ## REST API
 Ollama has a REST API for running and managing models.
 ### Generate a response
 ```shell
 curl http://localhost:11434/api/generate -d '{
  "model": "llama3.2",
  "prompt":"Why is the sky blue?"
 }'
 ```
 ### Chat with a model
 ```shell
 curl http://localhost:11434/api/chat -d '{
-  "model": "llama3.2",
+  "model": "gemma3",
-  "messages": [
+  "messages": [{
-    { "role": "user", "content": "why is the sky blue?" }
+    "role": "user",
-  ]
+    "content": "Why is the sky blue?"
  }],
  "stream": false
 }'
 ```
-See the [API documentation](./docs/api.md) for all endpoints.
+See the [API documentation](https://docs.ollama.com/api) for all endpoints.
 ### Python
 ```
 pip install ollama
 ```
 ```python
 from ollama import chat
 response = chat(model='gemma3', messages=[
  {
    'role': 'user',
    'content': 'Why is the sky blue?',
  },
 ])
 print(response.message.content)
 ```
 ### JavaScript
 ```
 npm i ollama
 ```
 ```javascript
 import ollama from "ollama";
 const response = await ollama.chat({
  model: "gemma3",
  messages: [{ role: "user", content: "Why is the sky blue?" }],
 });
 console.log(response.message.content);
 ```
 ## Supported backends
 - [llama.cpp](https://github.com/ggml-org/llama.cpp) project founded by Georgi Gerganov.
 ## Documentation
 - [CLI reference](https://docs.ollama.com/cli)
 - [REST API reference](https://docs.ollama.com/api)
 - [Importing models](https://docs.ollama.com/import)
 - [Modelfile reference](https://docs.ollama.com/modelfile)
 - [Building from source](https://github.com/ollama/ollama/blob/main/docs/development.md)
 ## Community Integrations
-### Web & Desktop
+> Want to add your project? Open a pull request.
- [Onyx](https://github.com/onyx-dot-app/onyx)
+### Chat Interfaces
 - [Open WebUI](https://github.com/open-webui/open-webui)
 - [SwiftChat (macOS with ReactNative)](https://github.com/aws-samples/swift-chat)
 - [Enchanted (macOS native)](https://github.com/AugustDev/enchanted)
 - [Hollama](https://github.com/fmaclen/hollama)
 - [Lollms WebUI (Single user)](https://github.com/ParisNeo/lollms-webui)
 - [Lollms (Multi users)](https://github.com/ParisNeo/lollms)
 - [LibreChat](https://github.com/danny-avila/LibreChat)
 - [Bionic GPT](https://github.com/bionic-gpt/bionic-gpt)
 - [HTML UI](https://github.com/rtcfirefly/ollama-ui)
 - [AI-UI](https://github.com/bajahaw/ai-ui)
 - [Saddle](https://github.com/jikkuatwork/saddle)
 - [TagSpaces](https://www.tagspaces.org) (A platform for file-based apps, [utilizing Ollama](https://docs.tagspaces.org/ai/) for the generation of tags and descriptions)
 - [Chatbot UI](https://github.com/ivanfioravanti/chatbot-ollama)
 - [Chatbot UI v2](https://github.com/mckaywrigley/chatbot-ui)
 - [Typescript UI](https://github.com/ollama-interface/Ollama-Gui?tab=readme-ov-file)
 - [Minimalistic React UI for Ollama Models](https://github.com/richawo/minimal-llm-ui)
 - [Ollamac](https://github.com/kevinhermawan/Ollamac)
 - [big-AGI](https://github.com/enricoros/big-AGI)
 - [Cheshire Cat assistant framework](https://github.com/cheshire-cat-ai/core)
 - [Amica](https://github.com/semperai/amica)
 - [chatd](https://github.com/BruceMacD/chatd)
 - [Ollama-SwiftUI](https://github.com/kghandour/Ollama-SwiftUI)
 - [Dify.AI](https://github.com/langgenius/dify)
 - [MindMac](https://mindmac.app)
 - [NextJS Web Interface for Ollama](https://github.com/jakobhoeg/nextjs-ollama-llm-ui)
 - [Msty](https://msty.app)
 - [Chatbox](https://github.com/Bin-Huang/Chatbox)
 - [WinForm Ollama Copilot](https://github.com/tgraupmann/WinForm_Ollama_Copilot)
 - [NextChat](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web) with [Get Started Doc](https://docs.nextchat.dev/models/ollama)
 - [Alpaca WebUI](https://github.com/mmo80/alpaca-webui)
 - [OllamaGUI](https://github.com/enoch1118/ollamaGUI)
 - [OpenAOE](https://github.com/InternLM/OpenAOE)
 - [Odin Runes](https://github.com/leonid20000/OdinRunes)
 - [LLM-X](https://github.com/mrdjohnson/llm-x) (Progressive Web App)
 - [AnythingLLM (Docker + macOS/Windows/Linux native app)](https://github.com/Mintplex-Labs/anything-llm)
 - [Ollama Basic Chat: Uses HyperDiv Reactive UI](https://github.com/rapidarchitect/ollama_basic_chat)
 - [Ollama-chats RPG](https://github.com/drazdra/ollama-chats)
 - [IntelliBar](https://intellibar.app/) (AI-powered assistant for macOS)
 - [Jirapt](https://github.com/AliAhmedNada/jirapt) (Jira Integration to generate issues, tasks, epics)
 - [ojira](https://github.com/AliAhmedNada/ojira) (Jira chrome plugin to easily generate descriptions for tasks)
 - [QA-Pilot](https://github.com/reid41/QA-Pilot) (Interactive chat tool that can leverage Ollama models for rapid understanding and navigation of GitHub code repositories)
 - [ChatOllama](https://github.com/sugarforever/chat-ollama) (Open Source Chatbot based on Ollama with Knowledge Bases)
 - [CRAG Ollama Chat](https://github.com/Nagi-ovo/CRAG-Ollama-Chat) (Simple Web Search with Corrective RAG)
 - [RAGFlow](https://github.com/infiniflow/ragflow) (Open-source Retrieval-Augmented Generation engine based on deep document understanding)
 - [StreamDeploy](https://github.com/StreamDeploy-DevRel/streamdeploy-llm-app-scaffold) (LLM Application Scaffold)
 - [chat](https://github.com/swuecho/chat) (chat web app for teams)
 - [Lobe Chat](https://github.com/lobehub/lobe-chat) with [Integrating Doc](https://lobehub.com/docs/self-hosting/examples/ollama)
 - [Ollama RAG Chatbot](https://github.com/datvodinh/rag-chatbot.git) (Local Chat with multiple PDFs using Ollama and RAG)
 - [BrainSoup](https://www.nurgo-software.com/products/brainsoup) (Flexible native client with RAG & multi-agent automation)
 - [macai](https://github.com/Renset/macai) (macOS client for Ollama, ChatGPT, and other compatible API back-ends)
 - [RWKV-Runner](https://github.com/josStorer/RWKV-Runner) (RWKV offline LLM deployment tool, also usable as a client for ChatGPT and Ollama)
 - [Ollama Grid Search](https://github.com/dezoito/ollama-grid-search) (app to evaluate and compare models)
 - [Olpaka](https://github.com/Otacon/olpaka) (User-friendly Flutter Web App for Ollama)
 - [Casibase](https://casibase.org) (An open source AI knowledge base and dialogue system combining the latest RAG, SSO, Ollama support, and multiple large language models.)
 - [OllamaSpring](https://github.com/CrazyNeil/OllamaSpring) (Ollama Client for macOS)
 - [LLocal.in](https://github.com/kartikm7/llocal) (Easy to use Electron Desktop Client for Ollama)
 - [Shinkai Desktop](https://github.com/dcSpark/shinkai-apps) (Two click install Local AI using Ollama + Files + RAG)
 - [AiLama](https://github.com/zeyoyt/ailama) (A Discord User App that allows you to interact with Ollama anywhere in Discord)
 - [Ollama with Google Mesop](https://github.com/rapidarchitect/ollama_mesop/) (Mesop Chat Client implementation with Ollama)
 - [R2R](https://github.com/SciPhi-AI/R2R) (Open-source RAG engine)
 - [Ollama-Kis](https://github.com/elearningshow/ollama-kis) (A simple easy-to-use GUI with sample custom LLM for Drivers Education)
 - [OpenGPA](https://opengpa.org) (Open-source offline-first Enterprise Agentic Application)
 - [Painting Droid](https://github.com/mateuszmigas/painting-droid) (Painting app with AI integrations)
 - [Kerlig AI](https://www.kerlig.com/) (AI writing assistant for macOS)
 - [AI Studio](https://github.com/MindWorkAI/AI-Studio)
 - [Sidellama](https://github.com/gyopak/sidellama) (browser-based LLM client)
 - [LLMStack](https://github.com/trypromptly/LLMStack) (No-code multi-agent framework to build LLM agents and workflows)
 - [BoltAI for Mac](https://boltai.com) (AI Chat Client for Mac)
 - [Harbor](https://github.com/av/harbor) (Containerized LLM Toolkit with Ollama as default backend)
 - [PyGPT](https://github.com/szczyglis-dev/py-gpt) (AI desktop assistant for Linux, Windows, and Mac)
 - [Alpaca](https://github.com/Jeffser/Alpaca) (An Ollama client application for Linux and macOS made with GTK4 and Adwaita)
 - [AutoGPT](https://github.com/Significant-Gravitas/AutoGPT/blob/master/docs/content/platform/ollama.md) (AutoGPT Ollama integration)
 - [Go-CREW](https://www.jonathanhecl.com/go-crew/) (Powerful Offline RAG in Golang)
 - [PartCAD](https://github.com/openvmp/partcad/) (CAD model generation with OpenSCAD and CadQuery)
 - [Ollama4j Web UI](https://github.com/ollama4j/ollama4j-web-ui) - Java-based Web UI for Ollama built with Vaadin, Spring Boot, and Ollama4j
 - [PyOllaMx](https://github.com/kspviswa/pyOllaMx) - macOS application capable of chatting with both Ollama and Apple MLX models.
 - [Cline](https://github.com/cline/cline) - Formerly known as Claude Dev, a VS Code extension for multi-file/whole-repo coding
 - [Void](https://github.com/voideditor/void) (Open source AI code editor and Cursor alternative)
 - [Cherry Studio](https://github.com/kangfenmao/cherry-studio) (Desktop client with Ollama support)
 - [ConfiChat](https://github.com/1runeberg/confichat) (Lightweight, standalone, multi-platform, and privacy-focused LLM chat interface with optional encryption)
 - [Archyve](https://github.com/nickthecook/archyve) (RAG-enabling document library)
 - [crewAI with Mesop](https://github.com/rapidarchitect/ollama-crew-mesop) (Mesop Web Interface to run crewAI with Ollama)
 - [Tkinter-based client](https://github.com/chyok/ollama-gui) (Python tkinter-based Client for Ollama)
 - [LLMChat](https://github.com/trendy-design/llmchat) (Privacy focused, 100% local, intuitive all-in-one chat interface)
 - [Local Multimodal AI Chat](https://github.com/Leon-Sander/Local-Multimodal-AI-Chat) (Ollama-based LLM Chat with support for multiple features, including PDF RAG, voice chat, image-based interactions, and integration with OpenAI.)
 - [ARGO](https://github.com/xark-argo/argo) (Locally download and run Ollama and Huggingface models with RAG and deep research on Mac/Windows/Linux)
 - [OrionChat](https://github.com/EliasPereirah/OrionChat) - OrionChat is a web interface for chatting with different AI providers
 - [G1](https://github.com/bklieger-groq/g1) (Prototype of using prompting strategies to improve the LLM's reasoning through o1-like reasoning chains.)
 - [Web management](https://github.com/lemonit-eric-mao/ollama-web-management) (Web management page)
 - [Promptery](https://github.com/promptery/promptery) (Desktop client for Ollama)
 - [Ollama App](https://github.com/JHubi1/ollama-app) (Modern and easy-to-use multi-platform client for Ollama)
 - [chat-ollama](https://github.com/annilq/chat-ollama) (a React Native client for Ollama)
 - [SpaceLlama](https://github.com/tcsenpai/spacellama) (Firefox and Chrome extension to quickly summarize web pages with Ollama in a sidebar)
 - [YouLama](https://github.com/tcsenpai/youlama) (Webapp to quickly summarize any YouTube video, supporting Invidious as well)
 - [DualMind](https://github.com/tcsenpai/dualmind) (Experimental app allowing two models to talk to each other in the terminal or in a web interface)
 - [ollamarama-matrix](https://github.com/h1ddenpr0cess20/ollamarama-matrix) (Ollama chatbot for the Matrix chat protocol)
 - [ollama-chat-app](https://github.com/anan1213095357/ollama-chat-app) (Flutter-based chat app)
 - [Perfect Memory AI](https://www.perfectmemory.ai/) (Productivity AI assistant personalized by what you have seen on your screen, heard, and said in meetings)
 - [Hexabot](https://github.com/hexastack/hexabot) (A conversational AI builder)
 - [Reddit Rate](https://github.com/rapidarchitect/reddit_analyzer) (Search and Rate Reddit topics with a weighted summation)
 - [OpenTalkGpt](https://github.com/adarshM84/OpenTalkGpt) (Chrome Extension to manage open-source models supported by Ollama, create custom models, and chat with models from a user-friendly UI)
 - [VT](https://github.com/vinhnx/vt.ai) (A minimal multimodal AI chat app, with dynamic conversation routing. Supports local models via Ollama)
 - [Nosia](https://github.com/nosia-ai/nosia) (Easy to install and use RAG platform based on Ollama)
 - [Witsy](https://github.com/nbonamy/witsy) (An AI Desktop application available for Mac/Windows/Linux)
 - [Abbey](https://github.com/US-Artificial-Intelligence/abbey) (A configurable AI interface server with notebooks, document storage, and YouTube support)
 - [Minima](https://github.com/dmayboroda/minima) (RAG with on-premises or fully local workflow)
 - [aidful-ollama-model-delete](https://github.com/AidfulAI/aidful-ollama-model-delete) (User interface for simplified model cleanup)
 - [Perplexica](https://github.com/ItzCrazyKns/Perplexica) (An AI-powered search engine & an open-source alternative to Perplexity AI)
 - [Ollama Chat WebUI for Docker](https://github.com/oslook/ollama-webui) (Support for local docker deployment, lightweight ollama webui)
 - [AI Toolkit for Visual Studio Code](https://aka.ms/ai-tooklit/ollama-docs) (Microsoft-official VS Code extension to chat, test, evaluate models with Ollama support, and use them in your AI applications.)
 - [MinimalNextOllamaChat](https://github.com/anilkay/MinimalNextOllamaChat) (Minimal Web UI for Chat and Model Control)
 - [Chipper](https://github.com/TilmanGriesel/chipper) AI interface for tinkerers (Ollama, Haystack RAG, Python)
 - [ChibiChat](https://github.com/CosmicEventHorizon/ChibiChat) (Kotlin-based Android app to chat with Ollama and Koboldcpp API endpoints)
 - [LocalLLM](https://github.com/qusaismael/localllm) (Minimal web app for running Ollama models with a GUI)
 - [Ollamazing](https://github.com/buiducnhat/ollamazing) (Web extension to run Ollama models)
 - [OpenDeepResearcher-via-searxng](https://github.com/benhaotang/OpenDeepResearcher-via-searxng) (A Deep Research equivalent endpoint with Ollama support for running locally)
 - [AntSK](https://github.com/AIDotNet/AntSK) (Out-of-the-box & Adaptable RAG Chatbot)
 - [MaxKB](https://github.com/1Panel-dev/MaxKB/) (Ready-to-use & flexible RAG Chatbot)
 - [yla](https://github.com/danielekp/yla) (Web interface to freely interact with your customized models)
 - [LangBot](https://github.com/RockChinQ/LangBot) (LLM-based instant messaging bots platform, with Agents, RAG features, supports multiple platforms)
 - [1Panel](https://github.com/1Panel-dev/1Panel/) (Web-based Linux Server Management Tool)
 - [AstrBot](https://github.com/Soulter/AstrBot/) (User-friendly LLM-based multi-platform chatbot with a WebUI, supporting RAG, LLM agents, and plugins integration)
 - [Reins](https://github.com/ibrahimcetin/reins) (Easily tweak parameters, customize system prompts per chat, and enhance your AI experiments with reasoning model support.)
 - [Flufy](https://github.com/Aharon-Bensadoun/Flufy) (A beautiful chat interface for interacting with Ollama's API. Built with React, TypeScript, and Material-UI.)
 - [Ellama](https://github.com/zeozeozeo/ellama) (Friendly native app to chat with an Ollama instance)
 - [screenpipe](https://github.com/mediar-ai/screenpipe) Build agents powered by your screen history
 - [Ollamb](https://github.com/hengkysteen/ollamb) (Simple yet rich in features, cross-platform built with Flutter and designed for Ollama. Try the [web demo](https://hengkysteen.github.io/demo/ollamb/).)
 - [Writeopia](https://github.com/Writeopia/Writeopia) (Text editor with integration with Ollama)
 - [AppFlowy](https://github.com/AppFlowy-IO/AppFlowy) (AI collaborative workspace with Ollama, cross-platform and self-hostable)
 - [Lumina](https://github.com/cushydigit/lumina.git) (A lightweight, minimal React.js frontend for interacting with Ollama servers)
 - [Tiny Notepad](https://pypi.org/project/tiny-notepad) (A lightweight, notepad-like interface to chat with Ollama, available on PyPI)
 - [macLlama (macOS native)](https://github.com/hellotunamayo/macLlama) (A native macOS GUI application for interacting with Ollama models, featuring a chat interface.)
 - [GPTranslate](https://github.com/philberndt/GPTranslate) (A fast and lightweight, AI-powered desktop translation application written with Rust and Tauri. Features real-time translation with OpenAI/Azure/Ollama.)
 - [ollama launcher](https://github.com/NGC13009/ollama-launcher) (A launcher for Ollama, aiming to provide users with convenient functions such as ollama server launching, management, or configuration.)
 - [ai-hub](https://github.com/Aj-Seven/ai-hub) (AI Hub supports multiple models via API keys and Chat support via Ollama API.)
 - [Mayan EDMS](https://gitlab.com/mayan-edms/mayan-edms) (Open source document management system to organize, tag, search, and automate your files with powerful Ollama driven workflows.)
 - [Serene Pub](https://github.com/doolijb/serene-pub) (Beginner friendly, open source AI Roleplaying App for Windows, Mac OS and Linux. Search, download and use models with Ollama all inside the app.)
 - [Andes](https://github.com/aqerd/andes) (A Visual Studio Code extension that provides a local UI interface for Ollama models)
 - [KDeps](https://github.com/kdeps/kdeps) (KDeps is an offline-first AI framework for building Dockerized full-stack AI applications declaratively using Apple PKL, integrating APIs with Ollama on the backend.)
 - [Clueless](https://github.com/KashyapTan/clueless) (Open Source & Local Cluely: A desktop application LLM assistant to help you talk to anything on your screen using locally served Ollama models. Also undetectable to screenshare)
 - [ollama-co2](https://github.com/carbonatedWaterOrg/ollama-co2) (FastAPI web interface for monitoring and managing local and remote Ollama servers with real-time model monitoring and concurrent downloads)
 - [Hillnote](https://hillnote.com) (A Markdown-first workspace designed to supercharge your AI workflow. Create documents ready to integrate with Claude, ChatGPT, Gemini, Cursor, and more - all while keeping your work on your device.)
-### Cloud
+#### Web
 - [Open WebUI](https://github.com/open-webui/open-webui) - Extensible, self-hosted AI interface
 - [Onyx](https://github.com/onyx-dot-app/onyx) - Connected AI workspace
 - [LibreChat](https://github.com/danny-avila/LibreChat) - Enhanced ChatGPT clone with multi-provider support
 - [Lobe Chat](https://github.com/lobehub/lobe-chat) - Modern chat framework with plugin ecosystem ([docs](https://lobehub.com/docs/self-hosting/examples/ollama))
 - [NextChat](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web) - Cross-platform ChatGPT UI ([docs](https://docs.nextchat.dev/models/ollama))
 - [Perplexica](https://github.com/ItzCrazyKns/Perplexica) - AI-powered search engine, open-source Perplexity alternative
 - [big-AGI](https://github.com/enricoros/big-AGI) - AI suite for professionals
 - [Lollms WebUI](https://github.com/ParisNeo/lollms-webui) - Multi-model web interface
 - [ChatOllama](https://github.com/sugarforever/chat-ollama) - Chatbot with knowledge bases
 - [Bionic GPT](https://github.com/bionic-gpt/bionic-gpt) - On-premise AI platform
 - [Chatbot UI](https://github.com/ivanfioravanti/chatbot-ollama) - ChatGPT-style web interface
 - [Hollama](https://github.com/fmaclen/hollama) - Minimal web interface
 - [Chatbox](https://github.com/Bin-Huang/Chatbox) - Desktop and web AI client
 - [chat](https://github.com/swuecho/chat) - Chat web app for teams
 - [Ollama RAG Chatbot](https://github.com/datvodinh/rag-chatbot.git) - Chat with multiple PDFs using RAG
 - [Tkinter-based client](https://github.com/chyok/ollama-gui) - Python desktop client
 #### Desktop
 - [Dify.AI](https://github.com/langgenius/dify) - LLM app development platform
 - [AnythingLLM](https://github.com/Mintplex-Labs/anything-llm) - All-in-one AI app for Mac, Windows, and Linux
 - [Maid](https://github.com/Mobile-Artificial-Intelligence/maid) - Cross-platform mobile and desktop client
 - [Witsy](https://github.com/nbonamy/witsy) - AI desktop app for Mac, Windows, and Linux
 - [Cherry Studio](https://github.com/kangfenmao/cherry-studio) - Multi-provider desktop client
 - [Ollama App](https://github.com/JHubi1/ollama-app) - Multi-platform client for desktop and mobile
 - [PyGPT](https://github.com/szczyglis-dev/py-gpt) - AI desktop assistant for Linux, Windows, and Mac
 - [Alpaca](https://github.com/Jeffser/Alpaca) - GTK4 client for Linux and macOS
 - [SwiftChat](https://github.com/aws-samples/swift-chat) - Cross-platform including iOS, Android, and Apple Vision Pro
 - [Enchanted](https://github.com/AugustDev/enchanted) - Native macOS and iOS client
 - [RWKV-Runner](https://github.com/josStorer/RWKV-Runner) - Multi-model desktop runner
 - [Ollama Grid Search](https://github.com/dezoito/ollama-grid-search) - Evaluate and compare models
 - [macai](https://github.com/Renset/macai) - macOS client for Ollama and ChatGPT
 - [AI Studio](https://github.com/MindWorkAI/AI-Studio) - Multi-provider desktop IDE
 - [Reins](https://github.com/ibrahimcetin/reins) - Parameter tuning and reasoning model support
 - [ConfiChat](https://github.com/1runeberg/confichat) - Privacy-focused with optional encryption
 - [LLocal.in](https://github.com/kartikm7/llocal) - Electron desktop client
 - [MindMac](https://mindmac.app) - AI chat client for Mac
 - [Msty](https://msty.app) - Multi-model desktop client
 - [BoltAI for Mac](https://boltai.com) - AI chat client for Mac
 - [IntelliBar](https://intellibar.app/) - AI-powered assistant for macOS
 - [Kerlig AI](https://www.kerlig.com/) - AI writing assistant for macOS
 - [Hillnote](https://hillnote.com) - Markdown-first AI workspace
 - [Perfect Memory AI](https://www.perfectmemory.ai/) - Productivity AI personalized by screen and meeting history
 #### Mobile
 - [Ollama Android Chat](https://github.com/sunshine0523/OllamaServer) - One-click Ollama on Android
 > SwiftChat, Enchanted, Maid, Ollama App, Reins, and ConfiChat listed above also support mobile platforms.
 ### Code Editors & Development
 - [Cline](https://github.com/cline/cline) - VS Code extension for multi-file/whole-repo coding
 - [Continue](https://github.com/continuedev/continue) - Open-source AI code assistant for any IDE
 - [Void](https://github.com/voideditor/void) - Open source AI code editor, Cursor alternative
 - [Copilot for Obsidian](https://github.com/logancyang/obsidian-copilot) - AI assistant for Obsidian
 - [twinny](https://github.com/rjmacarthy/twinny) - Copilot and Copilot chat alternative
 - [gptel Emacs client](https://github.com/karthink/gptel) - LLM client for Emacs
 - [Ollama Copilot](https://github.com/bernardo-bruning/ollama-copilot) - Use Ollama as GitHub Copilot
 - [Obsidian Local GPT](https://github.com/pfrankov/obsidian-local-gpt) - Local AI for Obsidian
 - [Ellama Emacs client](https://github.com/s-kostyaev/ellama) - LLM tool for Emacs
 - [orbiton](https://github.com/xyproto/orbiton) - Config-free text editor with Ollama tab completion
 - [AI ST Completion](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) - Sublime Text 4 AI assistant
 - [VT Code](https://github.com/vinhnx/vtcode) - Rust-based terminal coding agent with Tree-sitter
 - [QodeAssist](https://github.com/Palm1r/QodeAssist) - AI coding assistant for Qt Creator
 - [AI Toolkit for VS Code](https://aka.ms/ai-tooklit/ollama-docs) - Microsoft-official VS Code extension
 - [Open Interpreter](https://docs.openinterpreter.com/language-model-setup/local-models/ollama) - Natural language interface for computers
 ### Libraries & SDKs
 - [LiteLLM](https://github.com/BerriAI/litellm) - Unified API for 100+ LLM providers
 - [Semantic Kernel](https://github.com/microsoft/semantic-kernel/tree/main/python/semantic_kernel/connectors/ai/ollama) - Microsoft AI orchestration SDK
 - [LangChain4j](https://github.com/langchain4j/langchain4j) - Java LangChain ([example](https://github.com/langchain4j/langchain4j-examples/tree/main/ollama-examples/src/main/java))
 - [LangChainGo](https://github.com/tmc/langchaingo/) - Go LangChain ([example](https://github.com/tmc/langchaingo/tree/main/examples/ollama-completion-example))
 - [Spring AI](https://github.com/spring-projects/spring-ai) - Spring framework AI support ([docs](https://docs.spring.io/spring-ai/reference/api/chat/ollama-chat.html))
 - [LangChain](https://python.langchain.com/docs/integrations/chat/ollama/) and [LangChain.js](https://js.langchain.com/docs/integrations/chat/ollama/) with [example](https://js.langchain.com/docs/tutorials/local_rag/)
 - [Ollama for Ruby](https://github.com/crmne/ruby_llm) - Ruby LLM library
 - [any-llm](https://github.com/mozilla-ai/any-llm) - Unified LLM interface by Mozilla
 - [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp) - .NET SDK
 - [LangChainRust](https://github.com/Abraxas-365/langchain-rust) - Rust LangChain ([example](https://github.com/Abraxas-365/langchain-rust/blob/main/examples/llm_ollama.rs))
 - [Agents-Flex for Java](https://github.com/agents-flex/agents-flex) - Java agent framework ([example](https://github.com/agents-flex/agents-flex/tree/main/agents-flex-llm/agents-flex-llm-ollama/src/test/java/com/agentsflex/llm/ollama))
 - [Elixir LangChain](https://github.com/brainlid/langchain) - Elixir LangChain
 - [Ollama-rs for Rust](https://github.com/pepperoni21/ollama-rs) - Rust SDK
 - [LangChain for .NET](https://github.com/tryAGI/LangChain) - .NET LangChain ([example](https://github.com/tryAGI/LangChain/blob/main/examples/LangChain.Samples.OpenAI/Program.cs))
 - [chromem-go](https://github.com/philippgille/chromem-go) - Go vector database with Ollama embeddings ([example](https://github.com/philippgille/chromem-go/tree/v0.5.0/examples/rag-wikipedia-ollama))
 - [LangChainDart](https://github.com/davidmigloz/langchain_dart) - Dart LangChain
 - [LlmTornado](https://github.com/lofcz/llmtornado) - Unified C# interface for multiple inference APIs
 - [Ollama4j for Java](https://github.com/ollama4j/ollama4j) - Java SDK
 - [Ollama for Laravel](https://github.com/cloudstudio/ollama-laravel) - Laravel integration
 - [Ollama for Swift](https://github.com/mattt/ollama-swift) - Swift SDK
 - [LlamaIndex](https://docs.llamaindex.ai/en/stable/examples/llm/ollama/) and [LlamaIndexTS](https://ts.llamaindex.ai/modules/llms/available_llms/ollama) - Data framework for LLM apps
 - [Haystack](https://github.com/deepset-ai/haystack-integrations/blob/main/integrations/ollama.md) - AI pipeline framework
 - [Firebase Genkit](https://firebase.google.com/docs/genkit/plugins/ollama) - Google AI framework
 - [Ollama-hpp for C++](https://github.com/jmont-dev/ollama-hpp) - C++ SDK
 - [PromptingTools.jl](https://github.com/svilupp/PromptingTools.jl) - Julia LLM toolkit ([example](https://svilupp.github.io/PromptingTools.jl/dev/examples/working_with_ollama))
 - [Ollama for R - rollama](https://github.com/JBGruber/rollama) - R SDK
 - [Portkey](https://portkey.ai/docs/welcome/integration-guides/ollama) - AI gateway
 - [Testcontainers](https://testcontainers.com/modules/ollama/) - Container-based testing
 - [LLPhant](https://github.com/theodo-group/LLPhant?tab=readme-ov-file#ollama) - PHP AI framework
 ### Frameworks & Agents
 - [AutoGPT](https://github.com/Significant-Gravitas/AutoGPT/blob/master/docs/content/platform/ollama.md) - Autonomous AI agent platform
 - [crewAI](https://github.com/crewAIInc/crewAI) - Multi-agent orchestration framework
 - [Strands Agents](https://github.com/strands-agents/sdk-python) - Model-driven agent building by AWS
 - [Cheshire Cat](https://github.com/cheshire-cat-ai/core) - AI assistant framework
 - [any-agent](https://github.com/mozilla-ai/any-agent) - Unified agent framework interface by Mozilla
 - [Stakpak](https://github.com/stakpak/agent) - Open source DevOps agent
 - [Hexabot](https://github.com/hexastack/hexabot) - Conversational AI builder
 - [Neuro SAN](https://github.com/cognizant-ai-lab/neuro-san-studio) - Multi-agent orchestration ([docs](https://github.com/cognizant-ai-lab/neuro-san-studio/blob/main/docs/user_guide.md#ollama))
 ### RAG & Knowledge Bases
 - [RAGFlow](https://github.com/infiniflow/ragflow) - RAG engine based on deep document understanding
 - [R2R](https://github.com/SciPhi-AI/R2R) - Open-source RAG engine
 - [MaxKB](https://github.com/1Panel-dev/MaxKB/) - Ready-to-use RAG chatbot
 - [Minima](https://github.com/dmayboroda/minima) - On-premises or fully local RAG
 - [Chipper](https://github.com/TilmanGriesel/chipper) - AI interface with Haystack RAG
 - [ARGO](https://github.com/xark-argo/argo) - RAG and deep research on Mac/Windows/Linux
 - [Archyve](https://github.com/nickthecook/archyve) - RAG-enabling document library
 - [Casibase](https://casibase.org) - AI knowledge base with RAG and SSO
 - [BrainSoup](https://www.nurgo-software.com/products/brainsoup) - Native client with RAG and multi-agent automation
 ### Bots & Messaging
 - [LangBot](https://github.com/RockChinQ/LangBot) - Multi-platform messaging bots with agents and RAG
 - [AstrBot](https://github.com/Soulter/AstrBot/) - Multi-platform chatbot with RAG and plugins
 - [Discord-Ollama Chat Bot](https://github.com/kevinthedang/discord-ollama) - TypeScript Discord bot
 - [Ollama Telegram Bot](https://github.com/ruecat/ollama-telegram) - Telegram bot
 - [LLM Telegram Bot](https://github.com/innightwolfsleep/llm_telegram_bot) - Telegram bot for roleplay
 ### Terminal & CLI
 - [aichat](https://github.com/sigoden/aichat) - All-in-one LLM CLI with Shell Assistant, RAG, and AI tools
 - [oterm](https://github.com/ggozad/oterm) - Terminal client for Ollama
 - [gollama](https://github.com/sammcj/gollama) - Go-based model manager for Ollama
 - [tlm](https://github.com/yusufcanb/tlm) - Local shell copilot
 - [tenere](https://github.com/pythops/tenere) - TUI for LLMs
 - [ParLlama](https://github.com/paulrobello/parllama) - TUI for Ollama
 - [llm-ollama](https://github.com/taketwo/llm-ollama) - Plugin for [Datasette's LLM CLI](https://llm.datasette.io/en/stable/)
 - [ShellOracle](https://github.com/djcopley/ShellOracle) - Shell command suggestions
 - [LLM-X](https://github.com/mrdjohnson/llm-x) - Progressive web app for LLMs
 - [cmdh](https://github.com/pgibler/cmdh) - Natural language to shell commands
 - [VT](https://github.com/vinhnx/vt.ai) - Minimal multimodal AI chat app
 ### Productivity & Apps
 - [AppFlowy](https://github.com/AppFlowy-IO/AppFlowy) - AI collaborative workspace, self-hostable Notion alternative
 - [Screenpipe](https://github.com/mediar-ai/screenpipe) - 24/7 screen and mic recording with AI-powered search
 - [Vibe](https://github.com/thewh1teagle/vibe) - Transcribe and analyze meetings
 - [Page Assist](https://github.com/n4ze3m/page-assist) - Chrome extension for AI-powered browsing
 - [NativeMind](https://github.com/NativeMindBrowser/NativeMindExtension) - Private, on-device browser AI assistant
 - [Ollama Fortress](https://github.com/ParisNeo/ollama_proxy_server) - Security proxy for Ollama
 - [1Panel](https://github.com/1Panel-dev/1Panel/) - Web-based Linux server management
 - [Writeopia](https://github.com/Writeopia/Writeopia) - Text editor with Ollama integration
 - [QA-Pilot](https://github.com/reid41/QA-Pilot) - GitHub code repository understanding
 - [Raycast extension](https://github.com/MassimilianoPasquini97/raycast_ollama) - Ollama in Raycast
 - [Painting Droid](https://github.com/mateuszmigas/painting-droid) - Painting app with AI integrations
 - [Serene Pub](https://github.com/doolijb/serene-pub) - AI roleplaying app
 - [Mayan EDMS](https://gitlab.com/mayan-edms/mayan-edms) - Document management with Ollama workflows
 - [TagSpaces](https://www.tagspaces.org) - File management with [AI tagging](https://docs.tagspaces.org/ai/)
 ### Observability & Monitoring
 - [Opik](https://www.comet.com/docs/opik/cookbook/ollama) - Debug, evaluate, and monitor LLM applications
 - [OpenLIT](https://github.com/openlit/openlit) - OpenTelemetry-native monitoring for Ollama and GPUs
 - [Lunary](https://lunary.ai/docs/integrations/ollama) - LLM observability with analytics and PII masking
 - [Langfuse](https://langfuse.com/docs/integrations/ollama) - Open source LLM observability
 - [HoneyHive](https://docs.honeyhive.ai/integrations/ollama) - AI observability and evaluation for agents
 - [MLflow Tracing](https://mlflow.org/docs/latest/llms/tracing/index.html#automatic-tracing) - Open source LLM observability
 ### Database & Embeddings
 - [pgai](https://github.com/timescale/pgai) - PostgreSQL as a vector database ([guide](https://github.com/timescale/pgai/blob/main/docs/vectorizer-quick-start.md))
 - [MindsDB](https://github.com/mindsdb/mindsdb/blob/staging/mindsdb/integrations/handlers/ollama_handler/README.md) - Connect Ollama with 200+ data platforms
 - [chromem-go](https://github.com/philippgille/chromem-go/blob/v0.5.0/embed_ollama.go) - Embeddable vector database for Go ([example](https://github.com/philippgille/chromem-go/tree/v0.5.0/examples/rag-wikipedia-ollama))
 - [Kangaroo](https://github.com/dbkangaroo/kangaroo) - AI-powered SQL client
 ### Infrastructure & Deployment
 #### Cloud
 - [Google Cloud](https://cloud.google.com/run/docs/tutorials/gpu-gemma2-with-ollama)
 - [Fly.io](https://fly.io/docs/python/do-more/add-ollama/)
 - [Koyeb](https://www.koyeb.com/deploy/ollama)
 - [Harbor](https://github.com/av/harbor) - Containerized LLM toolkit with Ollama as default backend
-### Tutorial
+#### Package Managers
 - [handy-ollama](https://github.com/datawhalechina/handy-ollama) (Chinese Tutorial for Ollama by [Datawhale](https://github.com/datawhalechina) - China's Largest Open Source AI Learning Community)
 ### Terminal
 - [oterm](https://github.com/ggozad/oterm)
 - [Ellama Emacs client](https://github.com/s-kostyaev/ellama)
 - [Emacs client](https://github.com/zweifisch/ollama)
 - [neollama](https://github.com/paradoxical-dev/neollama) UI client for interacting with models from within Neovim
 - [gen.nvim](https://github.com/David-Kunz/gen.nvim)
 - [ollama.nvim](https://github.com/nomnivore/ollama.nvim)
 - [ollero.nvim](https://github.com/marco-souza/ollero.nvim)
 - [ollama-chat.nvim](https://github.com/gerazov/ollama-chat.nvim)
 - [ogpt.nvim](https://github.com/huynle/ogpt.nvim)
 - [gptel Emacs client](https://github.com/karthink/gptel)
 - [Oatmeal](https://github.com/dustinblackman/oatmeal)
 - [cmdh](https://github.com/pgibler/cmdh)
 - [ooo](https://github.com/npahlfer/ooo)
 - [shell-pilot](https://github.com/reid41/shell-pilot) (Interact with models via pure shell scripts on Linux or macOS)
 - [tenere](https://github.com/pythops/tenere)
 - [llm-ollama](https://github.com/taketwo/llm-ollama) for [Datasette's LLM CLI](https://llm.datasette.io/en/stable/).
 - [typechat-cli](https://github.com/anaisbetts/typechat-cli)
 - [ShellOracle](https://github.com/djcopley/ShellOracle)
 - [tlm](https://github.com/yusufcanb/tlm)
 - [podman-ollama](https://github.com/ericcurtin/podman-ollama)
 - [gollama](https://github.com/sammcj/gollama)
 - [ParLlama](https://github.com/paulrobello/parllama)
 - [Ollama eBook Summary](https://github.com/cognitivetech/ollama-ebook-summary/)
 - [Ollama Mixture of Experts (MOE) in 50 lines of code](https://github.com/rapidarchitect/ollama_moe)
 - [vim-intelligence-bridge](https://github.com/pepo-ec/vim-intelligence-bridge) Simple interaction of "Ollama" with the Vim editor
 - [x-cmd ollama](https://x-cmd.com/mod/ollama)
 - [bb7](https://github.com/drunkwcodes/bb7)
 - [SwollamaCLI](https://github.com/marcusziade/Swollama) bundled with the Swollama Swift package. [Demo](https://github.com/marcusziade/Swollama?tab=readme-ov-file#cli-usage)
 - [aichat](https://github.com/sigoden/aichat) All-in-one LLM CLI tool featuring Shell Assistant, Chat-REPL, RAG, AI tools & agents, with access to OpenAI, Claude, Gemini, Ollama, Groq, and more.
 - [PowershAI](https://github.com/rrg92/powershai) PowerShell module that brings AI to terminal on Windows, including support for Ollama
 - [DeepShell](https://github.com/Abyss-c0re/deepshell) Your self-hosted AI assistant. Interactive Shell, Files and Folders analysis.
 - [orbiton](https://github.com/xyproto/orbiton) Configuration-free text editor and IDE with support for tab completion with Ollama.
 - [orca-cli](https://github.com/molbal/orca-cli) Ollama Registry CLI Application - Browse, pull, and download models from Ollama Registry in your terminal.
 - [GGUF-to-Ollama](https://github.com/jonathanhecl/gguf-to-ollama) - Importing GGUF to Ollama made easy (multiplatform)
 - [AWS-Strands-With-Ollama](https://github.com/rapidarchitect/ollama_strands) - AWS Strands Agents with Ollama Examples
 - [ollama-multirun](https://github.com/attogram/ollama-multirun) - A bash shell script to run a single prompt against any or all of your locally installed ollama models, saving the output and performance statistics as easily navigable web pages. ([Demo](https://attogram.github.io/ai_test_zone/))
 - [ollama-bash-toolshed](https://github.com/attogram/ollama-bash-toolshed) - Bash scripts to chat with tool-using models. Add new tools to your shed with ease. Runs on Ollama.
 - [hle-eval-ollama](https://github.com/mags0ft/hle-eval-ollama) - Runs benchmarks like "Humanity's Last Exam" (HLE) on your favorite local Ollama models and evaluates the quality of their responses
 - [VT Code](https://github.com/vinhnx/vtcode) - VT Code is a Rust-based terminal coding agent with semantic code intelligence via Tree-sitter. Ollama integration for running local/cloud models with configurable endpoints.
 ### Apple Vision Pro
 - [SwiftChat](https://github.com/aws-samples/swift-chat) (Cross-platform AI chat app supporting Apple Vision Pro via "Designed for iPad")
 - [Enchanted](https://github.com/AugustDev/enchanted)
 ### Database
 - [pgai](https://github.com/timescale/pgai) - PostgreSQL as a vector database (Create and search embeddings from Ollama models using pgvector)
  - [Get started guide](https://github.com/timescale/pgai/blob/main/docs/vectorizer-quick-start.md)
 - [MindsDB](https://github.com/mindsdb/mindsdb/blob/staging/mindsdb/integrations/handlers/ollama_handler/README.md) (Connects Ollama models with nearly 200 data platforms and apps)
 - [chromem-go](https://github.com/philippgille/chromem-go/blob/v0.5.0/embed_ollama.go) with [example](https://github.com/philippgille/chromem-go/tree/v0.5.0/examples/rag-wikipedia-ollama)
 - [Kangaroo](https://github.com/dbkangaroo/kangaroo) (AI-powered SQL client and admin tool for popular databases)
 ### Package managers
 - [Pacman](https://archlinux.org/packages/extra/x86_64/ollama/)
 - [Gentoo](https://github.com/gentoo/guru/tree/master/app-misc/ollama)
 - [Homebrew](https://formulae.brew.sh/formula/ollama)
 - [Helm Chart](https://artifacthub.io/packages/helm/ollama-helm/ollama)
 - [Guix channel](https://codeberg.org/tusharhero/ollama-guix)
 - [Nix package](https://search.nixos.org/packages?show=ollama&from=0&size=50&sort=relevance&type=packages&query=ollama)
 - [Helm Chart](https://artifacthub.io/packages/helm/ollama-helm/ollama)
 - [Gentoo](https://github.com/gentoo/guru/tree/master/app-misc/ollama)
 - [Flox](https://flox.dev/blog/ollama-part-one)
-
+- [Guix channel](https://codeberg.org/tusharhero/ollama-guix)
 ### Libraries
 - [LangChain](https://python.langchain.com/docs/integrations/chat/ollama/) and [LangChain.js](https://js.langchain.com/docs/integrations/chat/ollama/) with [example](https://js.langchain.com/docs/tutorials/local_rag/)
 - [Firebase Genkit](https://firebase.google.com/docs/genkit/plugins/ollama)
 - [crewAI](https://github.com/crewAIInc/crewAI)
 - [Yacana](https://remembersoftwares.github.io/yacana/) (User-friendly multi-agent framework for brainstorming and executing predetermined flows with built-in tool integration)
 - [Strands Agents](https://github.com/strands-agents/sdk-python) (A model-driven approach to building AI agents in just a few lines of code)
 - [Spring AI](https://github.com/spring-projects/spring-ai) with [reference](https://docs.spring.io/spring-ai/reference/api/chat/ollama-chat.html) and [example](https://github.com/tzolov/ollama-tools)
 - [LangChainGo](https://github.com/tmc/langchaingo/) with [example](https://github.com/tmc/langchaingo/tree/main/examples/ollama-completion-example)
 - [LangChain4j](https://github.com/langchain4j/langchain4j) with [example](https://github.com/langchain4j/langchain4j-examples/tree/main/ollama-examples/src/main/java)
 - [LangChainRust](https://github.com/Abraxas-365/langchain-rust) with [example](https://github.com/Abraxas-365/langchain-rust/blob/main/examples/llm_ollama.rs)
 - [LangChain for .NET](https://github.com/tryAGI/LangChain) with [example](https://github.com/tryAGI/LangChain/blob/main/examples/LangChain.Samples.OpenAI/Program.cs)
 - [LLPhant](https://github.com/theodo-group/LLPhant?tab=readme-ov-file#ollama)
 - [LlamaIndex](https://docs.llamaindex.ai/en/stable/examples/llm/ollama/) and [LlamaIndexTS](https://ts.llamaindex.ai/modules/llms/available_llms/ollama)
 - [LiteLLM](https://github.com/BerriAI/litellm)
 - [OllamaFarm for Go](https://github.com/presbrey/ollamafarm)
 - [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp)
 - [Ollama for Ruby](https://github.com/gbaptista/ollama-ai)
 - [Ollama-rs for Rust](https://github.com/pepperoni21/ollama-rs)
 - [Ollama-hpp for C++](https://github.com/jmont-dev/ollama-hpp)
 - [Ollama4j for Java](https://github.com/ollama4j/ollama4j)
 - [ModelFusion Typescript Library](https://modelfusion.dev/integration/model-provider/ollama)
 - [OllamaKit for Swift](https://github.com/kevinhermawan/OllamaKit)
 - [Ollama for Dart](https://github.com/breitburg/dart-ollama)
 - [Ollama for Laravel](https://github.com/cloudstudio/ollama-laravel)
 - [LangChainDart](https://github.com/davidmigloz/langchain_dart)
 - [Semantic Kernel - Python](https://github.com/microsoft/semantic-kernel/tree/main/python/semantic_kernel/connectors/ai/ollama)
 - [Haystack](https://github.com/deepset-ai/haystack-integrations/blob/main/integrations/ollama.md)
 - [Elixir LangChain](https://github.com/brainlid/langchain)
 - [Ollama for R - rollama](https://github.com/JBGruber/rollama)
 - [Ollama for R - ollama-r](https://github.com/hauselin/ollama-r)
 - [Ollama-ex for Elixir](https://github.com/lebrunel/ollama-ex)
 - [Ollama Connector for SAP ABAP](https://github.com/b-tocs/abap_btocs_ollama)
 - [Testcontainers](https://testcontainers.com/modules/ollama/)
 - [Portkey](https://portkey.ai/docs/welcome/integration-guides/ollama)
 - [PromptingTools.jl](https://github.com/svilupp/PromptingTools.jl) with an [example](https://svilupp.github.io/PromptingTools.jl/dev/examples/working_with_ollama)
 - [LlamaScript](https://github.com/Project-Llama/llamascript)
 - [llm-axe](https://github.com/emirsahin1/llm-axe) (Python Toolkit for Building LLM Powered Apps)
 - [Gollm](https://docs.gollm.co/examples/ollama-example)
 - [Gollama for Golang](https://github.com/jonathanhecl/gollama)
 - [Ollamaclient for Golang](https://github.com/xyproto/ollamaclient)
 - [High-level function abstraction in Go](https://gitlab.com/tozd/go/fun)
 - [Ollama PHP](https://github.com/ArdaGnsrn/ollama-php)
 - [Agents-Flex for Java](https://github.com/agents-flex/agents-flex) with [example](https://github.com/agents-flex/agents-flex/tree/main/agents-flex-llm/agents-flex-llm-ollama/src/test/java/com/agentsflex/llm/ollama)
 - [Parakeet](https://github.com/parakeet-nest/parakeet) is a GoLang library, made to simplify the development of small generative AI applications with Ollama.
 - [Haverscript](https://github.com/andygill/haverscript) with [examples](https://github.com/andygill/haverscript/tree/main/examples)
 - [Ollama for Swift](https://github.com/mattt/ollama-swift)
 - [Swollama for Swift](https://github.com/guitaripod/Swollama) with [DocC](https://guitaripod.github.io/Swollama/documentation/swollama)
 - [GoLamify](https://github.com/prasad89/golamify)
 - [Ollama for Haskell](https://github.com/tusharad/ollama-haskell)
 - [multi-llm-ts](https://github.com/nbonamy/multi-llm-ts) (A Typescript/JavaScript library allowing access to different LLM in a unified API)
 - [LlmTornado](https://github.com/lofcz/llmtornado) (C# library providing a unified interface for major FOSS & Commercial inference APIs)
 - [Ollama for Zig](https://github.com/dravenk/ollama-zig)
 - [Abso](https://github.com/lunary-ai/abso) (OpenAI-compatible TypeScript SDK for any LLM provider)
 - [Nichey](https://github.com/goodreasonai/nichey) is a Python package for generating custom wikis for your research topic
 - [Ollama for D](https://github.com/kassane/ollama-d)
 - [OllamaPlusPlus](https://github.com/HardCodeDev777/OllamaPlusPlus) (Very simple C++ library for Ollama)
 - [any-llm](https://github.com/mozilla-ai/any-llm) (A single interface to use different llm providers by [mozilla.ai](https://www.mozilla.ai/))
 - [any-agent](https://github.com/mozilla-ai/any-agent) (A single interface to use and evaluate different agent frameworks by [mozilla.ai](https://www.mozilla.ai/))
 - [Neuro SAN](https://github.com/cognizant-ai-lab/neuro-san-studio) (Data-driven multi-agent orchestration framework) with [example](https://github.com/cognizant-ai-lab/neuro-san-studio/blob/main/docs/user_guide.md#ollama)
 - [achatbot-go](https://github.com/ai-bot-pro/achatbot-go) a multimodal (text/audio/image) chatbot.
 - [Ollama Bash Lib](https://github.com/attogram/ollama-bash-lib) - A Bash Library for Ollama. Run LLM prompts straight from your shell, and more
 ### Mobile
 - [SwiftChat](https://github.com/aws-samples/swift-chat) (Lightning-fast Cross-platform AI chat app with native UI for Android, iOS, and iPad)
 - [Enchanted](https://github.com/AugustDev/enchanted)
 - [Maid](https://github.com/Mobile-Artificial-Intelligence/maid)
 - [Ollama App](https://github.com/JHubi1/ollama-app) (Modern and easy-to-use multi-platform client for Ollama)
 - [ConfiChat](https://github.com/1runeberg/confichat) (Lightweight, standalone, multi-platform, and privacy-focused LLM chat interface with optional encryption)
 - [Ollama Android Chat](https://github.com/sunshine0523/OllamaServer) (No need for Termux, start the Ollama service with one click on an Android device)
 - [Reins](https://github.com/ibrahimcetin/reins) (Easily tweak parameters, customize system prompts per chat, and enhance your AI experiments with reasoning model support.)
 ### Extensions & Plugins
 - [Raycast extension](https://github.com/MassimilianoPasquini97/raycast_ollama)
 - [Discollama](https://github.com/mxyng/discollama) (Discord bot inside the Ollama discord channel)
 - [Continue](https://github.com/continuedev/continue)
 - [Vibe](https://github.com/thewh1teagle/vibe) (Transcribe and analyze meetings with Ollama)
 - [Obsidian Ollama plugin](https://github.com/hinterdupfinger/obsidian-ollama)
 - [Logseq Ollama plugin](https://github.com/omagdy7/ollama-logseq)
 - [NotesOllama](https://github.com/andersrex/notesollama) (Apple Notes Ollama plugin)
 - [Dagger Chatbot](https://github.com/samalba/dagger-chatbot)
 - [Discord AI Bot](https://github.com/mekb-turtle/discord-ai-bot)
 - [Ollama Telegram Bot](https://github.com/ruecat/ollama-telegram)
 - [Hass Ollama Conversation](https://github.com/ej52/hass-ollama-conversation)
 - [Rivet plugin](https://github.com/abrenneke/rivet-plugin-ollama)
 - [Obsidian BMO Chatbot plugin](https://github.com/longy2k/obsidian-bmo-chatbot)
 - [Cliobot](https://github.com/herval/cliobot) (Telegram bot with Ollama support)
 - [Copilot for Obsidian plugin](https://github.com/logancyang/obsidian-copilot)
 - [Obsidian Local GPT plugin](https://github.com/pfrankov/obsidian-local-gpt)
 - [Open Interpreter](https://docs.openinterpreter.com/language-model-setup/local-models/ollama)
 - [Llama Coder](https://github.com/ex3ndr/llama-coder) (Copilot alternative using Ollama)
 - [Ollama Copilot](https://github.com/bernardo-bruning/ollama-copilot) (Proxy that allows you to use Ollama as a copilot like GitHub Copilot)
 - [twinny](https://github.com/rjmacarthy/twinny) (Copilot and Copilot chat alternative using Ollama)
 - [Wingman-AI](https://github.com/RussellCanfield/wingman-ai) (Copilot code and chat alternative using Ollama and Hugging Face)
 - [Page Assist](https://github.com/n4ze3m/page-assist) (Chrome Extension)
 - [Plasmoid Ollama Control](https://github.com/imoize/plasmoid-ollamacontrol) (KDE Plasma extension that allows you to quickly manage/control Ollama models)
 - [AI Telegram Bot](https://github.com/tusharhero/aitelegrambot) (Telegram bot using Ollama in backend)
 - [AI ST Completion](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (Sublime Text 4 AI assistant plugin with Ollama support)
 - [Discord-Ollama Chat Bot](https://github.com/kevinthedang/discord-ollama) (Generalized TypeScript Discord Bot w/ Tuning Documentation)
 - [ChatGPTBox: All in one browser extension](https://github.com/josStorer/chatGPTBox) with [Integrating Tutorial](https://github.com/josStorer/chatGPTBox/issues/616#issuecomment-1975186467)
 - [Discord AI chat/moderation bot](https://github.com/rapmd73/Companion) Chat/moderation bot written in Python. Uses Ollama to create personalities.
 - [Headless Ollama](https://github.com/nischalj10/headless-ollama) (Scripts to automatically install the Ollama client & models on any OS for apps that depend on an Ollama server)
 - [Terraform AWS Ollama & Open WebUI](https://github.com/xuyangbocn/terraform-aws-self-host-llm) (A Terraform module to deploy on AWS a ready-to-use Ollama service, together with its front-end Open WebUI service.)
 - [node-red-contrib-ollama](https://github.com/jakubburkiewicz/node-red-contrib-ollama)
 - [Local AI Helper](https://github.com/ivostoykov/localAI) (Chrome and Firefox extensions that enable interactions with the active tab and customisable API endpoints. Includes secure storage for user prompts.)
 - [LSP-AI](https://github.com/SilasMarvin/lsp-ai) (Open-source language server for AI-powered functionality)
 - [QodeAssist](https://github.com/Palm1r/QodeAssist) (AI-powered coding assistant plugin for Qt Creator)
 - [Obsidian Quiz Generator plugin](https://github.com/ECuiDev/obsidian-quiz-generator)
 - [AI Summary Helper plugin](https://github.com/philffm/ai-summary-helper)
 - [TextCraft](https://github.com/suncloudsmoon/TextCraft) (Copilot in Word alternative using Ollama)
 - [Alfred Ollama](https://github.com/zeitlings/alfred-ollama) (Alfred Workflow)
 - [TextLLaMA](https://github.com/adarshM84/TextLLaMA) A Chrome Extension that helps you write emails, correct grammar, and translate into any language
 - [Simple-Discord-AI](https://github.com/zyphixor/simple-discord-ai)
 - [LLM Telegram Bot](https://github.com/innightwolfsleep/llm_telegram_bot) (Telegram bot, primarily for RP. Oobabooga-like buttons, [A1111](https://github.com/AUTOMATIC1111/stable-diffusion-webui) API integration, etc.)
 - [mcp-llm](https://github.com/sammcj/mcp-llm) (MCP Server to allow LLMs to call other LLMs)
 - [SimpleOllamaUnity](https://github.com/HardCodeDev777/SimpleOllamaUnity) (Unity Engine extension for communicating with Ollama in a few lines of code. Also works at runtime)
 - [UnityCodeLama](https://github.com/HardCodeDev777/UnityCodeLama) (Unity Editor tool to analyze scripts via Ollama)
 - [NativeMind](https://github.com/NativeMindBrowser/NativeMindExtension) (Private, on-device AI Assistant, no cloud dependencies)
 - [GMAI - Gradle Managed AI](https://gmai.premex.se/) (Gradle plugin for automated Ollama lifecycle management during build phases)
 - [NOMYO Router](https://github.com/nomyo-ai/nomyo-router) (A transparent Ollama proxy with model-deployment-aware routing which auto-manages multiple Ollama instances in a given network)
 ### Supported backends
 - [llama.cpp](https://github.com/ggml-org/llama.cpp) project founded by Georgi Gerganov.
 ### Observability
 - [Opik](https://www.comet.com/docs/opik/cookbook/ollama) is an open-source platform to debug, evaluate, and monitor your LLM applications, RAG systems, and agentic workflows with comprehensive tracing, automated evaluations, and production-ready dashboards. Opik supports native integration to Ollama.
 - [Lunary](https://lunary.ai/docs/integrations/ollama) is the leading open-source LLM observability platform. It provides a variety of enterprise-grade features such as real-time analytics, prompt templates management, PII masking, and comprehensive agent tracing.
 - [OpenLIT](https://github.com/openlit/openlit) is an OpenTelemetry-native tool for monitoring Ollama Applications & GPUs using traces and metrics.
 - [HoneyHive](https://docs.honeyhive.ai/integrations/ollama) is an AI observability and evaluation platform for AI agents. Use HoneyHive to evaluate agent performance, interrogate failures, and monitor quality in production.
 - [Langfuse](https://langfuse.com/docs/integrations/ollama) is an open source LLM observability platform that enables teams to collaboratively monitor, evaluate and debug AI applications.
 - [MLflow Tracing](https://mlflow.org/docs/latest/llms/tracing/index.html#automatic-tracing) is an open source LLM observability tool with a convenient API to log and visualize traces, making it easy to debug and evaluate GenAI applications.
 ### Security
 - [Ollama Fortress](https://github.com/ParisNeo/ollama_proxy_server)
--- a/anthropic/anthropic.go
+++ b/anthropic/anthropic.go
@@ -1,17 +1,25 @@
 package anthropic
 import (
 	"bytes"
 	"context"
 	"crypto/rand"
 	"encoding/base64"
 	"encoding/json"
 	"errors"
 	"fmt"
 	"io"
 	"log/slog"
 	"net/http"
 	"net/url"
 	"strconv"
 	"strings"
 	"time"
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/auth"
 	internalcloud "github.com/ollama/ollama/internal/cloud"
 	"github.com/ollama/ollama/logutil"
 )
 // Error types matching Anthropic API
@@ -60,7 +68,7 @@ type MessagesRequest struct {
 	Model         string          `json:"model"`
 	MaxTokens     int             `json:"max_tokens"`
 	Messages      []MessageParam  `json:"messages"`
-	System        any             `json:"system,omitempty"` // string or []ContentBlock
+	System        any             `json:"system,omitempty"` // string or []map[string]any (JSON-decoded ContentBlock)
 	Stream        bool            `json:"stream,omitempty"`
 	Temperature   *float64        `json:"temperature,omitempty"`
 	TopP          *float64        `json:"top_p,omitempty"`
@@ -74,30 +82,52 @@ type MessagesRequest struct {
 // MessageParam represents a message in the request
 type MessageParam struct {
-	Role    string `json:"role"`    // "user" or "assistant"
+	Role    string         `json:"role"`    // "user" or "assistant"
-	Content any    `json:"content"` // string or []ContentBlock
+	Content []ContentBlock `json:"content"` // always []ContentBlock; plain strings are normalized on unmarshal
 }
 // UnmarshalJSON decodes a message whose "content" is either a JSON string or
 // an array of content blocks. A string is normalized into a single "text"
 // ContentBlock; anything else is decoded as []ContentBlock, and the error from
 // that decode (if any) is returned to the caller.
 func (m *MessageParam) UnmarshalJSON(data []byte) error {
 	// Keep "content" as raw bytes until its JSON shape is known.
 	var raw struct {
 		Role    string          `json:"role"`
 		Content json.RawMessage `json:"content"`
 	}
 	if err := json.Unmarshal(data, &raw); err != nil {
 		return err
 	}
 	m.Role = raw.Role
 	// Try the plain-string form first and wrap it in one text block.
 	// NOTE(review): encoding/json accepts a JSON null for a string target
 	// without error, so `"content": null` appears to take this branch and
 	// produce one empty text block — confirm that is intended.
 	var s string
 	if err := json.Unmarshal(raw.Content, &s); err == nil {
 		m.Content = []ContentBlock{{Type: "text", Text: &s}}
 		return nil
 	}
 	// Not a string: decode as an array of content blocks.
 	return json.Unmarshal(raw.Content, &m.Content)
 }
 // ContentBlock represents a content block in a message.
 // Text and Thinking use pointers so they serialize as the field being present (even if empty)
 // only when set, which is required for SDK streaming accumulation.
 type ContentBlock struct {
-	Type string `json:"type"` // text, image, tool_use, tool_result, thinking
+	Type string `json:"type"` // text, image, tool_use, tool_result, thinking, server_tool_use, web_search_tool_result
 	// For text blocks - pointer so field only appears when set (SDK requires it for accumulation)
 	Text *string `json:"text,omitempty"`
 	// For text blocks with citations
 	Citations []Citation `json:"citations,omitempty"`
 	// For image blocks
 	Source *ImageSource `json:"source,omitempty"`
-	// For tool_use blocks
+	// For tool_use and server_tool_use blocks
-	ID    string `json:"id,omitempty"`
+	ID    string                        `json:"id,omitempty"`
-	Name  string `json:"name,omitempty"`
+	Name  string                        `json:"name,omitempty"`
-	Input any    `json:"input,omitempty"`
+	Input api.ToolCallFunctionArguments `json:"input,omitzero"`
-	// For tool_result blocks
+	// For tool_result and web_search_tool_result blocks
 	ToolUseID string `json:"tool_use_id,omitempty"`
-	Content   any    `json:"content,omitempty"` // string or []ContentBlock
+	Content   any    `json:"content,omitempty"` // string, []ContentBlock, []WebSearchResult, or WebSearchToolResultError
 	IsError   bool   `json:"is_error,omitempty"`
 	// For thinking blocks - pointer so field only appears when set (SDK requires it for accumulation)
@@ -105,6 +135,30 @@ type ContentBlock struct {
 	Signature string  `json:"signature,omitempty"`
 }
 // Citation represents a citation in a text block.
 // Type, URL and Title are always serialized; EncryptedIndex and CitedText are
 // omitted from the JSON output when empty.
 type Citation struct {
 	Type           string `json:"type"` // "web_search_result_location"
 	URL            string `json:"url"`
 	Title          string `json:"title"`
 	EncryptedIndex string `json:"encrypted_index,omitempty"`
 	CitedText      string `json:"cited_text,omitempty"`
 }
 // WebSearchResult represents a single web search result.
 // Type, URL and Title are always serialized; EncryptedContent and PageAge are
 // omitted from the JSON output when empty.
 type WebSearchResult struct {
 	Type             string `json:"type"` // "web_search_result"
 	URL              string `json:"url"`
 	Title            string `json:"title"`
 	EncryptedContent string `json:"encrypted_content,omitempty"`
 	PageAge          string `json:"page_age,omitempty"`
 }
 // WebSearchToolResultError represents an error from web search.
 // Both fields are always serialized, including an empty error_code.
 type WebSearchToolResultError struct {
 	Type      string `json:"type"` // "web_search_tool_result_error"
 	ErrorCode string `json:"error_code"`
 }
 // ImageSource represents the source of an image
 type ImageSource struct {
 	Type      string `json:"type"` // "base64" or "url"
@@ -115,10 +169,13 @@ type ImageSource struct {
 // Tool represents a tool definition
 type Tool struct {
-	Type        string          `json:"type,omitempty"` // "custom" for user-defined tools
+	Type        string          `json:"type,omitempty"` // "custom" for user-defined tools, or "web_search_20250305" for web search
 	Name        string          `json:"name"`
 	Description string          `json:"description,omitempty"`
 	InputSchema json.RawMessage `json:"input_schema,omitempty"`
 	// Web search specific fields
 	MaxUses int `json:"max_uses,omitempty"`
 }
 // ToolChoice controls how the model uses tools
@@ -211,6 +268,7 @@ type MessageDelta struct {
 // DeltaUsage contains cumulative token usage.
 // Neither field uses omitempty, so zero counts are still written to the JSON
 // output.
 type DeltaUsage struct {
 	InputTokens  int `json:"input_tokens"`
 	OutputTokens int `json:"output_tokens"`
 }
@@ -232,6 +290,8 @@ type StreamErrorEvent struct {
 // FromMessagesRequest converts an Anthropic MessagesRequest to an Ollama api.ChatRequest
 func FromMessagesRequest(r MessagesRequest) (*api.ChatRequest, error) {
 	logutil.Trace("anthropic: converting request", "req", TraceMessagesRequest(r))
 	var messages []api.Message
 	if r.System != nil {
@@ -258,9 +318,10 @@ func FromMessagesRequest(r MessagesRequest) (*api.ChatRequest, error) {
 		}
 	}
-	for _, msg := range r.Messages {
+	for i, msg := range r.Messages {
 		converted, err := convertMessage(msg)
 		if err != nil {
 			logutil.Trace("anthropic: message conversion failed", "index", i, "role", msg.Role, "err", err)
 			return nil, err
 		}
 		messages = append(messages, converted...)
@@ -287,8 +348,24 @@ func FromMessagesRequest(r MessagesRequest) (*api.ChatRequest, error) {
 	}
 	var tools api.Tools
 	hasBuiltinWebSearch := false
 	for _, t := range r.Tools {
-		tool, err := convertTool(t)
+		if strings.HasPrefix(t.Type, "web_search") {
 			hasBuiltinWebSearch = true
 			break
 		}
 	}
 	for _, t := range r.Tools {
 		// Anthropic built-in web_search maps to Ollama function name "web_search".
 		// If a user-defined tool also uses that name in the same request, drop the
 		// user-defined one to avoid ambiguous tool-call routing.
 		if hasBuiltinWebSearch && !strings.HasPrefix(t.Type, "web_search") && t.Name == "web_search" {
 			logutil.Trace("anthropic: dropping colliding custom web_search tool", "tool", TraceTool(t))
 			continue
 		}
 		tool, _, err := convertTool(t)
 		if err != nil {
 			return nil, err
 		}
@@ -301,15 +378,17 @@ func FromMessagesRequest(r MessagesRequest) (*api.ChatRequest, error) {
 	}
 	stream := r.Stream
-
+	convertedRequest := &api.ChatRequest{
 	return &api.ChatRequest{
 		Model:    r.Model,
 		Messages: messages,
 		Options:  options,
 		Stream:   &stream,
 		Tools:    tools,
 		Think:    think,
-	}, nil
+	}
 	logutil.Trace("anthropic: converted request", "req", TraceChatRequest(convertedRequest))
 	return convertedRequest, nil
 }
 // convertMessage converts an Anthropic MessageParam to Ollama api.Message(s)
@@ -317,129 +396,236 @@ func convertMessage(msg MessageParam) ([]api.Message, error) {
 	var messages []api.Message
 	role := strings.ToLower(msg.Role)
-	switch content := msg.Content.(type) {
+	var textContent strings.Builder
-	case string:
+	var images []api.ImageData
-		messages = append(messages, api.Message{Role: role, Content: content})
+	var toolCalls []api.ToolCall
 	var thinking string
 	var toolResults []api.Message
 	textBlocks := 0
 	imageBlocks := 0
 	toolUseBlocks := 0
 	toolResultBlocks := 0
 	serverToolUseBlocks := 0
 	webSearchToolResultBlocks := 0
 	thinkingBlocks := 0
 	unknownBlocks := 0
-	case []any:
+	for _, block := range msg.Content {
-		var textContent strings.Builder
+		switch block.Type {
-		var images []api.ImageData
+		case "text":
-		var toolCalls []api.ToolCall
+			textBlocks++
-		var thinking string
+			if block.Text != nil {
-		var toolResults []api.Message
+				textContent.WriteString(*block.Text)
 		for _, block := range content {
 			blockMap, ok := block.(map[string]any)
 			if !ok {
 				return nil, errors.New("invalid content block format")
 			}
-			blockType, _ := blockMap["type"].(string)
+		case "image":
 			imageBlocks++
 			if block.Source == nil {
 				logutil.Trace("anthropic: invalid image source", "role", role)
 				return nil, errors.New("invalid image source")
 			}
-			switch blockType {
+			if block.Source.Type == "base64" {
-			case "text":
+				decoded, err := base64.StdEncoding.DecodeString(block.Source.Data)
-				if text, ok := blockMap["text"].(string); ok {
+				if err != nil {
-					textContent.WriteString(text)
+					logutil.Trace("anthropic: invalid base64 image data", "role", role, "error", err)
 					return nil, fmt.Errorf("invalid base64 image data: %w", err)
 				}
 				images = append(images, decoded)
 			} else {
 				logutil.Trace("anthropic: unsupported image source type", "role", role, "source_type", block.Source.Type)
 				return nil, fmt.Errorf("invalid image source type: %s. Only base64 images are supported.", block.Source.Type)
 			}
-			case "image":
+		case "tool_use":
-				source, ok := blockMap["source"].(map[string]any)
+			toolUseBlocks++
-				if !ok {
+			if block.ID == "" {
-					return nil, errors.New("invalid image source")
+				logutil.Trace("anthropic: tool_use block missing id", "role", role)
-				}
+				return nil, errors.New("tool_use block missing required 'id' field")
 			}
 			if block.Name == "" {
 				logutil.Trace("anthropic: tool_use block missing name", "role", role)
 				return nil, errors.New("tool_use block missing required 'name' field")
 			}
 			toolCalls = append(toolCalls, api.ToolCall{
 				ID: block.ID,
 				Function: api.ToolCallFunction{
 					Name:      block.Name,
 					Arguments: block.Input,
 				},
 			})
-				sourceType, _ := source["type"].(string)
+		case "tool_result":
-				if sourceType == "base64" {
+			toolResultBlocks++
-					data, _ := source["data"].(string)
+			var resultContent string
 					decoded, err := base64.StdEncoding.DecodeString(data)
 					if err != nil {
 						return nil, fmt.Errorf("invalid base64 image data: %w", err)
 					}
 					images = append(images, decoded)
 				} else {
 					return nil, fmt.Errorf("invalid image source type: %s. Only base64 images are supported.", sourceType)
 				}
 				// URL images would need to be fetched - skip for now
-			case "tool_use":
+			switch c := block.Content.(type) {
-				id, ok := blockMap["id"].(string)
+			case string:
-				if !ok {
+				resultContent = c
-					return nil, errors.New("tool_use block missing required 'id' field")
+			case []any:
-				}
+				for _, cb := range c {
-				name, ok := blockMap["name"].(string)
+					if cbMap, ok := cb.(map[string]any); ok {
-				if !ok {
+						if cbMap["type"] == "text" {
-					return nil, errors.New("tool_use block missing required 'name' field")
+							if text, ok := cbMap["text"].(string); ok {
-				}
+								resultContent += text
 				tc := api.ToolCall{
 					ID: id,
 					Function: api.ToolCallFunction{
 						Name: name,
 					},
 				}
 				if input, ok := blockMap["input"].(map[string]any); ok {
 					tc.Function.Arguments = mapToArgs(input)
 				}
 				toolCalls = append(toolCalls, tc)
 			case "tool_result":
 				toolUseID, _ := blockMap["tool_use_id"].(string)
 				var resultContent string
 				switch c := blockMap["content"].(type) {
 				case string:
 					resultContent = c
 				case []any:
 					for _, cb := range c {
 						if cbMap, ok := cb.(map[string]any); ok {
 							if cbMap["type"] == "text" {
 								if text, ok := cbMap["text"].(string); ok {
 									resultContent += text
 								}
 							}
 						}
 					}
 				}
 				toolResults = append(toolResults, api.Message{
 					Role:       "tool",
 					Content:    resultContent,
 					ToolCallID: toolUseID,
 				})
 			case "thinking":
 				if t, ok := blockMap["thinking"].(string); ok {
 					thinking = t
 				}
 			}
 		}
-		if textContent.Len() > 0 || len(images) > 0 || len(toolCalls) > 0 || thinking != "" {
+			toolResults = append(toolResults, api.Message{
-			m := api.Message{
+				Role:       "tool",
-				Role:      role,
+				Content:    resultContent,
-				Content:   textContent.String(),
+				ToolCallID: block.ToolUseID,
-				Images:    images,
+			})
-				ToolCalls: toolCalls,
+
-				Thinking:  thinking,
+		case "thinking":
 			thinkingBlocks++
 			if block.Thinking != nil {
 				thinking = *block.Thinking
 			}
-			messages = append(messages, m)
+
 		case "server_tool_use":
 			serverToolUseBlocks++
 			toolCalls = append(toolCalls, api.ToolCall{
 				ID: block.ID,
 				Function: api.ToolCallFunction{
 					Name:      block.Name,
 					Arguments: block.Input,
 				},
 			})
 		case "web_search_tool_result":
 			webSearchToolResultBlocks++
 			toolResults = append(toolResults, api.Message{
 				Role:       "tool",
 				Content:    formatWebSearchToolResultContent(block.Content),
 				ToolCallID: block.ToolUseID,
 			})
 		default:
 			unknownBlocks++
 		}
 		// Add tool results as separate messages
 		messages = append(messages, toolResults...)
 	default:
 		return nil, fmt.Errorf("invalid message content type: %T", content)
 	}
 	if textContent.Len() > 0 || len(images) > 0 || len(toolCalls) > 0 || thinking != "" {
 		m := api.Message{
 			Role:      role,
 			Content:   textContent.String(),
 			Images:    images,
 			ToolCalls: toolCalls,
 			Thinking:  thinking,
 		}
 		messages = append(messages, m)
 	}
 	// Add tool results as separate messages
 	messages = append(messages, toolResults...)
 	logutil.Trace("anthropic: converted block message",
 		"role", role,
 		"blocks", len(msg.Content),
 		"text", textBlocks,
 		"image", imageBlocks,
 		"tool_use", toolUseBlocks,
 		"tool_result", toolResultBlocks,
 		"server_tool_use", serverToolUseBlocks,
 		"web_search_result", webSearchToolResultBlocks,
 		"thinking", thinkingBlocks,
 		"unknown", unknownBlocks,
 		"messages", TraceAPIMessages(messages),
 	)
 	return messages, nil
 }
-// convertTool converts an Anthropic Tool to an Ollama api.Tool
+func formatWebSearchToolResultContent(content any) string {
-func convertTool(t Tool) (api.Tool, error) {
+	switch c := content.(type) {
 	case string:
 		return c
 	case []WebSearchResult:
 		var resultContent strings.Builder
 		for _, item := range c {
 			if item.Type != "web_search_result" {
 				continue
 			}
 			fmt.Fprintf(&resultContent, "- %s: %s\n", item.Title, item.URL)
 		}
 		return resultContent.String()
 	case []any:
 		var resultContent strings.Builder
 		for _, item := range c {
 			itemMap, ok := item.(map[string]any)
 			if !ok {
 				continue
 			}
 			switch itemMap["type"] {
 			case "web_search_result":
 				title, _ := itemMap["title"].(string)
 				url, _ := itemMap["url"].(string)
 				fmt.Fprintf(&resultContent, "- %s: %s\n", title, url)
 			case "web_search_tool_result_error":
 				errorCode, _ := itemMap["error_code"].(string)
 				if errorCode == "" {
 					return "web_search_tool_result_error"
 				}
 				return "web_search_tool_result_error: " + errorCode
 			}
 		}
 		return resultContent.String()
 	case map[string]any:
 		if c["type"] == "web_search_tool_result_error" {
 			errorCode, _ := c["error_code"].(string)
 			if errorCode == "" {
 				return "web_search_tool_result_error"
 			}
 			return "web_search_tool_result_error: " + errorCode
 		}
 		data, err := json.Marshal(c)
 		if err != nil {
 			return ""
 		}
 		return string(data)
 	case WebSearchToolResultError:
 		if c.ErrorCode == "" {
 			return "web_search_tool_result_error"
 		}
 		return "web_search_tool_result_error: " + c.ErrorCode
 	default:
 		data, err := json.Marshal(c)
 		if err != nil {
 			return ""
 		}
 		return string(data)
 	}
 }
 // convertTool converts an Anthropic Tool to an Ollama api.Tool, returning true if it's a server tool
 func convertTool(t Tool) (api.Tool, bool, error) {
 	if strings.HasPrefix(t.Type, "web_search") {
 		props := api.NewToolPropertiesMap()
 		props.Set("query", api.ToolProperty{
 			Type:        api.PropertyType{"string"},
 			Description: "The search query to look up on the web",
 		})
 		return api.Tool{
 			Type: "function",
 			Function: api.ToolFunction{
 				Name:        "web_search",
 				Description: "Search the web for current information. Use this to find up-to-date information about any topic.",
 				Parameters: api.ToolFunctionParameters{
 					Type:       "object",
 					Required:   []string{"query"},
 					Properties: props,
 				},
 			},
 		}, true, nil
 	}
 	var params api.ToolFunctionParameters
 	if len(t.InputSchema) > 0 {
 		if err := json.Unmarshal(t.InputSchema, &params); err != nil {
-			return api.Tool{}, fmt.Errorf("invalid input_schema for tool %q: %w", t.Name, err)
+			logutil.Trace("anthropic: invalid tool schema", "tool", t.Name, "err", err)
 			return api.Tool{}, false, fmt.Errorf("invalid input_schema for tool %q: %w", t.Name, err)
 		}
 	}
@@ -450,7 +636,7 @@ func convertTool(t Tool) (api.Tool, error) {
 			Description: t.Description,
 			Parameters:  params,
 		},
-	}, nil
+	}, false, nil
 }
 // ToMessagesResponse converts an Ollama api.ChatResponse to an Anthropic MessagesResponse
@@ -517,24 +703,26 @@ func mapStopReason(reason string, hasToolCalls bool) string {
 // StreamConverter manages state for converting Ollama streaming responses to Anthropic format
 type StreamConverter struct {
-	ID              string
+	ID                   string
-	Model           string
+	Model                string
-	firstWrite      bool
+	firstWrite           bool
-	contentIndex    int
+	contentIndex         int
-	inputTokens     int
+	inputTokens          int
-	outputTokens    int
+	outputTokens         int
-	thinkingStarted bool
+	estimatedInputTokens int // Estimated tokens from request (used when actual metrics are 0)
-	thinkingDone    bool
+	thinkingStarted      bool
-	textStarted     bool
+	thinkingDone         bool
-	toolCallsSent   map[string]bool
+	textStarted          bool
 	toolCallsSent        map[string]bool
 }
-func NewStreamConverter(id, model string) *StreamConverter {
+func NewStreamConverter(id, model string, estimatedInputTokens int) *StreamConverter {
 	return &StreamConverter{
-		ID:            id,
+		ID:                   id,
-		Model:         model,
+		Model:                model,
-		firstWrite:    true,
+		firstWrite:           true,
-		toolCallsSent: make(map[string]bool),
+		estimatedInputTokens: estimatedInputTokens,
 		toolCallsSent:        make(map[string]bool),
 	}
 }
@@ -550,7 +738,11 @@ func (c *StreamConverter) Process(r api.ChatResponse) []StreamEvent {
 	if c.firstWrite {
 		c.firstWrite = false
 		// Use actual metrics if available, otherwise use estimate
 		c.inputTokens = r.Metrics.PromptEvalCount
 		if c.inputTokens == 0 && c.estimatedInputTokens > 0 {
 			c.inputTokens = c.estimatedInputTokens
 		}
 		events = append(events, StreamEvent{
 			Event: "message_start",
@@ -646,6 +838,19 @@ func (c *StreamConverter) Process(r api.ChatResponse) []StreamEvent {
 			continue
 		}
 		// Close thinking block if still open (thinking → tool_use without text in between)
 		if c.thinkingStarted && !c.thinkingDone {
 			c.thinkingDone = true
 			events = append(events, StreamEvent{
 				Event: "content_block_stop",
 				Data: ContentBlockStopEvent{
 					Type:  "content_block_stop",
 					Index: c.contentIndex,
 				},
 			})
 			c.contentIndex++
 		}
 		if c.textStarted {
 			events = append(events, StreamEvent{
 				Event: "content_block_stop",
@@ -663,7 +868,6 @@ func (c *StreamConverter) Process(r api.ChatResponse) []StreamEvent {
 			slog.Error("failed to marshal tool arguments", "error", err, "tool_id", tc.ID)
 			continue
 		}
 		events = append(events, StreamEvent{
 			Event: "content_block_start",
 			Data: ContentBlockStartEvent{
@@ -673,7 +877,7 @@ func (c *StreamConverter) Process(r api.ChatResponse) []StreamEvent {
 					Type:  "tool_use",
 					ID:    tc.ID,
 					Name:  tc.Function.Name,
-					Input: map[string]any{},
+					Input: api.NewToolCallFunctionArguments(),
 				},
 			},
 		})
@@ -721,6 +925,7 @@ func (c *StreamConverter) Process(r api.ChatResponse) []StreamEvent {
 			})
 		}
 		c.inputTokens = r.Metrics.PromptEvalCount
 		c.outputTokens = r.Metrics.EvalCount
 		stopReason := mapStopReason(r.DoneReason, len(c.toolCallsSent) > 0)
@@ -732,6 +937,7 @@ func (c *StreamConverter) Process(r api.ChatResponse) []StreamEvent {
 					StopReason: stopReason,
 				},
 				Usage: DeltaUsage{
 					InputTokens:  c.inputTokens,
 					OutputTokens: c.outputTokens,
 				},
 			},
@@ -768,11 +974,216 @@ func ptr(s string) *string {
 	return &s
 }
-// mapToArgs converts a map to ToolCallFunctionArguments
+// CountTokensRequest represents an Anthropic count_tokens request
-func mapToArgs(m map[string]any) api.ToolCallFunctionArguments {
+type CountTokensRequest struct {
-	args := api.NewToolCallFunctionArguments()
+	Model    string          `json:"model"`
-	for k, v := range m {
+	Messages []MessageParam  `json:"messages"`
-		args.Set(k, v)
+	System   any             `json:"system,omitempty"`
-	}
+	Tools    []Tool          `json:"tools,omitempty"`
-	return args
+	Thinking *ThinkingConfig `json:"thinking,omitempty"`
 }
 // EstimateInputTokens approximates the number of input tokens in a
 // MessagesRequest. It copies the request's model, messages, system prompt,
 // tools, and thinking config into a CountTokensRequest and hands that to
 // estimateTokens.
 func EstimateInputTokens(req MessagesRequest) int {
 	var countReq CountTokensRequest
 	countReq.Model = req.Model
 	countReq.Messages = req.Messages
 	countReq.System = req.System
 	countReq.Tools = req.Tools
 	countReq.Thinking = req.Thinking
 	return estimateTokens(countReq)
 }
 // CountTokensResponse represents an Anthropic count_tokens response.
 type CountTokensResponse struct {
 	// InputTokens is the token count, serialized as "input_tokens".
 	InputTokens int `json:"input_tokens"`
 }
 // estimateTokens returns a rough token estimate for req: the combined byte
 // length of the system prompt, every message's role and content, and every
 // tool's name, description, and input schema, divided by four.
 // TODO: Replace with actual tokenization via Tokenize API for accuracy.
 // The len/4 heuristic assumes roughly four characters per token on average.
 // When the division yields zero, the result is 1 if the request has at least
 // one message or a non-nil system prompt, and 0 otherwise.
 func estimateTokens(req CountTokensRequest) int {
 	// Start from the system prompt, then add each message and tool.
 	chars := countAnyContent(req.System)
 	for _, m := range req.Messages {
 		chars += len(m.Role) + countAnyContent(m.Content)
 	}
 	for _, t := range req.Tools {
 		chars += len(t.Name) + len(t.Description) + len(t.InputSchema)
 	}
 	if estimate := chars / 4; estimate > 0 {
 		return estimate
 	}
 	// Too short to register under len/4: still report one token when the
 	// request carries messages or a system prompt.
 	if len(req.Messages) > 0 || req.System != nil {
 		return 1
 	}
 	return 0
 }
 // countAnyContent returns a length for loosely typed content:
 //   - nil counts as 0
 //   - a string counts its byte length
 //   - []ContentBlock sums countContentBlock over the blocks
 //   - []any converts each item to a ContentBlock via a JSON round trip and
 //     sums countContentBlock; items that fail to convert are skipped
 //   - anything else counts the length of its JSON encoding, or 0 when it
 //     cannot be marshaled
 func countAnyContent(content any) int {
 	switch v := content.(type) {
 	case nil:
 		return 0
 	case string:
 		return len(v)
 	case []ContentBlock:
 		sum := 0
 		for i := range v {
 			sum += countContentBlock(v[i])
 		}
 		return sum
 	case []any:
 		sum := 0
 		for _, raw := range v {
 			encoded, err := json.Marshal(raw)
 			if err != nil {
 				continue
 			}
 			var blk ContentBlock
 			if json.Unmarshal(encoded, &blk) != nil {
 				continue
 			}
 			sum += countContentBlock(blk)
 		}
 		return sum
 	}
 	// Unknown shape: fall back to the size of its JSON form.
 	encoded, err := json.Marshal(content)
 	if err != nil {
 		return 0
 	}
 	return len(encoded)
 }
 // countContentBlock returns the length contributed by a single content block:
 // the byte length of its text and thinking strings when set, plus the length
 // of the whole block's JSON encoding when its type is "tool_use" or
 // "tool_result". A marshal failure adds nothing.
 func countContentBlock(block ContentBlock) int {
 	n := 0
 	if text := block.Text; text != nil {
 		n += len(*text)
 	}
 	if thinking := block.Thinking; thinking != nil {
 		n += len(*thinking)
 	}
 	switch block.Type {
 	case "tool_use", "tool_result":
 		if encoded, err := json.Marshal(block); err == nil {
 			n += len(encoded)
 		}
 	}
 	return n
 }
 // OllamaWebSearchRequest is the JSON body sent to the Ollama web search API.
 type OllamaWebSearchRequest struct {
 	// Query is the search text.
 	Query      string `json:"query"`
 	// MaxResults is the requested number of results; it is omitted from the
 	// JSON when zero.
 	MaxResults int    `json:"max_results,omitempty"`
 }
 // OllamaWebSearchResult represents a single search result from the Ollama
 // web search API.
 type OllamaWebSearchResult struct {
 	// Title is the result's "title" field.
 	Title   string `json:"title"`
 	// URL is the result's "url" field.
 	URL     string `json:"url"`
 	// Content is the result's "content" field.
 	Content string `json:"content"`
 }
 // OllamaWebSearchResponse represents the response from the Ollama web search API.
 type OllamaWebSearchResponse struct {
 	// Results holds the entries decoded from the response's "results" array.
 	Results []OllamaWebSearchResult `json:"results"`
 }
 // WebSearchEndpoint is the URL that WebSearch posts search requests to.
 var WebSearchEndpoint = "https://ollama.com/api/web_search"
 // WebSearch sends query to the Ollama web search API at WebSearchEndpoint and
 // returns the decoded results.
 //
 // maxResults defaults to 5 when it is zero or negative and is capped at 10.
 // A "ts" query parameter holding the current Unix time is added to the URL.
 // When the endpoint host is ollama.com the request is signed with auth.Sign
 // and the signature is sent as a Bearer token.
 //
 // An error is returned when cloud features are disabled, when the request
 // cannot be built, signed, or sent, when the server answers with a status
 // other than 200, or when the response body cannot be decoded.
 func WebSearch(ctx context.Context, query string, maxResults int) (*OllamaWebSearchResponse, error) {
 	// Upper bound on how much of a non-200 response body is copied into the
 	// returned error, so a misbehaving endpoint cannot force an unbounded read.
 	const maxErrorBodyBytes = 8 << 10
 	if internalcloud.Disabled() {
 		logutil.TraceContext(ctx, "anthropic: web search blocked", "reason", "cloud_disabled")
 		return nil, errors.New(internalcloud.DisabledError("web search is unavailable"))
 	}
 	// Clamp the requested result count to the range 1..10, defaulting to 5.
 	if maxResults <= 0 {
 		maxResults = 5
 	}
 	if maxResults > 10 {
 		maxResults = 10
 	}
 	reqBody := OllamaWebSearchRequest{
 		Query:      query,
 		MaxResults: maxResults,
 	}
 	body, err := json.Marshal(reqBody)
 	if err != nil {
 		return nil, fmt.Errorf("failed to marshal web search request: %w", err)
 	}
 	searchURL, err := url.Parse(WebSearchEndpoint)
 	if err != nil {
 		return nil, fmt.Errorf("failed to parse web search URL: %w", err)
 	}
 	logutil.TraceContext(ctx, "anthropic: web search request",
 		"query", TraceTruncateString(query),
 		"max_results", maxResults,
 		"url", searchURL.String(),
 	)
 	// The timestamp is part of the request URI, so it must be set before the
 	// signing challenge is built from that URI.
 	q := searchURL.Query()
 	q.Set("ts", strconv.FormatInt(time.Now().Unix(), 10))
 	searchURL.RawQuery = q.Encode()
 	// Only requests to ollama.com are signed; other hosts get no credentials.
 	signature := ""
 	if strings.EqualFold(searchURL.Hostname(), "ollama.com") {
 		challenge := fmt.Sprintf("%s,%s", http.MethodPost, searchURL.RequestURI())
 		signature, err = auth.Sign(ctx, []byte(challenge))
 		if err != nil {
 			return nil, fmt.Errorf("failed to sign web search request: %w", err)
 		}
 	}
 	logutil.TraceContext(ctx, "anthropic: web search auth", "signed", signature != "")
 	req, err := http.NewRequestWithContext(ctx, "POST", searchURL.String(), bytes.NewReader(body))
 	if err != nil {
 		return nil, fmt.Errorf("failed to create web search request: %w", err)
 	}
 	req.Header.Set("Content-Type", "application/json")
 	if signature != "" {
 		req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", signature))
 	}
 	resp, err := http.DefaultClient.Do(req)
 	if err != nil {
 		return nil, fmt.Errorf("web search request failed: %w", err)
 	}
 	defer resp.Body.Close()
 	logutil.TraceContext(ctx, "anthropic: web search response", "status", resp.StatusCode)
 	if resp.StatusCode != http.StatusOK {
 		// Best effort: include a bounded prefix of the body for diagnosis. A
 		// read error simply leaves the message with whatever was read.
 		respBody, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrorBodyBytes))
 		return nil, fmt.Errorf("web search returned status %d: %s", resp.StatusCode, string(respBody))
 	}
 	var searchResp OllamaWebSearchResponse
 	if err := json.NewDecoder(resp.Body).Decode(&searchResp); err != nil {
 		return nil, fmt.Errorf("failed to decode web search response: %w", err)
 	}
 	logutil.TraceContext(ctx, "anthropic: web search results", "count", len(searchResp.Results))
 	return &searchResp, nil
 }
 // ConvertOllamaToAnthropicResults maps each Ollama web search result to an
 // Anthropic WebSearchResult of type "web_search_result", carrying over the
 // URL and title. The Ollama result's Content is not copied.
 // It returns nil when ollamaResults is nil or holds no results.
 func ConvertOllamaToAnthropicResults(ollamaResults *OllamaWebSearchResponse) []WebSearchResult {
 	// WebSearch returns a nil response alongside an error; tolerate that
 	// here instead of panicking on the dereference below.
 	if ollamaResults == nil {
 		return nil
 	}
 	var results []WebSearchResult
 	for _, r := range ollamaResults.Results {
 		results = append(results, WebSearchResult{
 			Type:  "web_search_result",
 			URL:   r.URL,
 			Title: r.Title,
 		})
 	}
 	return results
 }
--- a/anthropic/anthropic_test.go
+++ b/anthropic/anthropic_test.go
--- a/anthropic/trace.go
+++ b/anthropic/trace.go
@@ -0,0 +1,352 @@
 package anthropic
 import (
 	"encoding/json"
 	"fmt"
 	"sort"
 	"github.com/ollama/ollama/api"
 )
 // Trace truncation limits.
 const (
 	// TraceMaxStringRunes is the longest string, in runes, that
 	// TraceTruncateString returns unshortened.
 	TraceMaxStringRunes = 240
 	// TraceMaxSliceItems is how many leading slice elements
 	// TraceCompactValue keeps.
 	TraceMaxSliceItems  = 8
 	// TraceMaxMapEntries is how many map entries (in sorted key order)
 	// TraceCompactValue keeps.
 	TraceMaxMapEntries  = 16
 	// TraceMaxDepth is the nesting depth at which TraceCompactValue stops
 	// descending and emits placeholders instead.
 	TraceMaxDepth       = 4
 )
 // TraceTruncateString caps s at TraceMaxStringRunes runes. Strings within the
 // limit are returned unchanged; longer ones are cut to the limit and suffixed
 // with "...(+N chars)", where N is the number of runes dropped.
 func TraceTruncateString(s string) string {
 	// A string of at most TraceMaxStringRunes bytes cannot contain more
 	// runes than that, so it needs no rune conversion (covers "" as well).
 	if len(s) <= TraceMaxStringRunes {
 		return s
 	}
 	runes := []rune(s)
 	dropped := len(runes) - TraceMaxStringRunes
 	if dropped <= 0 {
 		return s
 	}
 	head := string(runes[:TraceMaxStringRunes])
 	return fmt.Sprintf("%s...(+%d chars)", head, dropped)
 }
 // TraceJSON marshals v to JSON, decodes it back into generic values, and
 // returns the TraceCompactValue form of the result. A nil v yields nil. If
 // marshaling fails, a map holding the error text and v's Go type is returned;
 // if decoding fails, the truncated JSON text is returned instead.
 func TraceJSON(v any) any {
 	if v == nil {
 		return nil
 	}
 	encoded, marshalErr := json.Marshal(v)
 	if marshalErr != nil {
 		failure := make(map[string]any, 2)
 		failure["marshal_error"] = marshalErr.Error()
 		failure["type"] = fmt.Sprintf("%T", v)
 		return failure
 	}
 	var decoded any
 	if json.Unmarshal(encoded, &decoded) != nil {
 		return TraceTruncateString(string(encoded))
 	}
 	return TraceCompactValue(decoded, 0)
 }
 // TraceCompactValue produces a size-bounded copy of v for trace output.
 // Strings are shortened with TraceTruncateString. Slices keep their first
 // TraceMaxSliceItems elements and maps their first TraceMaxMapEntries keys in
 // sorted order, each followed by a marker describing what was left out.
 // depth is the current nesting level: once it reaches TraceMaxDepth, slices,
 // maps, and other non-string values are replaced by short placeholders.
 func TraceCompactValue(v any, depth int) any {
 	if v == nil {
 		return nil
 	}
 	atLimit := depth >= TraceMaxDepth
 	switch val := v.(type) {
 	case string:
 		// Strings are truncated the same way at every depth.
 		return TraceTruncateString(val)
 	case []any:
 		if atLimit {
 			return fmt.Sprintf("<array len=%d>", len(val))
 		}
 		kept := min(len(val), TraceMaxSliceItems)
 		compacted := make([]any, 0, kept+1)
 		for _, item := range val[:kept] {
 			compacted = append(compacted, TraceCompactValue(item, depth+1))
 		}
 		if dropped := len(val) - kept; dropped > 0 {
 			compacted = append(compacted, fmt.Sprintf("... +%d more items", dropped))
 		}
 		return compacted
 	case map[string]any:
 		if atLimit {
 			return fmt.Sprintf("<object keys=%d>", len(val))
 		}
 		// Sort the keys so the retained subset is deterministic.
 		names := make([]string, 0, len(val))
 		for name := range val {
 			names = append(names, name)
 		}
 		sort.Strings(names)
 		kept := min(len(names), TraceMaxMapEntries)
 		compacted := make(map[string]any, kept+1)
 		for _, name := range names[:kept] {
 			compacted[name] = TraceCompactValue(val[name], depth+1)
 		}
 		if dropped := len(names) - kept; dropped > 0 {
 			compacted["__truncated_keys"] = dropped
 		}
 		return compacted
 	default:
 		if atLimit {
 			return fmt.Sprintf("<%T>", v)
 		}
 		return val
 	}
 }
 // ---------------------------------------------------------------------------
 // Anthropic request/response tracing
 // ---------------------------------------------------------------------------
 // TraceMessagesRequest summarizes a MessagesRequest for trace output. Messages,
 // the system prompt, tools, tool choice, and thinking config are compacted;
 // the optional sampling parameters are dereferenced, or nil when unset.
 func TraceMessagesRequest(r MessagesRequest) map[string]any {
 	summary := make(map[string]any, 12)
 	summary["model"] = r.Model
 	summary["max_tokens"] = r.MaxTokens
 	summary["messages"] = traceMessageParams(r.Messages)
 	summary["system"] = traceAnthropicContent(r.System)
 	summary["stream"] = r.Stream
 	summary["tools"] = traceTools(r.Tools)
 	summary["tool_choice"] = TraceJSON(r.ToolChoice)
 	summary["thinking"] = TraceJSON(r.Thinking)
 	summary["stop_sequences"] = r.StopSequences
 	summary["temperature"] = ptrVal(r.Temperature)
 	summary["top_p"] = ptrVal(r.TopP)
 	summary["top_k"] = ptrVal(r.TopK)
 	return summary
 }
 // TraceMessagesResponse summarizes a MessagesResponse for trace output. The
 // content is compacted through TraceJSON; the id, model, stop reason, and
 // usage are passed through unchanged.
 func TraceMessagesResponse(r MessagesResponse) map[string]any {
 	summary := make(map[string]any, 5)
 	summary["id"] = r.ID
 	summary["model"] = r.Model
 	summary["content"] = TraceJSON(r.Content)
 	summary["stop_reason"] = r.StopReason
 	summary["usage"] = r.Usage
 	return summary
 }
 // traceMessageParams converts each message into a map holding its role and
 // its compacted content. The result is always non-nil, even for no messages.
 func traceMessageParams(msgs []MessageParam) []map[string]any {
 	traced := make([]map[string]any, len(msgs))
 	for i, msg := range msgs {
 		entry := make(map[string]any, 2)
 		entry["role"] = msg.Role
 		entry["content"] = traceAnthropicContent(msg.Content)
 		traced[i] = entry
 	}
 	return traced
 }
 // traceAnthropicContent compacts message or system content for tracing.
 // nil stays nil, a string is truncated, and a []any is traced element by
 // element: map elements go through traceAnthropicBlock and anything else
 // through TraceCompactValue. Every other type falls back to TraceJSON.
 func traceAnthropicContent(content any) any {
 	if content == nil {
 		return nil
 	}
 	if text, isText := content.(string); isText {
 		return TraceTruncateString(text)
 	}
 	items, isList := content.([]any)
 	if !isList {
 		return TraceJSON(content)
 	}
 	traced := make([]any, len(items))
 	for i, item := range items {
 		if blockMap, isMap := item.(map[string]any); isMap {
 			traced[i] = traceAnthropicBlock(blockMap)
 		} else {
 			traced[i] = TraceCompactValue(item, 0)
 		}
 	}
 	return traced
 }
 // traceAnthropicBlock returns a compact trace representation of one decoded
 // content block. The output always carries the block's "type" (empty when it
 // is missing or not a string) plus type-specific fields:
 //   - "text" / "thinking": the truncated text
 //   - "tool_use" / "server_tool_use": id, name, and compacted input
 //   - "tool_result" / "web_search_tool_result": tool_use_id and compacted content
 //   - "image": the source's type, media type, url, and the length of its data
 //   - anything else: the whole block, compacted, under "block"
 func traceAnthropicBlock(block map[string]any) map[string]any {
 	blockType, _ := block["type"].(string)
 	out := map[string]any{"type": blockType}
 	switch blockType {
 	case "text":
 		if text, ok := block["text"].(string); ok {
 			out["text"] = TraceTruncateString(text)
 		} else {
 			out["text"] = TraceCompactValue(block["text"], 0)
 		}
 	case "thinking":
 		if thinking, ok := block["thinking"].(string); ok {
 			out["thinking"] = TraceTruncateString(thinking)
 		} else {
 			out["thinking"] = TraceCompactValue(block["thinking"], 0)
 		}
 	case "tool_use", "server_tool_use":
 		out["id"] = block["id"]
 		out["name"] = block["name"]
 		out["input"] = TraceCompactValue(block["input"], 0)
 	case "tool_result", "web_search_tool_result":
 		out["tool_use_id"] = block["tool_use_id"]
 		out["content"] = TraceCompactValue(block["content"], 0)
 	case "image":
 		if source, ok := block["source"].(map[string]any); ok {
 			// Record only the payload size, never the payload itself. A
 			// missing or non-string "data" counts as 0; formatting it with
 			// fmt.Sprint would report the length of "<nil>" and would copy
 			// the whole string just to measure it.
 			dataLen := 0
 			if data, isString := source["data"].(string); isString {
 				dataLen = len(data)
 			}
 			out["source"] = map[string]any{
 				"type":       source["type"],
 				"media_type": source["media_type"],
 				"url":        source["url"],
 				"data_len":   dataLen,
 			}
 		}
 	default:
 		out["block"] = TraceCompactValue(block, 0)
 	}
 	return out
 }
 // traceTools applies TraceTool to every tool. The result is always non-nil,
 // even when tools is empty.
 func traceTools(tools []Tool) []map[string]any {
 	traced := make([]map[string]any, len(tools))
 	for i := range tools {
 		traced[i] = TraceTool(tools[i])
 	}
 	return traced
 }
 // TraceTool summarizes an Anthropic Tool for trace output: its type, name,
 // and max uses are passed through, the description is truncated, and the
 // input schema is compacted through TraceJSON.
 func TraceTool(t Tool) map[string]any {
 	summary := make(map[string]any, 5)
 	summary["type"] = t.Type
 	summary["name"] = t.Name
 	summary["description"] = TraceTruncateString(t.Description)
 	summary["input_schema"] = TraceJSON(t.InputSchema)
 	summary["max_uses"] = t.MaxUses
 	return summary
 }
 // ContentBlockTypes lists the "type" of each block when content is a []any.
 // A map element contributes its "type" string (empty when absent or not a
 // string); any other element contributes its Go type name. Content that is
 // not a []any yields nil.
 func ContentBlockTypes(content any) []string {
 	items, isList := content.([]any)
 	if !isList {
 		return nil
 	}
 	names := make([]string, len(items))
 	for i, item := range items {
 		switch b := item.(type) {
 		case map[string]any:
 			names[i], _ = b["type"].(string)
 		default:
 			names[i] = fmt.Sprintf("%T", item)
 		}
 	}
 	return names
 }
 // ptrVal returns the value v points to, or nil when v is nil.
 func ptrVal[T any](v *T) any {
 	if v != nil {
 		return *v
 	}
 	return nil
 }
 // ---------------------------------------------------------------------------
 // Ollama api.* tracing (shared between anthropic and middleware packages)
 // ---------------------------------------------------------------------------
 // TraceChatRequest summarizes an Ollama ChatRequest for trace output. A nil
 // request yields a nil map. An unset Stream pointer is reported as false.
 func TraceChatRequest(req *api.ChatRequest) map[string]any {
 	if req == nil {
 		return nil
 	}
 	streaming := req.Stream != nil && *req.Stream
 	summary := make(map[string]any, 6)
 	summary["model"] = req.Model
 	summary["messages"] = TraceAPIMessages(req.Messages)
 	summary["tools"] = TraceAPITools(req.Tools)
 	summary["stream"] = streaming
 	summary["options"] = req.Options
 	summary["think"] = TraceJSON(req.Think)
 	return summary
 }
 // TraceChatResponse summarizes an Ollama ChatResponse for trace output: the
 // model, done flag, and done reason are passed through, the message goes
 // through TraceAPIMessage, and the metrics through TraceJSON.
 func TraceChatResponse(resp api.ChatResponse) map[string]any {
 	summary := make(map[string]any, 5)
 	summary["model"] = resp.Model
 	summary["done"] = resp.Done
 	summary["done_reason"] = resp.DoneReason
 	summary["message"] = TraceAPIMessage(resp.Message)
 	summary["metrics"] = TraceJSON(resp.Metrics)
 	return summary
 }
 // TraceAPIMessages applies TraceAPIMessage to every message. The result is
 // always non-nil, even when msgs is empty.
 func TraceAPIMessages(msgs []api.Message) []map[string]any {
 	traced := make([]map[string]any, len(msgs))
 	for i := range msgs {
 		traced[i] = TraceAPIMessage(msgs[i])
 	}
 	return traced
 }
 // TraceAPIMessage summarizes a single api.Message for trace output. Content
 // and thinking are truncated, images are reduced to their byte sizes, tool
 // calls are compacted, and the role, tool name, and tool call id are passed
 // through unchanged.
 func TraceAPIMessage(m api.Message) map[string]any {
 	summary := make(map[string]any, 7)
 	summary["role"] = m.Role
 	summary["content"] = TraceTruncateString(m.Content)
 	summary["thinking"] = TraceTruncateString(m.Thinking)
 	summary["images"] = traceImageSizes(m.Images)
 	summary["tool_calls"] = traceToolCalls(m.ToolCalls)
 	summary["tool_name"] = m.ToolName
 	summary["tool_call_id"] = m.ToolCallID
 	return summary
 }
 // traceImageSizes returns the length of each image, in order, or nil when
 // there are no images.
 func traceImageSizes(images []api.ImageData) []int {
 	count := len(images)
 	if count == 0 {
 		return nil
 	}
 	sizes := make([]int, count)
 	for i := range images {
 		sizes[i] = len(images[i])
 	}
 	return sizes
 }
 // TraceAPITools applies TraceAPITool to every tool. The result is always
 // non-nil, even when tools is empty.
 func TraceAPITools(tools api.Tools) []map[string]any {
 	traced := make([]map[string]any, len(tools))
 	for i := range tools {
 		traced[i] = TraceAPITool(tools[i])
 	}
 	return traced
 }
 // TraceAPITool summarizes a single api.Tool for trace output: its type and
 // function name are passed through, the function description is truncated,
 // and the function parameters are compacted through TraceJSON.
 func TraceAPITool(t api.Tool) map[string]any {
 	fn := t.Function
 	summary := make(map[string]any, 4)
 	summary["type"] = t.Type
 	summary["name"] = fn.Name
 	summary["description"] = TraceTruncateString(fn.Description)
 	summary["parameters"] = TraceJSON(fn.Parameters)
 	return summary
 }
 // TraceToolCall summarizes an api.ToolCall for trace output: its id, the
 // called function's name, and that function's arguments compacted through
 // TraceJSON.
 func TraceToolCall(tc api.ToolCall) map[string]any {
 	summary := make(map[string]any, 3)
 	summary["id"] = tc.ID
 	summary["name"] = tc.Function.Name
 	summary["args"] = TraceJSON(tc.Function.Arguments)
 	return summary
 }
 // traceToolCalls applies TraceToolCall to every tool call, or returns nil
 // when there are none.
 func traceToolCalls(tcs []api.ToolCall) []map[string]any {
 	count := len(tcs)
 	if count == 0 {
 		return nil
 	}
 	traced := make([]map[string]any, count)
 	for i := range tcs {
 		traced[i] = TraceToolCall(tcs[i])
 	}
 	return traced
 }
--- a/api/client.go
+++ b/api/client.go
@@ -449,6 +449,16 @@ func (c *Client) Version(ctx context.Context) (string, error) {
 	return version.Version, nil
 }
 // CloudStatusExperimental issues a GET to /api/status and returns the decoded
 // StatusResponse, whose Cloud field reports whether cloud features are
 // disabled on the server. On failure it returns a nil response and the error.
 func (c *Client) CloudStatusExperimental(ctx context.Context) (*StatusResponse, error) {
 	status := new(StatusResponse)
 	err := c.do(ctx, http.MethodGet, "/api/status", nil, status)
 	if err != nil {
 		return nil, err
 	}
 	return status, nil
 }
 // Signout will signout a client for a local ollama server.
 func (c *Client) Signout(ctx context.Context) error {
 	return c.do(ctx, http.MethodPost, "/api/signout", nil, nil)
--- a/api/types.go
+++ b/api/types.go
@@ -436,6 +436,7 @@ type ToolProperty struct {
 	Description string             `json:"description,omitempty"`
 	Enum        []any              `json:"enum,omitempty"`
 	Properties  *ToolPropertiesMap `json:"properties,omitempty"`
 	Required    []string           `json:"required,omitempty"`
 }
 // ToTypeScriptType converts a ToolProperty to a TypeScript type string
@@ -834,6 +835,16 @@ type TokenResponse struct {
 	Token string `json:"token"`
 }
 // CloudStatus is the "cloud" section of a StatusResponse.
 type CloudStatus struct {
 	// Disabled is serialized as "disabled".
 	// NOTE(review): presumably true when cloud features are turned off — confirm.
 	Disabled bool   `json:"disabled"`
 	// Source is serialized as "source".
 	// NOTE(review): presumably names where the setting came from — confirm
 	// against the server.
 	Source   string `json:"source"`
 }
 // StatusResponse is the response from [Client.CloudStatusExperimental].
 type StatusResponse struct {
 	// Cloud carries the cloud status, serialized as "cloud".
 	Cloud CloudStatus `json:"cloud"`
 }
 // GenerateResponse is the response passed into [GenerateResponseFunc].
 type GenerateResponse struct {
 	// Model is the model name that generated the response.
--- a/app/README.md
+++ b/app/README.md
@@ -75,9 +75,9 @@ The `-dev` flag enables:
 CI builds with Xcode 14.1 for OS compatibility prior to v13.  If you want to manually build v11+ support, you can download the older Xcode [here](https://developer.apple.com/services-account/download?path=/Developer_Tools/Xcode_14.1/Xcode_14.1.xip), extract, then `mv ./Xcode.app /Applications/Xcode_14.1.0.app` then activate with:
 ```
-export CGO_CFLAGS=-mmacosx-version-min=12.0
+export CGO_CFLAGS="-O3 -mmacosx-version-min=12.0"
-export CGO_CXXFLAGS=-mmacosx-version-min=12.0
+export CGO_CXXFLAGS="-O3 -mmacosx-version-min=12.0"
-export CGO_LDFLAGS=-mmacosx-version-min=12.0
+export CGO_LDFLAGS="-mmacosx-version-min=12.0"
 export SDKROOT=/Applications/Xcode_14.1.0.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk
 export DEVELOPER_DIR=/Applications/Xcode_14.1.0.app/Contents/Developer
 ```
--- a/app/cmd/app/app.go
+++ b/app/cmd/app/app.go
@@ -35,6 +35,7 @@ import (
 var (
 	wv           = &Webview{}
 	uiServerPort int
 	appStore     *store.Store
 )
 var debug = strings.EqualFold(os.Getenv("OLLAMA_DEBUG"), "true") || os.Getenv("OLLAMA_DEBUG") == "1"
@@ -208,6 +209,7 @@ func main() {
 	uiServerPort = port
 	st := &store.Store{}
 	appStore = st
 	// Enable CORS in development mode
 	if devMode {
@@ -253,6 +255,8 @@ func main() {
 		done <- osrv.Run(octx)
 	}()
 	upd := &updater.Updater{Store: st}
 	uiServer := ui.Server{
 		Token: token,
 		Restart: func() {
@@ -267,6 +271,10 @@ func main() {
 		ToolRegistry: toolRegistry,
 		Dev:          devMode,
 		Logger:       slog.Default(),
 		Updater:      upd,
 		UpdateAvailableFunc: func() {
 			UpdateAvailable("")
 		},
 	}
 	srv := &http.Server{
@@ -284,8 +292,20 @@ func main() {
 		slog.Debug("background desktop server done")
 	}()
-	updater := &updater.Updater{Store: st}
+	upd.StartBackgroundUpdaterChecker(ctx, UpdateAvailable)
-	updater.StartBackgroundUpdaterChecker(ctx, UpdateAvailable)
+
 	// Check for pending updates on startup (show tray notification if update is ready)
 	if updater.IsUpdatePending() {
 		// On Windows, the tray is initialized in osRun(). Calling UpdateAvailable
 		// before that would dereference a nil tray callback.
 		// TODO: refactor so the update check runs after platform init on all platforms.
 		if runtime.GOOS == "windows" {
 			slog.Debug("update pending on startup, deferring tray notification until tray initialization")
 		} else {
 			slog.Debug("update pending on startup, showing tray notification")
 			UpdateAvailable("")
 		}
 	}
 	hasCompletedFirstRun, err := st.HasCompletedFirstRun()
 	if err != nil {
@@ -348,6 +368,17 @@ func startHiddenTasks() {
 			// CLI triggered app startup use-case
 			slog.Info("deferring pending update for fast startup")
 		} else {
 			// Check if auto-update is enabled before automatically upgrading
 			settings, err := appStore.Settings()
 			if err != nil {
 				slog.Warn("failed to load settings for upgrade check", "error", err)
 			} else if !settings.AutoUpdateEnabled {
 				slog.Info("auto-update disabled, skipping automatic upgrade at startup")
 				// Still show tray notification so user knows update is ready
 				UpdateAvailable("")
 				return
 			}
 			if err := updater.DoUpgradeAtStartup(); err != nil {
 				slog.Info("unable to perform upgrade at startup", "error", err)
 				// Make sure the restart to upgrade menu shows so we can attempt an interactive upgrade to get authorization
--- a/app/cmd/app/app_windows.go
+++ b/app/cmd/app/app_windows.go
@@ -154,6 +154,10 @@ func handleURLSchemeRequest(urlScheme string) {
 }
 // UpdateAvailable forwards ver to the tray's UpdateAvailable and returns its
 // result. While the tray (app.t) is still nil it logs a debug message and
 // returns nil instead.
 func UpdateAvailable(ver string) error {
 	if tray := app.t; tray != nil {
 		return tray.UpdateAvailable(ver)
 	}
 	slog.Debug("tray not yet initialized, skipping update notification")
 	return nil
 }
@@ -165,6 +169,14 @@ func osRun(shutdown func(), hasCompletedFirstRun, startHidden bool) {
 		log.Fatalf("Failed to start: %s", err)
 	}
 	// Check for pending updates now that the tray is initialized.
 	// The platform-independent check in app.go fires before osRun,
 	// when app.t is still nil, so we must re-check here.
 	if updater.IsUpdatePending() {
 		slog.Debug("update pending on startup, showing tray notification")
 		UpdateAvailable("")
 	}
 	signals := make(chan os.Signal, 1)
 	signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
--- a/app/server/server.go
+++ b/app/server/server.go
@@ -41,6 +41,11 @@ type InferenceCompute struct {
 	VRAM    string
 }
 // InferenceInfo is the result of GetInferenceInfo: what was recovered from
 // the server log about inference hardware and the default context length.
 type InferenceInfo struct {
 	// Computes holds one entry per "inference compute" log line matched.
 	Computes             []InferenceCompute
 	// DefaultContextLength is the default_num_ctx value parsed from the
 	// "vram-based default context" log line; it stays 0 when that line is
 	// not found.
 	DefaultContextLength int
 }
 func New(s *store.Store, devMode bool) *Server {
 	p := resolvePath("ollama")
 	return &Server{store: s, bin: p, dev: devMode}
@@ -205,6 +210,11 @@ func (s *Server) cmd(ctx context.Context) (*exec.Cmd, error) {
 		return nil, err
 	}
 	cloudDisabled, err := s.store.CloudDisabled()
 	if err != nil {
 		return nil, err
 	}
 	cmd := commandContext(ctx, s.bin, "serve")
 	cmd.Stdout, cmd.Stderr = s.log, s.log
@@ -230,6 +240,11 @@ func (s *Server) cmd(ctx context.Context) (*exec.Cmd, error) {
 	if settings.ContextLength > 0 {
 		env["OLLAMA_CONTEXT_LENGTH"] = strconv.Itoa(settings.ContextLength)
 	}
 	if cloudDisabled {
 		env["OLLAMA_NO_CLOUD"] = "1"
 	} else {
 		env["OLLAMA_NO_CLOUD"] = "0"
 	}
 	cmd.Env = []string{}
 	for k, v := range env {
 		cmd.Env = append(cmd.Env, k+"="+v)
@@ -262,9 +277,12 @@ func openRotatingLog() (io.WriteCloser, error) {
 // Attempt to retrieve inference compute information from the server
 // log.  Set ctx to timeout to control how long to wait for the logs to appear
-func GetInferenceComputer(ctx context.Context) ([]InferenceCompute, error) {
+func GetInferenceInfo(ctx context.Context) (*InferenceInfo, error) {
-	inference := []InferenceCompute{}
+	info := &InferenceInfo{}
-	marker := regexp.MustCompile(`inference compute.*library=`)
+	computeMarker := regexp.MustCompile(`inference compute.*library=`)
 	defaultCtxMarker := regexp.MustCompile(`vram-based default context`)
 	defaultCtxRegex := regexp.MustCompile(`default_num_ctx=(\d+)`)
 	q := `inference compute.*%s=["]([^"]*)["]`
 	nq := `inference compute.*%s=(\S+)\s`
 	type regex struct {
@@ -330,8 +348,8 @@ func GetInferenceComputer(ctx context.Context) ([]InferenceCompute, error) {
 		scanner := bufio.NewScanner(file)
 		for scanner.Scan() {
 			line := scanner.Text()
-			match := marker.FindStringSubmatch(line)
+			// Check for inference compute lines
-			if len(match) > 0 {
+			if computeMarker.MatchString(line) {
 				ic := InferenceCompute{
 					Library: get("library", line),
 					Variant: get("variant", line),
@@ -342,12 +360,25 @@ func GetInferenceComputer(ctx context.Context) ([]InferenceCompute, error) {
 				}
 				slog.Info("Matched", "inference compute", ic)
-				inference = append(inference, ic)
+				info.Computes = append(info.Computes, ic)
-			} else {
+				continue
-				// Break out on first non matching line after we start matching
+			}
-				if len(inference) > 0 {
+			// Check for default context length line
-					return inference, nil
+			if defaultCtxMarker.MatchString(line) {
 				match := defaultCtxRegex.FindStringSubmatch(line)
 				if len(match) > 1 {
 					numCtx, err := strconv.Atoi(match[1])
 					if err == nil {
 						info.DefaultContextLength = numCtx
 						slog.Info("Matched default context length", "default_num_ctx", numCtx)
 					}
 				}
 				return info, nil
 			}
 			// If we've found compute info but hit a non-matching line, return what we have
 			// This handles older server versions that don't log the default context line
 			if len(info.Computes) > 0 {
 				return info, nil
 			}
 		}
 		time.Sleep(100 * time.Millisecond)
--- a/app/server/server_test.go
+++ b/app/server/server_test.go
@@ -111,7 +111,7 @@ func TestServerCmd(t *testing.T) {
 			for _, want := range tt.want {
 				found := false
 				for _, env := range cmd.Env {
-					if strings.Contains(env, want) {
+					if strings.HasPrefix(env, want) {
 						found = true
 						break
 					}
@@ -123,7 +123,7 @@ func TestServerCmd(t *testing.T) {
 			for _, dont := range tt.dont {
 				for _, env := range cmd.Env {
-					if strings.Contains(env, dont) {
+					if strings.HasPrefix(env, dont) {
 						t.Errorf("unexpected environment variable: %s", env)
 					}
 				}
@@ -136,44 +136,119 @@ func TestServerCmd(t *testing.T) {
 	}
 }
-func TestGetInferenceComputer(t *testing.T) {
+func TestServerCmdCloudSettingEnv(t *testing.T) {
 	tests := []struct {
-		name string
+		name          string
-		log  string
+		envValue      string
-		exp  []InferenceCompute
+		configContent string
 		want          string
 	}{
 		{
 			name: "default cloud enabled",
 			want: "OLLAMA_NO_CLOUD=0",
 		},
 		{
 			name:     "env disables cloud",
 			envValue: "1",
 			want:     "OLLAMA_NO_CLOUD=1",
 		},
 		{
 			name:          "config disables cloud",
 			configContent: `{"disable_ollama_cloud": true}`,
 			want:          "OLLAMA_NO_CLOUD=1",
 		},
 		{
 			name:     "invalid env disables cloud",
 			envValue: "invalid",
 			want:     "OLLAMA_NO_CLOUD=1",
 		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			tmpHome := t.TempDir()
 			t.Setenv("HOME", tmpHome)
 			t.Setenv("USERPROFILE", tmpHome)
 			t.Setenv("OLLAMA_NO_CLOUD", tt.envValue)
 			if tt.configContent != "" {
 				configDir := filepath.Join(tmpHome, ".ollama")
 				if err := os.MkdirAll(configDir, 0o755); err != nil {
 					t.Fatalf("mkdir config dir: %v", err)
 				}
 				configPath := filepath.Join(configDir, "server.json")
 				if err := os.WriteFile(configPath, []byte(tt.configContent), 0o644); err != nil {
 					t.Fatalf("write config: %v", err)
 				}
 			}
 			st := &store.Store{DBPath: filepath.Join(t.TempDir(), "db.sqlite")}
 			defer st.Close()
 			s := &Server{store: st}
 			cmd, err := s.cmd(t.Context())
 			if err != nil {
 				t.Fatalf("s.cmd() error = %v", err)
 			}
 			found := false
 			for _, env := range cmd.Env {
 				if env == tt.want {
 					found = true
 					break
 				}
 			}
 			if !found {
 				t.Fatalf("expected environment variable %q in command env", tt.want)
 			}
 		})
 	}
 }
 func TestGetInferenceInfo(t *testing.T) {
 	tests := []struct {
 		name             string
 		log              string
 		expComputes      []InferenceCompute
 		expDefaultCtxLen int
 	}{
 		{
 			name: "metal",
 			log: `time=2025-06-30T09:23:07.374-07:00 level=DEBUG source=sched.go:108 msg="starting llm scheduler"
 time=2025-06-30T09:23:07.416-07:00 level=INFO source=types.go:130 msg="inference compute" id=0 library=metal variant="" compute="" driver=0.0 name="" total="96.0 GiB" available="96.0 GiB"
 time=2025-06-30T09:23:07.417-07:00 level=INFO source=routes.go:1721 msg="vram-based default context" total_vram="96.0 GiB" default_num_ctx=262144
 time=2025-06-30T09:25:56.197-07:00 level=DEBUG source=ggml.go:155 msg="key not found" key=general.alignment default=32
 `,
-			exp: []InferenceCompute{{
+			expComputes: []InferenceCompute{{
 				Library: "metal",
 				Driver:  "0.0",
 				VRAM:    "96.0 GiB",
 			}},
 			expDefaultCtxLen: 262144,
 		},
 		{
 			name: "cpu",
 			log: `time=2025-07-01T17:59:51.470Z level=INFO source=gpu.go:377 msg="no compatible GPUs were discovered"
 time=2025-07-01T17:59:51.470Z level=INFO source=types.go:130 msg="inference compute" id=0 library=cpu variant="" compute="" driver=0.0 name="" total="31.3 GiB" available="30.4 GiB"
 time=2025-07-01T17:59:51.471Z level=INFO source=routes.go:1721 msg="vram-based default context" total_vram="31.3 GiB" default_num_ctx=32768
 [GIN] 2025/07/01 - 18:00:09 | 200 |      50.263µs | 100.126.204.152 | HEAD     "/"
 `,
-			exp: []InferenceCompute{{
+			expComputes: []InferenceCompute{{
 				Library: "cpu",
 				Driver:  "0.0",
 				VRAM:    "31.3 GiB",
 			}},
 			expDefaultCtxLen: 32768,
 		},
 		{
 			name: "cuda1",
 			log: `time=2025-07-01T19:33:43.162Z level=DEBUG source=amd_linux.go:419 msg="amdgpu driver not detected /sys/module/amdgpu"
 releasing cuda driver library
 time=2025-07-01T19:33:43.162Z level=INFO source=types.go:130 msg="inference compute" id=GPU-452cac9f-6960-839c-4fb3-0cec83699196 library=cuda variant=v12 compute=6.1 driver=12.7 name="NVIDIA GeForce GT 1030" total="3.9 GiB" available="3.9 GiB"
 time=2025-07-01T19:33:43.163Z level=INFO source=routes.go:1721 msg="vram-based default context" total_vram="3.9 GiB" default_num_ctx=4096
 [GIN] 2025/07/01 - 18:00:09 | 200 |      50.263µs | 100.126.204.152 | HEAD     "/"
 `,
-			exp: []InferenceCompute{{
+			expComputes: []InferenceCompute{{
 				Library: "cuda",
 				Variant: "v12",
 				Compute: "6.1",
@@ -181,6 +256,7 @@ time=2025-07-01T19:33:43.162Z level=INFO source=types.go:130 msg="inference comp
 				Name:    "NVIDIA GeForce GT 1030",
 				VRAM:    "3.9 GiB",
 			}},
 			expDefaultCtxLen: 4096,
 		},
 		{
 			name: "frank",
@@ -188,9 +264,10 @@ time=2025-07-01T19:33:43.162Z level=INFO source=types.go:130 msg="inference comp
 		releasing cuda driver library
 		time=2025-07-01T19:36:13.315Z level=INFO source=types.go:130 msg="inference compute" id=GPU-d6de3398-9932-6902-11ec-fee8e424c8a2 library=cuda variant=v12 compute=7.5 driver=12.8 name="NVIDIA GeForce RTX 2080 Ti" total="10.6 GiB" available="10.4 GiB"
 		time=2025-07-01T19:36:13.315Z level=INFO source=types.go:130 msg="inference compute" id=GPU-9abb57639fa80c50 library=rocm variant="" compute=gfx1030 driver=6.3 name=1002:73bf total="16.0 GiB" available="1.3 GiB"
 		time=2025-07-01T19:36:13.316Z level=INFO source=routes.go:1721 msg="vram-based default context" total_vram="26.6 GiB" default_num_ctx=32768
 		[GIN] 2025/07/01 - 18:00:09 | 200 |      50.263µs | 100.126.204.152 | HEAD     "/"
 		`,
-			exp: []InferenceCompute{
+			expComputes: []InferenceCompute{
 				{
 					Library: "cuda",
 					Variant: "v12",
@@ -207,6 +284,20 @@ time=2025-07-01T19:33:43.162Z level=INFO source=types.go:130 msg="inference comp
 					VRAM:    "16.0 GiB",
 				},
 			},
 			expDefaultCtxLen: 32768,
 		},
 		{
 			name: "missing_default_context",
 			log: `time=2025-06-30T09:23:07.374-07:00 level=DEBUG source=sched.go:108 msg="starting llm scheduler"
 time=2025-06-30T09:23:07.416-07:00 level=INFO source=types.go:130 msg="inference compute" id=0 library=metal variant="" compute="" driver=0.0 name="" total="96.0 GiB" available="96.0 GiB"
 time=2025-06-30T09:25:56.197-07:00 level=DEBUG source=ggml.go:155 msg="key not found" key=general.alignment default=32
 `,
 			expComputes: []InferenceCompute{{
 				Library: "metal",
 				Driver:  "0.0",
 				VRAM:    "96.0 GiB",
 			}},
 			expDefaultCtxLen: 0, // No default context line, should return 0
 		},
 	}
 	for _, tt := range tests {
@@ -219,18 +310,21 @@ time=2025-07-01T19:33:43.162Z level=INFO source=types.go:130 msg="inference comp
 			}
 			ctx, cancel := context.WithTimeout(t.Context(), 10*time.Millisecond)
 			defer cancel()
-			ics, err := GetInferenceComputer(ctx)
+			info, err := GetInferenceInfo(ctx)
 			if err != nil {
-				t.Fatalf(" failed to get inference compute: %v", err)
+				t.Fatalf("failed to get inference info: %v", err)
 			}
-			if !reflect.DeepEqual(ics, tt.exp) {
+			if !reflect.DeepEqual(info.Computes, tt.expComputes) {
-				t.Fatalf("got:\n%#v\nwant:\n%#v", ics, tt.exp)
+				t.Fatalf("computes mismatch\ngot:\n%#v\nwant:\n%#v", info.Computes, tt.expComputes)
 			}
 			if info.DefaultContextLength != tt.expDefaultCtxLen {
 				t.Fatalf("default context length mismatch: got %d, want %d", info.DefaultContextLength, tt.expDefaultCtxLen)
 			}
 		})
 	}
 }
-func TestGetInferenceComputerTimeout(t *testing.T) {
+func TestGetInferenceInfoTimeout(t *testing.T) {
 	ctx, cancel := context.WithTimeout(t.Context(), 10*time.Millisecond)
 	defer cancel()
 	tmpDir := t.TempDir()
@@ -239,7 +333,7 @@ func TestGetInferenceComputerTimeout(t *testing.T) {
 	if err != nil {
 		t.Fatalf("failed to write log file %s: %s", serverLogPath, err)
 	}
-	_, err = GetInferenceComputer(ctx)
+	_, err = GetInferenceInfo(ctx)
 	if err == nil {
 		t.Fatal("expected timeout")
 	}
--- a/app/store/cloud_config.go
+++ b/app/store/cloud_config.go
@@ -0,0 +1,128 @@
 //go:build windows || darwin
 package store
 import (
 	"encoding/json"
 	"errors"
 	"fmt"
 	"os"
 	"path/filepath"
 	"github.com/ollama/ollama/envconfig"
 )
 const serverConfigFilename = "server.json"
 // serverConfig is the shape decoded from the server config file
 // (serverConfigFilename) when reading the cloud setting.
 type serverConfig struct {
 	// DisableOllamaCloud maps to the "disable_ollama_cloud" JSON key and is
 	// omitted from marshaled output when false.
 	DisableOllamaCloud bool `json:"disable_ollama_cloud,omitempty"`
 }
 // CloudDisabled reports whether cloud features are turned off.
 // It wraps CloudStatus and drops the source string; cloud counts as disabled
 // when OLLAMA_NO_CLOUD is set OR ~/.ollama/server.json:disable_ollama_cloud is true.
 func (s *Store) CloudDisabled() (bool, error) {
 	off, _, statusErr := s.CloudStatus()
 	return off, statusErr
 }
 // CloudStatus reports whether cloud is disabled along with where that
 // decision came from. The source is one of: "none", "env", "config", "both".
 // An error is returned if the database cannot be prepared or the server
 // config cannot be read.
 func (s *Store) CloudStatus() (bool, string, error) {
 	if dbErr := s.ensureDB(); dbErr != nil {
 		return false, "", dbErr
 	}
 	fromConfig, cfgErr := readServerConfigCloudDisabled()
 	if cfgErr != nil {
 		return false, "", cfgErr
 	}
 	fromEnv := envconfig.NoCloudEnv()
 	// Either source alone is enough to disable cloud.
 	disabled := fromEnv || fromConfig
 	source := cloudStatusSource(fromEnv, fromConfig)
 	return disabled, source, nil
 }
 // SetCloudEnabled persists the cloud toggle to ~/.ollama/server.json.
 // The store's database is prepared first; a failure there is returned
 // without touching the config file.
 func (s *Store) SetCloudEnabled(enabled bool) error {
 	dbErr := s.ensureDB()
 	if dbErr != nil {
 		return dbErr
 	}
 	return setCloudEnabled(enabled)
 }
 // setCloudEnabled writes "disable_ollama_cloud": !enabled into the server
 // config file, creating the parent directory when it does not exist.
 //
 // Other top-level keys already present in the file are preserved. If the
 // existing file does not decode to a JSON object (invalid JSON, or a bare
 // `null`), it is replaced with a fresh object holding only the cloud key.
 // Read failures other than "file does not exist" are returned.
 func setCloudEnabled(enabled bool) error {
 	configPath, err := serverConfigPath()
 	if err != nil {
 		return err
 	}
 	if err := os.MkdirAll(filepath.Dir(configPath), 0o755); err != nil {
 		return fmt.Errorf("create server config directory: %w", err)
 	}
 	configMap := map[string]any{}
 	if data, err := os.ReadFile(configPath); err == nil {
 		// json.Unmarshal accepts a literal `null` without error but sets the
 		// map to nil, which would panic on the assignment below. Treat that
 		// the same as invalid JSON and start over with an empty object.
 		if err := json.Unmarshal(data, &configMap); err != nil || configMap == nil {
 			configMap = map[string]any{}
 		}
 	} else if !errors.Is(err, os.ErrNotExist) {
 		return fmt.Errorf("read server config: %w", err)
 	}
 	configMap["disable_ollama_cloud"] = !enabled
 	data, err := json.MarshalIndent(configMap, "", "  ")
 	if err != nil {
 		return fmt.Errorf("marshal server config: %w", err)
 	}
 	// Terminate the file with a newline.
 	data = append(data, '\n')
 	if err := os.WriteFile(configPath, data, 0o644); err != nil {
 		return fmt.Errorf("write server config: %w", err)
 	}
 	return nil
 }
 // readServerConfigCloudDisabled loads the disable_ollama_cloud flag from the
 // server config file. A missing file, or content that fails to decode, yields
 // false with no error so that startup is not blocked; any other read failure
 // is returned wrapped.
 func readServerConfigCloudDisabled() (bool, error) {
 	path, pathErr := serverConfigPath()
 	if pathErr != nil {
 		return false, pathErr
 	}
 	raw, readErr := os.ReadFile(path)
 	switch {
 	case readErr == nil:
 		// File was read; continue to decoding.
 	case errors.Is(readErr, os.ErrNotExist):
 		return false, nil
 	default:
 		return false, fmt.Errorf("read server config: %w", readErr)
 	}
 	var parsed serverConfig
 	if decodeErr := json.Unmarshal(raw, &parsed); decodeErr != nil {
 		// Invalid or unexpected JSON is treated as the default (not disabled).
 		return false, nil
 	}
 	return parsed.DisableOllamaCloud, nil
 }
 // serverConfigPath returns <home>/.ollama/<serverConfigFilename> for the
 // current user, or an error when the home directory cannot be resolved.
 func serverConfigPath() (string, error) {
 	homeDir, homeErr := os.UserHomeDir()
 	if homeErr == nil {
 		return filepath.Join(homeDir, ".ollama", serverConfigFilename), nil
 	}
 	return "", fmt.Errorf("resolve home directory: %w", homeErr)
 }
 // cloudStatusSource names which input(s) disabled cloud: "both" when the
 // environment and the config file each disable it, "env" or "config" when
 // only one does, and "none" when neither does.
 func cloudStatusSource(envDisabled bool, configDisabled bool) string {
 	if envDisabled {
 		if configDisabled {
 			return "both"
 		}
 		return "env"
 	}
 	if configDisabled {
 		return "config"
 	}
 	return "none"
 }
--- a/app/store/cloud_config_test.go
+++ b/app/store/cloud_config_test.go
@@ -0,0 +1,130 @@
 //go:build windows || darwin
 package store
 import (
 	"encoding/json"
 	"os"
 	"path/filepath"
 	"testing"
 )
 // TestCloudDisabled checks CloudDisabled and CloudStatus against combinations
 // of the OLLAMA_NO_CLOUD environment variable and server.json content,
 // including the case where both sources disable cloud.
 func TestCloudDisabled(t *testing.T) {
 	tests := []struct {
 		name          string
 		envValue      string
 		configContent string
 		wantDisabled  bool
 		wantSource    string
 	}{
 		{
 			name:         "default enabled",
 			wantDisabled: false,
 			wantSource:   "none",
 		},
 		{
 			name:         "env disables cloud",
 			envValue:     "1",
 			wantDisabled: true,
 			wantSource:   "env",
 		},
 		{
 			name:          "config disables cloud",
 			configContent: `{"disable_ollama_cloud": true}`,
 			wantDisabled:  true,
 			wantSource:    "config",
 		},
 		{
 			// Config explicitly leaves cloud enabled, so only env is the source.
 			name:          "env and config",
 			envValue:      "1",
 			configContent: `{"disable_ollama_cloud": false}`,
 			wantDisabled:  true,
 			wantSource:    "env",
 		},
 		{
 			// Both inputs disable cloud, which must be reported as "both".
 			name:          "env and config both disable cloud",
 			envValue:      "1",
 			configContent: `{"disable_ollama_cloud": true}`,
 			wantDisabled:  true,
 			wantSource:    "both",
 		},
 		{
 			name:          "invalid config is ignored",
 			configContent: `{bad`,
 			wantDisabled:  false,
 			wantSource:    "none",
 		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			// Isolate each case in its own home directory and environment.
 			tmpHome := t.TempDir()
 			setTestHome(t, tmpHome)
 			t.Setenv("OLLAMA_NO_CLOUD", tt.envValue)
 			// Only create server.json when the case supplies content.
 			if tt.configContent != "" {
 				configDir := filepath.Join(tmpHome, ".ollama")
 				if err := os.MkdirAll(configDir, 0o755); err != nil {
 					t.Fatalf("mkdir config dir: %v", err)
 				}
 				configPath := filepath.Join(configDir, serverConfigFilename)
 				if err := os.WriteFile(configPath, []byte(tt.configContent), 0o644); err != nil {
 					t.Fatalf("write config: %v", err)
 				}
 			}
 			s := &Store{DBPath: filepath.Join(tmpHome, "db.sqlite")}
 			defer s.Close()
 			disabled, err := s.CloudDisabled()
 			if err != nil {
 				t.Fatalf("CloudDisabled() error = %v", err)
 			}
 			if disabled != tt.wantDisabled {
 				t.Fatalf("CloudDisabled() = %v, want %v", disabled, tt.wantDisabled)
 			}
 			statusDisabled, source, err := s.CloudStatus()
 			if err != nil {
 				t.Fatalf("CloudStatus() error = %v", err)
 			}
 			if statusDisabled != tt.wantDisabled {
 				t.Fatalf("CloudStatus() disabled = %v, want %v", statusDisabled, tt.wantDisabled)
 			}
 			if source != tt.wantSource {
 				t.Fatalf("CloudStatus() source = %v, want %v", source, tt.wantSource)
 			}
 		})
 	}
 }
 // TestSetCloudEnabled seeds server.json with cloud disabled plus an unrelated
 // key, enables cloud through the store, and verifies that the flag flips to
 // false while the unrelated key is kept.
 func TestSetCloudEnabled(t *testing.T) {
 	home := t.TempDir()
 	setTestHome(t, home)
 	dir := filepath.Join(home, ".ollama")
 	if mkErr := os.MkdirAll(dir, 0o755); mkErr != nil {
 		t.Fatalf("mkdir config dir: %v", mkErr)
 	}
 	cfgPath := filepath.Join(dir, serverConfigFilename)
 	seed := []byte(`{"another_key":"value","disable_ollama_cloud":true}`)
 	if seedErr := os.WriteFile(cfgPath, seed, 0o644); seedErr != nil {
 		t.Fatalf("seed config: %v", seedErr)
 	}
 	s := &Store{DBPath: filepath.Join(home, "db.sqlite")}
 	defer s.Close()
 	if setErr := s.SetCloudEnabled(true); setErr != nil {
 		t.Fatalf("SetCloudEnabled(true) error = %v", setErr)
 	}
 	raw, readErr := os.ReadFile(cfgPath)
 	if readErr != nil {
 		t.Fatalf("read config: %v", readErr)
 	}
 	var cfg map[string]any
 	if decodeErr := json.Unmarshal(raw, &cfg); decodeErr != nil {
 		t.Fatalf("unmarshal config: %v", decodeErr)
 	}
 	if flag := cfg["disable_ollama_cloud"]; flag != false {
 		t.Fatalf("disable_ollama_cloud = %v, want false", flag)
 	}
 	if other := cfg["another_key"]; other != "value" {
 		t.Fatalf("another_key = %v, want value", other)
 	}
 }
--- a/app/store/database.go
+++ b/app/store/database.go
@@ -9,12 +9,12 @@ import (
 	"strings"
 	"time"
-	sqlite3 "github.com/mattn/go-sqlite3"
+	_ "github.com/mattn/go-sqlite3"
 )
 // currentSchemaVersion defines the current database schema version.
 // Increment this when making schema changes that require migrations.
-const currentSchemaVersion = 12
+const currentSchemaVersion = 16
 // database wraps the SQLite connection.
 // SQLite handles its own locking for concurrent access:
@@ -73,7 +73,7 @@ func (db *database) init() error {
 		agent BOOLEAN NOT NULL DEFAULT 0,
 		tools BOOLEAN NOT NULL DEFAULT 0,
 		working_dir TEXT NOT NULL DEFAULT '',
-		context_length INTEGER NOT NULL DEFAULT 4096,
+		context_length INTEGER NOT NULL DEFAULT 0,
 		window_width INTEGER NOT NULL DEFAULT 0,
 		window_height INTEGER NOT NULL DEFAULT 0,
 		config_migrated BOOLEAN NOT NULL DEFAULT 0,
@@ -82,9 +82,12 @@ func (db *database) init() error {
 		websearch_enabled BOOLEAN NOT NULL DEFAULT 0,
 		selected_model TEXT NOT NULL DEFAULT '',
 		sidebar_open BOOLEAN NOT NULL DEFAULT 0,
 		last_home_view TEXT NOT NULL DEFAULT 'launch',
 		think_enabled BOOLEAN NOT NULL DEFAULT 0,
 		think_level TEXT NOT NULL DEFAULT '',
 		cloud_setting_migrated BOOLEAN NOT NULL DEFAULT 0,
 		remote TEXT NOT NULL DEFAULT '', -- deprecated
 		auto_update_enabled BOOLEAN NOT NULL DEFAULT 1,
 		schema_version INTEGER NOT NULL DEFAULT %d
 	);
@@ -244,6 +247,30 @@ func (db *database) migrate() error {
 				return fmt.Errorf("migrate v11 to v12: %w", err)
 			}
 			version = 12
 		case 12:
 			// add cloud_setting_migrated column to settings table
 			if err := db.migrateV12ToV13(); err != nil {
 				return fmt.Errorf("migrate v12 to v13: %w", err)
 			}
 			version = 13
 		case 13:
 			// change default context_length from 4096 to 0 (VRAM-based tiered defaults)
 			if err := db.migrateV13ToV14(); err != nil {
 				return fmt.Errorf("migrate v13 to v14: %w", err)
 			}
 			version = 14
 		case 14:
 			// add auto_update_enabled column to settings table
 			if err := db.migrateV14ToV15(); err != nil {
 				return fmt.Errorf("migrate v14 to v15: %w", err)
 			}
 			version = 15
 		case 15:
 			// add last_home_view column to settings table
 			if err := db.migrateV15ToV16(); err != nil {
 				return fmt.Errorf("migrate v15 to v16: %w", err)
 			}
 			version = 16
 		default:
 			// If we have a version we don't recognize, just set it to current
 			// This might happen during development
@@ -452,6 +479,67 @@ func (db *database) migrateV11ToV12() error {
 	return nil
 }
 // migrateV12ToV13 adds the cloud_setting_migrated column to settings and
 // records schema version 13. A duplicate-column error from the ALTER is
 // tolerated.
 func (db *database) migrateV12ToV13() error {
 	const addColumn = `ALTER TABLE settings ADD COLUMN cloud_setting_migrated BOOLEAN NOT NULL DEFAULT 0`
 	if _, alterErr := db.conn.Exec(addColumn); alterErr != nil && !duplicateColumnError(alterErr) {
 		return fmt.Errorf("add cloud_setting_migrated column: %w", alterErr)
 	}
 	if _, bumpErr := db.conn.Exec(`UPDATE settings SET schema_version = 13`); bumpErr != nil {
 		return fmt.Errorf("update schema version: %w", bumpErr)
 	}
 	return nil
 }
 // migrateV13ToV14 rewrites a stored context_length of 4096 (the old default)
 // to 0 and records schema version 14.
 // When context_length is 0, the ollama server uses VRAM-based tiered defaults.
 func (db *database) migrateV13ToV14() error {
 	const resetDefault = `UPDATE settings SET context_length = 0 WHERE context_length = 4096`
 	if _, resetErr := db.conn.Exec(resetDefault); resetErr != nil {
 		return fmt.Errorf("update context_length default: %w", resetErr)
 	}
 	if _, bumpErr := db.conn.Exec(`UPDATE settings SET schema_version = 14`); bumpErr != nil {
 		return fmt.Errorf("update schema version: %w", bumpErr)
 	}
 	return nil
 }
 // migrateV14ToV15 adds the auto_update_enabled column (default 1) to the
 // settings table and records schema version 15. A duplicate-column error from
 // the ALTER is tolerated.
 func (db *database) migrateV14ToV15() error {
 	const addColumn = `ALTER TABLE settings ADD COLUMN auto_update_enabled BOOLEAN NOT NULL DEFAULT 1`
 	if _, alterErr := db.conn.Exec(addColumn); alterErr != nil && !duplicateColumnError(alterErr) {
 		return fmt.Errorf("add auto_update_enabled column: %w", alterErr)
 	}
 	if _, bumpErr := db.conn.Exec(`UPDATE settings SET schema_version = 15`); bumpErr != nil {
 		return fmt.Errorf("update schema version: %w", bumpErr)
 	}
 	return nil
 }
 // migrateV15ToV16 adds the last_home_view column (default 'launch') to the
 // settings table and records schema version 16. A duplicate-column error from
 // the ALTER is tolerated.
 func (db *database) migrateV15ToV16() error {
 	const addColumn = `ALTER TABLE settings ADD COLUMN last_home_view TEXT NOT NULL DEFAULT 'launch'`
 	if _, alterErr := db.conn.Exec(addColumn); alterErr != nil && !duplicateColumnError(alterErr) {
 		return fmt.Errorf("add last_home_view column: %w", alterErr)
 	}
 	if _, bumpErr := db.conn.Exec(`UPDATE settings SET schema_version = 16`); bumpErr != nil {
 		return fmt.Errorf("update schema version: %w", bumpErr)
 	}
 	return nil
 }
 // cleanupOrphanedData removes orphaned records that may exist due to the foreign key bug
 func (db *database) cleanupOrphanedData() error {
 	_, err := db.conn.Exec(`
@@ -482,19 +570,11 @@ func (db *database) cleanupOrphanedData() error {
 }
 func duplicateColumnError(err error) bool {
-	if sqlite3Err, ok := err.(sqlite3.Error); ok {
+	return err != nil && strings.Contains(err.Error(), "duplicate column name")
 		return sqlite3Err.Code == sqlite3.ErrError &&
 			strings.Contains(sqlite3Err.Error(), "duplicate column name")
 	}
 	return false
 }
 func columnNotExists(err error) bool {
-	if sqlite3Err, ok := err.(sqlite3.Error); ok {
+	return err != nil && strings.Contains(err.Error(), "no such column")
 		return sqlite3Err.Code == sqlite3.ErrError &&
 			strings.Contains(sqlite3Err.Error(), "no such column")
 	}
 	return false
 }
 func (db *database) getAllChats() ([]Chat, error) {
@@ -1108,9 +1188,9 @@ func (db *database) getSettings() (Settings, error) {
 	var s Settings
 	err := db.conn.QueryRow(`
-		SELECT expose, survey, browser, models, agent, tools, working_dir, context_length, airplane_mode, turbo_enabled, websearch_enabled, selected_model, sidebar_open, think_enabled, think_level 
+		SELECT expose, survey, browser, models, agent, tools, working_dir, context_length, turbo_enabled, websearch_enabled, selected_model, sidebar_open, last_home_view, think_enabled, think_level, auto_update_enabled
 		FROM settings
-	`).Scan(&s.Expose, &s.Survey, &s.Browser, &s.Models, &s.Agent, &s.Tools, &s.WorkingDir, &s.ContextLength, &s.AirplaneMode, &s.TurboEnabled, &s.WebSearchEnabled, &s.SelectedModel, &s.SidebarOpen, &s.ThinkEnabled, &s.ThinkLevel)
+	`).Scan(&s.Expose, &s.Survey, &s.Browser, &s.Models, &s.Agent, &s.Tools, &s.WorkingDir, &s.ContextLength, &s.TurboEnabled, &s.WebSearchEnabled, &s.SelectedModel, &s.SidebarOpen, &s.LastHomeView, &s.ThinkEnabled, &s.ThinkLevel, &s.AutoUpdateEnabled)
 	if err != nil {
 		return Settings{}, fmt.Errorf("get settings: %w", err)
 	}
@@ -1119,16 +1199,58 @@ func (db *database) getSettings() (Settings, error) {
 }
 func (db *database) setSettings(s Settings) error {
 	lastHomeView := strings.ToLower(strings.TrimSpace(s.LastHomeView))
 	validLaunchView := map[string]struct{}{
 		"launch":   {},
 		"openclaw": {},
 		"claude":   {},
 		"codex":    {},
 		"opencode": {},
 		"droid":    {},
 		"pi":       {},
 	}
 	if lastHomeView != "chat" {
 		if _, ok := validLaunchView[lastHomeView]; !ok {
 			lastHomeView = "launch"
 		}
 	}
 	_, err := db.conn.Exec(`
 		UPDATE settings
-		SET expose = ?, survey = ?, browser = ?, models = ?, agent = ?, tools = ?, working_dir = ?, context_length = ?, airplane_mode = ?, turbo_enabled = ?, websearch_enabled = ?, selected_model = ?, sidebar_open = ?, think_enabled = ?, think_level = ?
+		SET expose = ?, survey = ?, browser = ?, models = ?, agent = ?, tools = ?, working_dir = ?, context_length = ?, turbo_enabled = ?, websearch_enabled = ?, selected_model = ?, sidebar_open = ?, last_home_view = ?, think_enabled = ?, think_level = ?, auto_update_enabled = ?
-	`, s.Expose, s.Survey, s.Browser, s.Models, s.Agent, s.Tools, s.WorkingDir, s.ContextLength, s.AirplaneMode, s.TurboEnabled, s.WebSearchEnabled, s.SelectedModel, s.SidebarOpen, s.ThinkEnabled, s.ThinkLevel)
+	`, s.Expose, s.Survey, s.Browser, s.Models, s.Agent, s.Tools, s.WorkingDir, s.ContextLength, s.TurboEnabled, s.WebSearchEnabled, s.SelectedModel, s.SidebarOpen, lastHomeView, s.ThinkEnabled, s.ThinkLevel, s.AutoUpdateEnabled)
 	if err != nil {
 		return fmt.Errorf("set settings: %w", err)
 	}
 	return nil
 }
 // isCloudSettingMigrated reads the cloud_setting_migrated flag from the
 // settings table.
 func (db *database) isCloudSettingMigrated() (bool, error) {
 	var done bool
 	row := db.conn.QueryRow("SELECT cloud_setting_migrated FROM settings")
 	if scanErr := row.Scan(&done); scanErr != nil {
 		return false, fmt.Errorf("get cloud setting migration status: %w", scanErr)
 	}
 	return done, nil
 }
 // setCloudSettingMigrated stores the cloud_setting_migrated flag in the
 // settings table.
 func (db *database) setCloudSettingMigrated(migrated bool) error {
 	if _, execErr := db.conn.Exec("UPDATE settings SET cloud_setting_migrated = ?", migrated); execErr != nil {
 		return fmt.Errorf("set cloud setting migration status: %w", execErr)
 	}
 	return nil
 }
 // getAirplaneMode reads the airplane_mode value from the settings table.
 func (db *database) getAirplaneMode() (bool, error) {
 	var enabled bool
 	row := db.conn.QueryRow("SELECT airplane_mode FROM settings")
 	if scanErr := row.Scan(&enabled); scanErr != nil {
 		return false, fmt.Errorf("get airplane_mode: %w", scanErr)
 	}
 	return enabled, nil
 }
 func (db *database) getWindowSize() (int, int, error) {
 	var width, height int
 	err := db.conn.QueryRow("SELECT window_width, window_height FROM settings").Scan(&width, &height)
--- a/app/store/database_test.go
+++ b/app/store/database_test.go
@@ -98,6 +98,82 @@ func TestSchemaMigrations(t *testing.T) {
 	})
 }
 // TestMigrationV13ToV14ContextLength seeds the settings row at schema version
 // 13 with context_length 4096, runs migrate(), and verifies the value becomes
 // 0 and the schema reaches currentSchemaVersion.
 func TestMigrationV13ToV14ContextLength(t *testing.T) {
 	dbPath := filepath.Join(t.TempDir(), "test.db")
 	db, err := newDatabase(dbPath)
 	if err != nil {
 		t.Fatalf("failed to create database: %v", err)
 	}
 	defer db.Close()
 	if _, seedErr := db.conn.Exec("UPDATE settings SET context_length = 4096, schema_version = 13"); seedErr != nil {
 		t.Fatalf("failed to seed v13 settings row: %v", seedErr)
 	}
 	if migrateErr := db.migrate(); migrateErr != nil {
 		t.Fatalf("migration from v13 to v14 failed: %v", migrateErr)
 	}
 	var got int
 	row := db.conn.QueryRow("SELECT context_length FROM settings")
 	if scanErr := row.Scan(&got); scanErr != nil {
 		t.Fatalf("failed to read context_length: %v", scanErr)
 	}
 	if got != 0 {
 		t.Fatalf("expected context_length to migrate to 0, got %d", got)
 	}
 	schemaVersion, err := db.getSchemaVersion()
 	if err != nil {
 		t.Fatalf("failed to get schema version: %v", err)
 	}
 	if schemaVersion != currentSchemaVersion {
 		t.Fatalf("expected schema version %d, got %d", currentSchemaVersion, schemaVersion)
 	}
 }
 // TestMigrationV15ToV16LastHomeViewDefaultsToLaunch drops last_home_view and
 // sets the schema version back to 15, then verifies that migrate() restores
 // the column with the value "launch" and brings the schema to
 // currentSchemaVersion.
 func TestMigrationV15ToV16LastHomeViewDefaultsToLaunch(t *testing.T) {
 	dbPath := filepath.Join(t.TempDir(), "test.db")
 	db, err := newDatabase(dbPath)
 	if err != nil {
 		t.Fatalf("failed to create database: %v", err)
 	}
 	defer db.Close()
 	_, seedErr := db.conn.Exec(`
 		ALTER TABLE settings DROP COLUMN last_home_view;
 		UPDATE settings SET schema_version = 15;
 	`)
 	if seedErr != nil {
 		t.Fatalf("failed to seed v15 settings row: %v", seedErr)
 	}
 	if migrateErr := db.migrate(); migrateErr != nil {
 		t.Fatalf("migration from v15 to v16 failed: %v", migrateErr)
 	}
 	var homeView string
 	row := db.conn.QueryRow("SELECT last_home_view FROM settings")
 	if scanErr := row.Scan(&homeView); scanErr != nil {
 		t.Fatalf("failed to read last_home_view: %v", scanErr)
 	}
 	if homeView != "launch" {
 		t.Fatalf("expected last_home_view to default to launch after migration, got %q", homeView)
 	}
 	schemaVersion, err := db.getSchemaVersion()
 	if err != nil {
 		t.Fatalf("failed to get schema version: %v", err)
 	}
 	if schemaVersion != currentSchemaVersion {
 		t.Fatalf("expected schema version %d, got %d", currentSchemaVersion, schemaVersion)
 	}
 }
 func TestChatDeletionWithCascade(t *testing.T) {
 	t.Run("chat deletion cascades to related messages", func(t *testing.T) {
 		tmpDir := t.TempDir()
--- a/app/store/migration_test.go
+++ b/app/store/migration_test.go
@@ -127,6 +127,65 @@ func TestNoConfigToMigrate(t *testing.T) {
 	}
 }
 // TestCloudMigrationFromAirplaneMode seeds a database with airplane_mode set
 // and cloud_setting_migrated cleared, opens it through Store, and verifies
 // that cloud ends up disabled in server.json, that the legacy airplane_mode
 // value is untouched, and that the migration flag is set.
 func TestCloudMigrationFromAirplaneMode(t *testing.T) {
 	home := t.TempDir()
 	setTestHome(t, home)
 	t.Setenv("OLLAMA_NO_CLOUD", "")
 	dbPath := filepath.Join(home, "db.sqlite")
 	db, err := newDatabase(dbPath)
 	if err != nil {
 		t.Fatalf("failed to create database: %v", err)
 	}
 	// Close the seeding connection whether or not the seed succeeded.
 	_, seedErr := db.conn.Exec("UPDATE settings SET airplane_mode = 1, cloud_setting_migrated = 0")
 	db.Close()
 	if seedErr != nil {
 		t.Fatalf("failed to seed airplane migration state: %v", seedErr)
 	}
 	s := Store{DBPath: dbPath}
 	defer s.Close()
 	// Trigger DB initialization + one-time cloud migration.
 	if _, idErr := s.ID(); idErr != nil {
 		t.Fatalf("failed to initialize store: %v", idErr)
 	}
 	cloudOff, err := s.CloudDisabled()
 	if err != nil {
 		t.Fatalf("CloudDisabled() error: %v", err)
 	}
 	if !cloudOff {
 		t.Fatal("expected cloud to be disabled after migrating airplane_mode=true")
 	}
 	raw, err := os.ReadFile(filepath.Join(home, ".ollama", serverConfigFilename))
 	if err != nil {
 		t.Fatalf("failed to read migrated server config: %v", err)
 	}
 	var cfg map[string]any
 	if decodeErr := json.Unmarshal(raw, &cfg); decodeErr != nil {
 		t.Fatalf("failed to parse migrated server config: %v", decodeErr)
 	}
 	if flag := cfg["disable_ollama_cloud"]; flag != true {
 		t.Fatalf("disable_ollama_cloud = %v, want true", flag)
 	}
 	var airplaneMode, migrated bool
 	row := s.db.conn.QueryRow("SELECT airplane_mode, cloud_setting_migrated FROM settings")
 	if scanErr := row.Scan(&airplaneMode, &migrated); scanErr != nil {
 		t.Fatalf("failed to read migration flags from DB: %v", scanErr)
 	}
 	if !airplaneMode {
 		t.Fatal("expected legacy airplane_mode value to remain unchanged")
 	}
 	if !migrated {
 		t.Fatal("expected cloud_setting_migrated to be true")
 	}
 }
 const (
 	v1Schema = `
 	CREATE TABLE IF NOT EXISTS settings (
--- a/app/store/store.go
+++ b/app/store/store.go
@@ -149,9 +149,6 @@ type Settings struct {
 	// ContextLength specifies the context length for the ollama server (using OLLAMA_CONTEXT_LENGTH)
 	ContextLength int
 	// AirplaneMode when true, turns off Ollama Turbo features and only uses local models
 	AirplaneMode bool
 	// TurboEnabled indicates if Ollama Turbo features are enabled
 	TurboEnabled bool
@@ -169,6 +166,12 @@ type Settings struct {
 	// SidebarOpen indicates if the chat sidebar is open
 	SidebarOpen bool
 	// LastHomeView stores the preferred home route target ("chat" or integration name)
 	LastHomeView string
 	// AutoUpdateEnabled indicates if automatic updates should be downloaded
 	AutoUpdateEnabled bool
 }
 type Store struct {
@@ -259,6 +262,40 @@ func (s *Store) ensureDB() error {
 		}
 	}
 	// Run one-time migration from legacy airplane_mode behavior.
 	if err := s.migrateCloudSetting(database); err != nil {
 		return fmt.Errorf("migrate cloud setting: %w", err)
 	}
 	return nil
 }
 // migrateCloudSetting carries the legacy airplane_mode setting over to
 // server.json. It does nothing once cloud_setting_migrated is set, and sets
 // that flag after a successful run so the migration happens only once.
 // After this, cloud state is sourced from server.json OR OLLAMA_NO_CLOUD.
 func (s *Store) migrateCloudSetting(database *database) error {
 	alreadyMigrated, err := database.isCloudSettingMigrated()
 	if err != nil || alreadyMigrated {
 		// err is nil when the flag is already set, so this returns nil then.
 		return err
 	}
 	legacyAirplane, err := database.getAirplaneMode()
 	if err != nil {
 		return err
 	}
 	// Only an enabled airplane mode needs to be written out as cloud disabled.
 	if legacyAirplane {
 		if writeErr := setCloudEnabled(false); writeErr != nil {
 			return fmt.Errorf("migrate airplane_mode to cloud disabled: %w", writeErr)
 		}
 	}
 	return database.setCloudSettingMigrated(true)
 }
@@ -355,6 +392,10 @@ func (s *Store) Settings() (Settings, error) {
 		}
 	}
 	if settings.LastHomeView == "" {
 		settings.LastHomeView = "launch"
 	}
 	return settings, nil
 }
--- a/app/store/store_test.go
+++ b/app/store/store_test.go
@@ -81,6 +81,32 @@ func TestStore(t *testing.T) {
 		}
 	})
 	t.Run("settings default home view is launch", func(t *testing.T) {
 		loaded, err := s.Settings()
 		if err != nil {
 			t.Fatal(err)
 		}
 		if loaded.LastHomeView != "launch" {
 			t.Fatalf("expected default LastHomeView to be launch, got %q", loaded.LastHomeView)
 		}
 	})
 	t.Run("settings empty home view falls back to launch", func(t *testing.T) {
 		if err := s.SetSettings(Settings{LastHomeView: ""}); err != nil {
 			t.Fatal(err)
 		}
 		loaded, err := s.Settings()
 		if err != nil {
 			t.Fatal(err)
 		}
 		if loaded.LastHomeView != "launch" {
 			t.Fatalf("expected empty LastHomeView to fall back to launch, got %q", loaded.LastHomeView)
 		}
 	})
 	t.Run("window size", func(t *testing.T) {
 		if err := s.SetWindowSize(1024, 768); err != nil {
 			t.Fatal(err)
--- a/app/store/test_home_test.go
+++ b/app/store/test_home_test.go
@@ -0,0 +1,11 @@
 //go:build windows || darwin
 package store
 import "testing"
 // setTestHome sets both HOME and USERPROFILE to home via t.Setenv.
 func setTestHome(t *testing.T, home string) {
 	t.Helper()
 	for _, key := range []string{"HOME", "USERPROFILE"} {
 		t.Setenv(key, home)
 	}
 }
--- a/app/store/testdata/schema.sql
+++ b/app/store/testdata/schema.sql
@@ -13,7 +13,7 @@ CREATE TABLE IF NOT EXISTS settings (
    agent BOOLEAN NOT NULL DEFAULT 0,
    tools BOOLEAN NOT NULL DEFAULT 0,
    working_dir TEXT NOT NULL DEFAULT '',
-    context_length INTEGER NOT NULL DEFAULT 4096,
+    context_length INTEGER NOT NULL DEFAULT 0,
    window_width INTEGER NOT NULL DEFAULT 0,
    window_height INTEGER NOT NULL DEFAULT 0,
    config_migrated BOOLEAN NOT NULL DEFAULT 0,
--- a/app/tools/cloud_policy.go
+++ b/app/tools/cloud_policy.go
@@ -0,0 +1,35 @@
 //go:build windows || darwin
 package tools
 import (
 	"context"
 	"errors"
 	"github.com/ollama/ollama/api"
 	internalcloud "github.com/ollama/ollama/internal/cloud"
 )
 // ensureCloudEnabledForTool asks the connected Ollama server whether cloud is
 // disabled and returns an error when it is. It fails closed: if a client
 // cannot be built or the status request fails, the operation is blocked with
 // a note that the policy could not be verified.
 func ensureCloudEnabledForTool(ctx context.Context, operation string) error {
 	// The shared helper only formats the message; the policy decision itself
 	// comes from the server's status response below.
 	disabledMessage := internalcloud.DisabledError(operation)
 	unverified := func() error {
 		return errors.New(disabledMessage + " (unable to verify server cloud policy)")
 	}
 	client, clientErr := api.ClientFromEnvironment()
 	if clientErr != nil {
 		return unverified()
 	}
 	status, statusErr := client.CloudStatusExperimental(ctx)
 	if statusErr != nil {
 		return unverified()
 	}
 	if !status.Cloud.Disabled {
 		return nil
 	}
 	return errors.New(disabledMessage)
 }
--- a/app/tools/cloud_policy_test.go
+++ b/app/tools/cloud_policy_test.go
@@ -0,0 +1,73 @@
 //go:build windows || darwin
 package tools
 import (
 	"context"
 	"net/http"
 	"net/http/httptest"
 	"strings"
 	"testing"
 )
 // TestEnsureCloudEnabledForTool runs ensureCloudEnabledForTool against stub
 // servers that report cloud enabled, report cloud disabled, or answer 404,
 // and checks that the last two block the tool with the expected message.
 func TestEnsureCloudEnabledForTool(t *testing.T) {
 	const (
 		op             = "web search is unavailable"
 		disabledPrefix = "ollama cloud is disabled: web search is unavailable"
 	)
 	// pointAtServer starts a stub server for the subtest, registers its
 	// shutdown, and directs OLLAMA_HOST at it.
 	pointAtServer := func(t *testing.T, handler http.HandlerFunc) {
 		t.Helper()
 		ts := httptest.NewServer(handler)
 		t.Cleanup(ts.Close)
 		t.Setenv("OLLAMA_HOST", ts.URL)
 	}
 	// statusHandler serves body as JSON on /api/status and 404s any other path.
 	statusHandler := func(body string) http.HandlerFunc {
 		return func(w http.ResponseWriter, r *http.Request) {
 			if r.URL.Path != "/api/status" {
 				http.NotFound(w, r)
 				return
 			}
 			w.Header().Set("Content-Type", "application/json")
 			_, _ = w.Write([]byte(body))
 		}
 	}
 	t.Run("enabled allows tool execution", func(t *testing.T) {
 		pointAtServer(t, statusHandler(`{"cloud":{"disabled":false,"source":"none"}}`))
 		if err := ensureCloudEnabledForTool(context.Background(), op); err != nil {
 			t.Fatalf("expected nil error, got %v", err)
 		}
 	})
 	t.Run("disabled blocks tool execution", func(t *testing.T) {
 		pointAtServer(t, statusHandler(`{"cloud":{"disabled":true,"source":"config"}}`))
 		err := ensureCloudEnabledForTool(context.Background(), op)
 		if err == nil {
 			t.Fatal("expected error, got nil")
 		}
 		if got := err.Error(); got != disabledPrefix {
 			t.Fatalf("unexpected error: %q", got)
 		}
 	})
 	t.Run("status unavailable fails closed", func(t *testing.T) {
 		pointAtServer(t, http.NotFound)
 		err := ensureCloudEnabledForTool(context.Background(), op)
 		if err == nil {
 			t.Fatal("expected error, got nil")
 		}
 		msg := err.Error()
 		if !strings.Contains(msg, disabledPrefix) {
 			t.Fatalf("expected disabled prefix, got %q", msg)
 		}
 		if !strings.Contains(msg, "unable to verify server cloud policy") {
 			t.Fatalf("expected verification failure detail, got %q", msg)
 		}
 	})
 }
--- a/app/tools/web_fetch.go
+++ b/app/tools/web_fetch.go
@@ -77,6 +77,10 @@ func (w *WebFetch) Execute(ctx context.Context, args map[string]any) (any, strin
 }
 func performWebFetch(ctx context.Context, targetURL string) (*FetchResponse, error) {
 	if err := ensureCloudEnabledForTool(ctx, "web fetch is unavailable"); err != nil {
 		return nil, err
 	}
 	reqBody := FetchRequest{URL: targetURL}
 	jsonBody, err := json.Marshal(reqBody)
 	if err != nil {
--- a/app/tools/web_search.go
+++ b/app/tools/web_search.go
@@ -93,6 +93,10 @@ func (w *WebSearch) Execute(ctx context.Context, args map[string]any) (any, stri
 }
 func performWebSearch(ctx context.Context, query string, maxResults int) (*SearchResponse, error) {
 	if err := ensureCloudEnabledForTool(ctx, "web search is unavailable"); err != nil {
 		return nil, err
 	}
 	reqBody := SearchRequest{Query: query, MaxResults: maxResults}
 	jsonBody, err := json.Marshal(reqBody)
--- a/app/ui/app/codegen/gotypes.gen.ts
+++ b/app/ui/app/codegen/gotypes.gen.ts
@@ -289,10 +289,12 @@ export class InferenceCompute {
 }
 export class InferenceComputeResponse {
    inferenceComputes: InferenceCompute[];
    defaultContextLength: number;
    constructor(source: any = {}) {
        if ('string' === typeof source) source = JSON.parse(source);
        this.inferenceComputes = this.convertValues(source["inferenceComputes"], InferenceCompute);
        this.defaultContextLength = source["defaultContextLength"];
    }
 	convertValues(a: any, classs: any, asMap: boolean = false): any {
@@ -406,13 +408,14 @@ export class Settings {
    Tools: boolean;
    WorkingDir: string;
    ContextLength: number;
    AirplaneMode: boolean;
    TurboEnabled: boolean;
    WebSearchEnabled: boolean;
    ThinkEnabled: boolean;
    ThinkLevel: string;
    SelectedModel: string;
    SidebarOpen: boolean;
    LastHomeView: string;
    AutoUpdateEnabled: boolean;
    constructor(source: any = {}) {
        if ('string' === typeof source) source = JSON.parse(source);
@@ -424,13 +427,14 @@ export class Settings {
        this.Tools = source["Tools"];
        this.WorkingDir = source["WorkingDir"];
        this.ContextLength = source["ContextLength"];
        this.AirplaneMode = source["AirplaneMode"];
        this.TurboEnabled = source["TurboEnabled"];
        this.WebSearchEnabled = source["WebSearchEnabled"];
        this.ThinkEnabled = source["ThinkEnabled"];
        this.ThinkLevel = source["ThinkLevel"];
        this.SelectedModel = source["SelectedModel"];
        this.SidebarOpen = source["SidebarOpen"];
        this.LastHomeView = source["LastHomeView"];
        this.AutoUpdateEnabled = source["AutoUpdateEnabled"];
    }
 }
 export class SettingsResponse {
@@ -548,14 +552,12 @@ export class Error {
    }
 }
 export class ModelUpstreamResponse {
-    digest?: string;
+    stale: boolean;
    pushTime: number;
    error?: string;
    constructor(source: any = {}) {
        if ('string' === typeof source) source = JSON.parse(source);
-        this.digest = source["digest"];
+        this.stale = source["stale"];
        this.pushTime = source["pushTime"];
        this.error = source["error"];
    }
 }
--- a/app/ui/app/public/launch-icons/claude.svg
+++ b/app/ui/app/public/launch-icons/claude.svg
@@ -0,0 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <!-- Generated by Pixelmator Pro 3.6.17 -->
 <svg width="1200" height="1200" viewBox="0 0 1200 1200" xmlns="http://www.w3.org/2000/svg">
    <g id="g314">
        <path id="path147" fill="#d97757" stroke="none" d="M 233.959793 800.214905 L 468.644287 668.536987 L 472.590637 657.100647 L 468.644287 650.738403 L 457.208069 650.738403 L 417.986633 648.322144 L 283.892639 644.69812 L 167.597321 639.865845 L 54.926208 633.825623 L 26.577238 627.785339 L 3.3e-05 592.751709 L 2.73832 575.27533 L 26.577238 559.248352 L 60.724873 562.228149 L 136.187973 567.382629 L 249.422867 575.194763 L 331.570496 580.026978 L 453.261841 592.671082 L 472.590637 592.671082 L 475.328857 584.859009 L 468.724915 580.026978 L 463.570557 575.194763 L 346.389313 495.785217 L 219.543671 411.865906 L 153.100723 363.543762 L 117.181267 339.060425 L 99.060455 316.107361 L 91.248367 266.01355 L 123.865784 230.093994 L 167.677887 233.073853 L 178.872513 236.053772 L 223.248367 270.201477 L 318.040283 343.570496 L 441.825592 434.738342 L 459.946411 449.798706 L 467.194672 444.64447 L 468.080597 441.020203 L 459.946411 427.409485 L 392.617493 305.718323 L 320.778564 181.932983 L 288.80542 130.630859 L 280.348999 99.865845 C 277.369171 87.221436 275.194641 76.590698 275.194641 63.624268 L 312.322174 13.20813 L 332.8591 6.604126 L 382.389313 13.20813 L 403.248352 31.328979 L 434.013519 101.71814 L 483.865753 212.537048 L 561.181274 363.221497 L 583.812134 407.919434 L 595.892639 449.315491 L 600.40271 461.959839 L 608.214783 461.959839 L 608.214783 454.711609 L 614.577271 369.825623 L 626.335632 265.61084 L 637.771851 131.516846 L 641.718201 93.745117 L 660.402832 48.483276 L 697.530334 24.000122 L 726.52356 37.852417 L 750.362549 72 L 747.060486 94.067139 L 732.886047 186.201416 L 705.100708 330.52356 L 686.979919 427.167847 L 697.530334 427.167847 L 709.61084 415.087341 L 758.496704 350.174561 L 840.644348 247.490051 L 876.885925 206.738342 L 919.167847 161.71814 L 946.308838 140.29541 L 997.61084 140.29541 L 1035.38269 196.429626 L 1018.469849 254.416199 L 965.637634 321.422852 L 921.825562 378.201538 L 859.006714 462.765259 L 819.785278 530.41626 L 
823.409424 535.812073 L 832.75177 534.92627 L 974.657776 504.724915 L 1051.328979 490.872559 L 1142.818848 475.167786 L 1184.214844 494.496582 L 1188.724854 514.147644 L 1172.456421 554.335693 L 1074.604126 578.496765 L 959.838989 601.449829 L 788.939636 641.879272 L 786.845764 643.409485 L 789.261841 646.389343 L 866.255127 653.637634 L 899.194702 655.409424 L 979.812134 655.409424 L 1129.932861 666.604187 L 1169.154419 692.537109 L 1192.671265 724.268677 L 1188.724854 748.429688 L 1128.322144 779.194641 L 1046.818848 759.865845 L 856.590759 714.604126 L 791.355774 698.335754 L 782.335693 698.335754 L 782.335693 703.731567 L 836.69812 756.885986 L 936.322205 846.845581 L 1061.073975 962.81897 L 1067.436279 991.490112 L 1051.409424 1014.120911 L 1034.496704 1011.704712 L 924.885986 929.234924 L 882.604126 892.107544 L 786.845764 811.48999 L 780.483276 811.48999 L 780.483276 819.946289 L 802.550415 852.241699 L 919.087341 1027.409424 L 925.127625 1081.127686 L 916.671204 1098.604126 L 886.469849 1109.154419 L 853.288696 1103.114136 L 785.073914 1007.355835 L 714.684631 899.516785 L 657.906067 802.872498 L 650.979858 806.81897 L 617.476624 1167.704834 L 601.771851 1186.147705 L 565.530212 1200 L 535.328857 1177.046997 L 519.302124 1139.919556 L 535.328857 1066.550537 L 554.657776 970.792053 L 570.362488 894.68457 L 584.536926 800.134277 L 592.993347 768.724976 L 592.429626 766.630859 L 585.503479 767.516968 L 514.22821 865.369263 L 405.825531 1011.865906 L 320.053711 1103.677979 L 299.516815 1111.812256 L 263.919525 1093.369263 L 267.221497 1060.429688 L 287.114136 1031.114136 L 405.825531 880.107361 L 477.422913 786.52356 L 523.651062 732.483276 L 523.328918 724.671265 L 520.590698 724.671265 L 205.288605 929.395935 L 149.154434 936.644409 L 124.993355 914.01355 L 127.973183 876.885986 L 139.409409 864.80542 L 234.201385 799.570435 L 233.879227 799.8927 Z"/>
    </g>
 </svg>
--- a/app/ui/app/public/launch-icons/codex-dark.svg
+++ b/app/ui/app/public/launch-icons/codex-dark.svg
@@ -0,0 +1 @@
 <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 320 320"><path fill="#fff" d="m297.06 130.97c7.26-21.79 4.76-45.66-6.85-65.48-17.46-30.4-52.56-46.04-86.84-38.68-15.25-17.18-37.16-26.95-60.13-26.81-35.04-.08-66.13 22.48-76.91 55.82-22.51 4.61-41.94 18.7-53.31 38.67-17.59 30.32-13.58 68.54 9.92 94.54-7.26 21.79-4.76 45.66 6.85 65.48 17.46 30.4 52.56 46.04 86.84 38.68 15.24 17.18 37.16 26.95 60.13 26.8 35.06.09 66.16-22.49 76.94-55.86 22.51-4.61 41.94-18.7 53.31-38.67 17.57-30.32 13.55-68.51-9.94-94.51zm-120.28 168.11c-14.03.02-27.62-4.89-38.39-13.88.49-.26 1.34-.73 1.89-1.07l63.72-36.8c3.26-1.85 5.26-5.32 5.24-9.07v-89.83l26.93 15.55c.29.14.48.42.52.74v74.39c-.04 33.08-26.83 59.9-59.91 59.97zm-128.84-55.03c-7.03-12.14-9.56-26.37-7.15-40.18.47.28 1.3.79 1.89 1.13l63.72 36.8c3.23 1.89 7.23 1.89 10.47 0l77.79-44.92v31.1c.02.32-.13.63-.38.83l-64.41 37.19c-28.69 16.52-65.33 6.7-81.92-21.95zm-16.77-139.09c7-12.16 18.05-21.46 31.21-26.29 0 .55-.03 1.52-.03 2.2v73.61c-.02 3.74 1.98 7.21 5.23 9.06l77.79 44.91-26.93 15.55c-.27.18-.61.21-.91.08l-64.42-37.22c-28.63-16.58-38.45-53.21-21.95-81.89zm221.26 51.49-77.79-44.92 26.93-15.54c.27-.18.61-.21.91-.08l64.42 37.19c28.68 16.57 38.51 53.26 21.94 81.94-7.01 12.14-18.05 21.44-31.2 26.28v-75.81c.03-3.74-1.96-7.2-5.2-9.06zm26.8-40.34c-.47-.29-1.3-.79-1.89-1.13l-63.72-36.8c-3.23-1.89-7.23-1.89-10.47 0l-77.79 44.92v-31.1c-.02-.32.13-.63.38-.83l64.41-37.16c28.69-16.55 65.37-6.7 81.91 22 6.99 12.12 9.52 26.31 7.15 40.1zm-168.51 55.43-26.94-15.55c-.29-.14-.48-.42-.52-.74v-74.39c.02-33.12 26.89-59.96 60.01-59.94 14.01 0 27.57 4.92 38.34 13.88-.49.26-1.33.73-1.89 1.07l-63.72 36.8c-3.26 1.85-5.26 5.31-5.24 9.06l-.04 89.79zm14.63-31.54 34.65-20.01 34.65 20v40.01l-34.65 20-34.65-20z"/></svg>
--- a/app/ui/app/public/launch-icons/codex.svg
+++ b/app/ui/app/public/launch-icons/codex.svg
@@ -0,0 +1 @@
 <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 320 320"><path d="m297.06 130.97c7.26-21.79 4.76-45.66-6.85-65.48-17.46-30.4-52.56-46.04-86.84-38.68-15.25-17.18-37.16-26.95-60.13-26.81-35.04-.08-66.13 22.48-76.91 55.82-22.51 4.61-41.94 18.7-53.31 38.67-17.59 30.32-13.58 68.54 9.92 94.54-7.26 21.79-4.76 45.66 6.85 65.48 17.46 30.4 52.56 46.04 86.84 38.68 15.24 17.18 37.16 26.95 60.13 26.8 35.06.09 66.16-22.49 76.94-55.86 22.51-4.61 41.94-18.7 53.31-38.67 17.57-30.32 13.55-68.51-9.94-94.51zm-120.28 168.11c-14.03.02-27.62-4.89-38.39-13.88.49-.26 1.34-.73 1.89-1.07l63.72-36.8c3.26-1.85 5.26-5.32 5.24-9.07v-89.83l26.93 15.55c.29.14.48.42.52.74v74.39c-.04 33.08-26.83 59.9-59.91 59.97zm-128.84-55.03c-7.03-12.14-9.56-26.37-7.15-40.18.47.28 1.3.79 1.89 1.13l63.72 36.8c3.23 1.89 7.23 1.89 10.47 0l77.79-44.92v31.1c.02.32-.13.63-.38.83l-64.41 37.19c-28.69 16.52-65.33 6.7-81.92-21.95zm-16.77-139.09c7-12.16 18.05-21.46 31.21-26.29 0 .55-.03 1.52-.03 2.2v73.61c-.02 3.74 1.98 7.21 5.23 9.06l77.79 44.91-26.93 15.55c-.27.18-.61.21-.91.08l-64.42-37.22c-28.63-16.58-38.45-53.21-21.95-81.89zm221.26 51.49-77.79-44.92 26.93-15.54c.27-.18.61-.21.91-.08l64.42 37.19c28.68 16.57 38.51 53.26 21.94 81.94-7.01 12.14-18.05 21.44-31.2 26.28v-75.81c.03-3.74-1.96-7.2-5.2-9.06zm26.8-40.34c-.47-.29-1.3-.79-1.89-1.13l-63.72-36.8c-3.23-1.89-7.23-1.89-10.47 0l-77.79 44.92v-31.1c-.02-.32.13-.63.38-.83l64.41-37.16c28.69-16.55 65.37-6.7 81.91 22 6.99 12.12 9.52 26.31 7.15 40.1zm-168.51 55.43-26.94-15.55c-.29-.14-.48-.42-.52-.74v-74.39c.02-33.12 26.89-59.96 60.01-59.94 14.01 0 27.57 4.92 38.34 13.88-.49.26-1.33.73-1.89 1.07l-63.72 36.8c-3.26 1.85-5.26 5.31-5.24 9.06l-.04 89.79zm14.63-31.54 34.65-20.01 34.65 20v40.01l-34.65 20-34.65-20z"/></svg>
--- a/app/ui/app/public/launch-icons/droid.svg
+++ b/app/ui/app/public/launch-icons/droid.svg
--- a/app/ui/app/public/launch-icons/openclaw.svg
+++ b/app/ui/app/public/launch-icons/openclaw.svg
@@ -0,0 +1,242 @@
 <svg version="1.2" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 500 500" width="500" height="500">
 	<style>
 		.s0 { fill: #f6f4f4 } 
 		.s1 { fill: #0b0303 } 
 		.s2 { fill: #ef0011 } 
 		.s3 { fill: #f3e2e2 } 
 		.s4 { fill: #f00212 } 
 		.s5 { fill: #ba000d } 
 		.s6 { fill: #faf1f1 } 
 		.s7 { fill: #0b0100 } 
 		.s8 { fill: #fbedee } 
 		.s9 { fill: #faeaea } 
 		.s10 { fill: #ab797d } 
 		.s11 { fill: #f8eaea } 
 		.s12 { fill: #902021 } 
 		.s13 { fill: #f9eeee } 
 		.s14 { fill: #f6ecec } 
 		.s15 { fill: #080201 } 
 		.s16 { fill: #150100 } 
 		.s17 { fill: #f2e7e7 } 
 		.s18 { fill: #fbe7e8 } 
 		.s19 { fill: #060101 } 
 		.s20 { fill: #f5e7e7 } 
 		.s21 { fill: #fa999e } 
 		.s22 { fill: #c46064 } 
 		.s23 { fill: #180300 } 
 		.s24 { fill: #f6dcdd } 
 		.s25 { fill: #f2e6e6 } 
 		.s26 { fill: #110200 } 
 		.s27 { fill: #eb0011 } 
 		.s28 { fill: #e20010 } 
 		.s29 { fill: #ea0011 } 
 		.s30 { fill: #760007 } 
 		.s31 { fill: #f00514 } 
 		.s32 { fill: #fcebeb } 
 		.s33 { fill: #ecd6d6 } 
 		.s34 { fill: #f5e3e3 } 
 		.s35 { fill: #f5e4e4 } 
 		.s36 { fill: #faf6f6 } 
 		.s37 { fill: #e50010 } 
 		.s38 { fill: #d5000f } 
 		.s39 { fill: #f2e2e3 } 
 		.s40 { fill: #ef1018 } 
 		.s41 { fill: #f4e8e9 } 
 		.s42 { fill: #ef0513 } 
 		.s43 { fill: #f5e5e5 } 
 		.s44 { fill: #f00413 } 
 		.s45 { fill: #f4e9ea } 
 		.s46 { fill: #ed0011 } 
 		.s47 { fill: #e80011 } 
 		.s48 { fill: #e60613 } 
 		.s49 { fill: #f0d6d6 } 
 		.s50 { fill: #fca9ac } 
 		.s51 { fill: #9c000c } 
 		.s52 { fill: #73393b } 
 	</style>
 	<g>
 		<path fill-rule="evenodd" class="s0" d="m166.5 52.5q3.5 0 7 0 2.75 2.99 1.5 7-21.27 45.61-20.5 96 39.99 2.76 72 26.5 7.87 6.86 13.5 15.5 42.88-56.39 103.5-92.5 47.35-25.46 101-25 14.52 0.38 23.5 11.5 3.19 7.74 2 16-1.81 7.18-4.5 14-1 0-1 1-5.04 6.05-9 13-1 0-1 1 0 0.5 0 1-12.42 12.15-28.5 19-6.02 36.27-41.5 45-0.83 2.75 0 5 19.02-12.85 41.5-9 10.85-8.09 23.5-13 15.01-6.37 31-2.5 14.09 7.43 14 23.5-2.83 23.25-15.5 43-6.42 9.92-14 19-10.04 8.8-19.5 18-72.02 48.88-156.5 27-19.63 9.6-41.5 10.5-4.59 1.27-9 3 2 1 4 2 20.09-1.11 35 12 25.46 6.95 37.5 30.5 1.26 5.69-1 11-3.38 3.79-7.5 6.5 5.74 10.07 1.5 20.5-7.55 7.47-17.5 3.5-11.01-5.34-22.5-9.5-18.26 10-38.5 13-15.5 0-31 0-26.62-4.54-51-17-4.17 1.33-8 3.5-7.23 5.87-15 11-8.62 2.58-13.5-4.5-1.82 2.32-4.5 3.5-6.06 2.24-12 3.5-7.5 0-15 0-27.42-2.56-50-18.5-18-17.25-23-41.5 0-11.5 0-23 4.12-22.7 25-33 6.95-16.67 22-26.5-20.39-20.8-14.5-49.5 7.01-26.98 28.5-44.5 7.56-5.27 15-10.5-13.09-30.88-7.5-64 3.16-15.57 14.5-26.5 6.85-2.48 8 4.5-6.59 39.53 11 75.5 7.99-0.49 16-2 2.42-34.57 14.5-67.5 8.51-22.23 27.5-36z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s1" d="m113.5 401.5q0.48-5.1-1-10-0.91 0.19-1 1-2.46 1.74-5 3.5 5.65 9.54-5 13-32.21 5.55-61-10-32.89-23.11-29.5-63.5 2.96-22.67 23.5-32 7.99-19.75 27-29.5-27.65-23.7-15.5-58.5 7.33-16.82 20.5-29.5 10.79-8.14 22-15.5-16.49-37.08-5.5-76 3.19-6.13 7.5-11.5 1.48-0.89 2 1-5.69 41.09 12.5 78.5 1 1 2 2 9.97-3.24 20.5-4 2 0 4 0 0-7.5 0-15 0.99-42.22 24.5-77 6.12-7.12 14-12-4.65 13.43-10 27-11.93 37.6-9.5 77 49.38 0.7 83.5 36 2.75 4.5 5.5 9 38.99-52.24 93-88.5 45.84-29.03 100-32.5 15.69-1.56 29 6.5 5.68 7.29 3.5 16.5-10.38 33.62-43.5 45-4.39 37.33-41 45-0.79 8.63-6 15.5 1.91 1.83 4.5 2.5 22.27-17.25 50.5-14.5 12.93-9.41 28-15 36.22-8.28 31.5 28.5-15.19 51.69-62.5 77.5-65.92 35.87-138 15.5-19.67 10.42-42 10.5-8.39 2.88-17 5 3.58 6.08 10 9 20.92-1.14 36 13 22.67 5.23 34.5 25.5 3.33 7.13-3.5 11.5-3.88 1.8-8 3 7.36 8.45 6.5 19.5-4.43 5.66-11.5 3.5-12.84-5.67-26-10.5-39.4 21.02-83 10.5-18.85-5.78-36.5-14.5-13.65 4.14-23.5 14.5-9.51 3.74-11-6.5z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s2" d="m153.5 173.5q24.62 1.46 46 13.5 12.11 8.1 17.5 21.5 0.74 2.45 0.5 5 0.09 0.81 1 1 1.48-4.9 1-10 5.04 10.48 1.5 22-9.81 27.86-35.5 42.5-26.17 14.97-56 19.5-2.77-0.4-2 1 2.86 1.27 6 1 25.64 1.53 48.5-10 0.34 10.08 2 20 1.08 5.76 5 10 1 1.5 0 3-31.11 20.84-68.5 17.5-23.7-5.7-32.5-28.5-4.39-9.18-3.5-19 15.41 6.23 32 4.5-20.68-6.39-39-18-34.81-27.22-12.5-65.5 11.84-14.83 29-23 4.21 7.66 11.5 12.5 3 1 6 0-26.04-34.62-29-78-0.13-8.46 2-16.5 1 6.5 2 13 3.43 39.53 24.5 73 2.03 2.28 4.5 4 0.5-1.25 1-2.5-1.27-6.54-5-12 0.5-0.75 1-1.5 9.72-3.43 20-4 0.55 10.34 8 17.5 1.94 0.74 4 0.5-17.8-64.6 16.5-122 0.98-1.79 1.5 0-28.21 56.64-13.5 118 1.08 1.43 2.5 0.5 2.21-4.98 2-10.5z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s3" d="m454.5 97.5q-18.37-2.97-37-1.5-16.14 2.08-32 5.5 32.38-14.09 67-7.5 1.98 1.22 2 3.5z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s4" d="m454.5 97.5q-1.33 11.18-8.5 20-21.81 26.28-55.5 32-1.11-0.2-2 0.5 2.31 2.82 5.5 4.5 1 2 0 4-9.56 11.3-19.5 20 19.71-8.72 31-27 2.68-0.43 5 1-14.24 30.97-48 36.5-9.93 1.71-20 1.5-6.8-0.48-13 1 5.81 6.92 14 11-10.78 16.03-27 26.5 27.16-7.4 38-33.5 4.34 1.35 9 1-9.08 23.84-33 33.5-18.45 6.41-38 7 22.59 8.92 45-1 12.05-5.52 24-11 9.01-1.79 17 2.5 5.28-4.38 11-8 12.8-6.07 27-5 0 0.5 0 1-19.34 2.69-34 15.5 0.5 0.25 1 0.5 17.79-8.09 36-15 2.71-0.79 5-2 2.5-1 5-2 5.53-4.04 11-8 11.7-4.18 24-6.5 7.78-1.36 15 1.5-2.97 18.45-13.5 34-34.92 49.37-94.5 62.5-59.27 12.45-108-23-15.53-12.52-21.5-31.5-2.47-14.26 4-27-3.15 24.41 14 42-4.92-10.28-7-22-1.97-17.63 7-33 47.28-69.5 125.5-100 15.86-3.42 32-5.5 18.63-1.47 37 1.5z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s5" d="m86.5 112.5q-1-6.5-2-13 0.7-5.34 3.5-10-1.8 11.32-1.5 23z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s6" d="m433.5 97.5q2.22-0.39 4 1-10 13.75-27 14-0.24-2.06 0.5-4 10.3-7.78 22.5-11z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s7" d="m407.5 101.5q2.55-0.24 5 0.5-52.87 18.31-84.5 64.5-6.94 7.95-17 11-9.38-2.38-5-11 40.38-48.62 101.5-65z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s8" d="m402.5 112.5q3 0 6 0-2.56 8.8-12 7-0.22-1.58 0.5-3 2.72-2.22 5.5-4z"/>
 	</g>
 	<g>
 	</g>
 	<g>
 	</g>
 	<g>
 	</g>
 	<g>
 	</g>
 	<g>
 	</g>
 	<g>
 	</g>
 	<g>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s9" d="m390.5 149.5q7.77 0.52 15 2-11.29 18.28-31 27 9.94-8.7 19.5-20 1-2 0-4-3.19-1.68-5.5-4.5 0.89-0.7 2-0.5z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s10" d="m131.5 145.5q0 7.5 0 15-2 0-4 0 1.06-1.36 3-1-0.48-7.29 1-14z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s11" d="m219.5 204.5q-1 4.5-2 9 0.24-2.55-0.5-5-5.39-13.4-17.5-21.5-21.38-12.04-46-13.5 0-2 0-4 36.7-0.86 61.5 26 3.06 4.11 4.5 9z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s12" d="m329.5 191.5q6.2-1.48 13-1-3.5 1-7 2-2.9-0.97-6-1z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s13" d="m329.5 191.5q3.1 0.03 6 1 9.55 1.31 19 3-10.84 26.1-38 33.5 16.22-10.47 27-26.5-8.19-4.08-14-11z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s14" d="m479.5 199.5q-7.22-2.86-15-1.5-12.3 2.32-24 6.5 15.6-13.11 36-11.5 3.63 2.26 3 6.5z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s15" d="m193.5 216.5q-12.01 1.52-22 8-2.83 1.29-5.5 3-4.79-4.57-6.5-11-5.04 2.2-9.5-1-3.47-6.4 3.5-3 4.4 0.05 8-2.5 9.22-9.73 21-16 6.3-3.24 12 1-2.9 1.22-6 1.5 2.61 5.74 4.5 12 0.75 3.97 0.5 8z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s16" d="m458.5 200.5q3.04-0.24 6 0.5-18.02 7.05-33 19-1 1-2 0 11.53-14.3 29-19.5z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s17" d="m178.5 202.5q6.85-0.63 4.5 6-7.6 5.09-6-4 1.08-0.82 1.5-2z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s18" d="m469.5 201.5q-2.26 13.65-14.5 22-0.47-2.11 1-4 7.08-8.82 13.5-18z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s19" d="m74.5 208.5q8.22-0.2 16 2.5 11.8 4.26 23.5 8.5 5.65-0.63 8-6 2.41 11.83-9.5 13 0.55 3.61 2 7-0.5 1-1 2-4.67-0.94-9.5-1-9.96 0.44-19.5 2.5-5.05-3.55-6.5-9.5-0.75-7.48-0.5-15-6.47 0.15-3-4z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s20" d="m429.5 212.5q-2.5 1-5 2-4 0-8 0-14.2-1.07-27 5 15.27-12.44 35-9.5 2.72 1.14 5 2.5z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s21" d="m219.5 204.5q0.48 5.1-1 10-0.91-0.19-1-1 1-4.5 2-9z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s22" d="m416.5 215.5q0-0.5 0-1 4 0 8 0-2.29 1.21-5 2-1.06-1.36-3-1z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s23" d="m416.5 215.5q1.94-0.36 3 1-18.21 6.91-36 15-0.5-0.25-1-0.5 14.66-12.81 34-15.5z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s24" d="m193.5 216.5q4.39 1.3 9 3-0.79 1.04-2 1.5-14.77-0.13-29 3.5 9.99-6.48 22-8z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s25" d="m98.5 219.5q6.09-0.98 6 5-3.04 0.24-6-0.5-1.84-2.24 0-4.5z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s26" d="m176.5 229.5q8.85-1.14 16 4-4.98 1.75-10 0-13.56 14.3-33 19.5-28.06 8.2-55 1 3.32-6.4 10-5.5-0.71 1.47-2 2.5 36.58 4.24 69-14 4.68-2.13 1-5 2.35-0.91 4-2.5z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s27" d="m231.5 238.5q1.31-0.2 2 1-3.13 28.62 15 51-16.25 6.75-27-7.5-1-1-2 0 14.73 29.34 46 18.5 1.79 0.52 0 1.5-37.63 16.82-50.5-22.5-5.1-26.48 16.5-42z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s28" d="m243.5 259.5q5.88 3.62 10.5 9 12.96 18.46 32.5 29.5-31.51-7.75-43-38.5z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s29" d="m203.5 266.5q1.31-0.2 2 1-2.48 22.08 12 39-6.99 1.35-14 0.5 4.59 4.08 10 7-8.71 0.28-14.5-6.5-16.98-22.76 4.5-41z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s27" d="m58.5 284.5q9.6-2.17 14.5 6 5.15 14.18-1 28-11.05-13.14-27.5-17.5 5.15-9.9 14-16.5z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s30" d="m129.5 288.5q2 1 4 2-3.14 0.27-6-1-0.77-1.4 2-1z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s31" d="m56.5 313.5q3.43 5.43 8 10-4.88 0.44-8 4-1.11-0.2-2 0.5 28.91 1.65 38 28.5 0.45 3.16-1 6-11.02-7.01-23-12.5-4.75-3.75-9.5-7.5 1.47 7.42 7 13 8.34 27.18 32 43 0.99 2.41-1.5 3.5-40.25 5.58-66.5-25.5-15.67-22.01-8-48 10.46-23.87 34.5-15z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s32" d="m45.5 317.5q4.03-0.25 8 0.5 2.46 4.16-2 6-6.04 2.01-9-3.5 1.26-1.85 3-3z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s33" d="m56.5 313.5q4.91 3.14 9.5 7 0.88 2.25-1.5 3-4.57-4.57-8-10z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s34" d="m198.5 319.5q-11.1 11.56-27 15.5-15.75 4.88-32 2.5 28.81-3.69 54-18.5 2.65-0.96 5 0.5z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s4" d="m198.5 319.5q1.44 0.68 2.5 2 2.41 8.23 6 16 1.2 2.64-0.5 5-30.65 21.41-68 18.5-25.16-6.17-32.5-30.5 6.96 4.99 15.5 6.5 8.99 0.75 18 0.5 16.25 2.38 32-2.5 15.9-3.94 27-15.5z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s35" d="m92.5 356.5q-9.09-26.85-38-28.5 0.89-0.7 2-0.5 25.47-4.89 35.5 19 0.75 4.98 0.5 10z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s36" d="m72.5 335.5q3.62-0.38 5 3-4.22 1.83-5-3z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s37" d="m223.5 336.5q5.59-0.48 11 1-4.04 4.16-8.5 8-5.99-3.8-2.5-9z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s38" d="m90.5 334.5q0.59-1.54 2-0.5 3.94 5.45 9 10 7 6 14 12-6.91-1.7-13-6-6.21-7.72-12-15.5z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s39" d="m261.5 346.5q-3.54-2.44-8-3.5-6.98-0.75-14-0.5 0.63-1.08 2-1.5 13.82-2.52 26 4-2.63 1.98-6 1.5z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s40" d="m239.5 342.5q7.02-0.25 14 0.5 4.46 1.06 8 3.5-5.2 2.35-10 5.5-3.88 4.65-9 7.5-9.89-3.09-9.5-13 2.36-3.63 6.5-4z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s41" d="m214.5 349.5q-21.43 15.48-48 16 22.82-5.9 43-18.5 3.64-1.12 5 2.5z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s42" d="m214.5 349.5q5.96 7.2 13.5 13 1 1 0 2-28.58 23.34-65.5 20.5-18.15-4.24-27.5-19.5 1.13 0.94 2.5 1.5 14.7 1.42 29-1.5 26.57-0.52 48-16z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s43" d="m302.5 373.5q-14.74-16.73-37-19-4.55 0.25-9 1 25.3-10.24 43.5 11 2.85 2.91 2.5 7z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s44" d="m302.5 373.5q0.21 2.44-2 3.5-28.69 7.6-50.5-12.5-0.06-6.71 6.5-9 4.45-0.75 9-1 22.26 2.27 37 19z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s45" d="m100.5 356.5q5.42 2.71 11 5.5-13.04 7.54-18.5 21.5-7.57-7.14-10.5-17 5.58 1.54 10 5.5 4.2 0.84 5.5-3.5 1.41-5.99 2.5-12z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s8" d="m83.5 394.5q-18.9-10.15-29.5-29-1.54-3.52-2-7 5.79 2.39 10 7 7.82 16.63 21.5 29z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s46" d="m232.5 365.5q17.6 6.19 10.5 23-10.6 10.42-25.5 11.5-25.94 3.21-49-9 36.75-1.65 64-25.5z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s47" d="m113.5 367.5q7.7-0.01 9.5 7-9.69 7.19-18.5 15.5-7.23 5.76-5.5-3.5 3.12-12.84 14.5-19z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s29" d="m126.5 380.5q7.88-0.4 12 6.5-8.5 7.25-17 14.5-5.62-12.55 5-21z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s48" d="m283.5 385.5q3.22 2.95 7 5.5 2.8 4.03 6 7.5 0.42 2.77-2 4-15.5-9.75-31-19.5-1.79-0.98 0-1.5 9.96 2.49 20 4z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s49" d="m283.5 385.5q8.71-1.27 11.5 7 1.22 2.9 1.5 6-3.2-3.47-6-7.5-3.78-2.55-7-5.5z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s50" d="m83.5 394.5q1.88-0.06 3 1.5-2.25 0.88-3-1.5z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s51" d="m258.5 392.5q3.51 0.41 0 2.5-2.33 1.93-5 2 2.61-2.28 5-4.5z"/>
 	</g>
 	<g>
 		<path fill-rule="evenodd" class="s52" d="m111.5 392.5q0.09-0.81 1-1 1.48 4.9 1 10-1-4.5-2-9z"/>
 	</g>
 </svg>
--- a/app/ui/app/public/launch-icons/opencode.svg
+++ b/app/ui/app/public/launch-icons/opencode.svg
@@ -0,0 +1,7 @@
 <svg xmlns="http://www.w3.org/2000/svg" version="1.1" xmlns:xlink="http://www.w3.org/1999/xlink" width="512" height="512"><svg width="512" height="512" viewBox="0 0 512 512" fill="none" xmlns="http://www.w3.org/2000/svg">
 <rect width="512" height="512" fill="#131010"></rect>
 <path d="M320 224V352H192V224H320Z" fill="#5A5858"></path>
 <path fill-rule="evenodd" clip-rule="evenodd" d="M384 416H128V96H384V416ZM320 160H192V352H320V160Z" fill="white"></path>
 </svg><style>@media (prefers-color-scheme: light) { :root { filter: none; } }
@media (prefers-color-scheme: dark) { :root { filter: none; } }
 </style></svg>
--- a/app/ui/app/public/launch-icons/pi-dark.svg
+++ b/app/ui/app/public/launch-icons/pi-dark.svg
@@ -0,0 +1,9 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 800 800">
  <rect width="800" height="800" rx="160" fill="#fff"/>
  <path fill="#000" fill-rule="evenodd" d="
    M165.29 165.29 H517.36 V400 H400 V517.36 H282.65 V634.72 H165.29 Z
    M282.65 282.65 V400 H400 V282.65 Z
  "/>
  <path fill="#000" d="M517.36 400 H634.72 V634.72 H517.36 Z"/>
 </svg>
--- a/app/ui/app/public/launch-icons/pi.svg
+++ b/app/ui/app/public/launch-icons/pi.svg
@@ -0,0 +1,9 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 800 800">
  <rect width="800" height="800" rx="160" fill="#000"/>
  <path fill="#fff" fill-rule="evenodd" d="
    M165.29 165.29 H517.36 V400 H400 V517.36 H282.65 V634.72 H165.29 Z
    M282.65 282.65 V400 H400 V282.65 Z
  "/>
  <path fill="#fff" d="M517.36 400 H634.72 V634.72 H517.36 Z"/>
 </svg>
--- a/app/ui/app/src/api.ts
+++ b/app/ui/app/src/api.ts
@@ -4,7 +4,6 @@ import {
  ChatEvent,
  DownloadEvent,
  ErrorEvent,
  InferenceCompute,
  InferenceComputeResponse,
  ModelCapabilitiesResponse,
  Model,
@@ -27,6 +26,12 @@ declare module "@/gotypes" {
 /**
  * Reports whether this model is treated as a cloud model.
  *
  * The check is purely name-based: it returns true when the model's `model`
  * string ends with the literal suffix "cloud".
  */
 Model.prototype.isCloud = function (): boolean {
  return this.model.endsWith("cloud");
 };
 // Allowed values for the `source` field of a cloud status payload.
 // NOTE(review): presumably identifies where the cloud-disabled setting
 // originates (environment, config file, both, or neither) — confirm against
 // the server handler for /api/v1/cloud.
 export type CloudStatusSource = "env" | "config" | "both" | "none";
 // Normalized shape returned by the cloud status helpers in this module.
 export interface CloudStatusResponse {
  // True when cloud features are reported as disabled.
  disabled: boolean;
  // One of the CloudStatusSource values; the helpers fall back to "none".
  source: CloudStatusSource;
 }
 // Helper function to convert Uint8Array to base64
 function uint8ArrayToBase64(uint8Array: Uint8Array): string {
  const chunkSize = 0x8000; // 32KB chunks to avoid stack overflow
@@ -156,7 +161,7 @@ export async function getModels(query?: string): Promise<Model[]> {
      // Add query if it's in the registry and not already in the list
      if (!exactMatch) {
        const result = await getModelUpstreamInfo(new Model({ model: query }));
-        const existsUpstream = !!result.digest && !result.error;
+        const existsUpstream = result.exists;
        if (existsUpstream) {
          filteredModels.push(new Model({ model: query }));
        }
@@ -285,6 +290,28 @@ export async function updateSettings(settings: Settings): Promise<{
  };
 }
 /**
  * Sends the desired cloud setting to the backend and returns the resulting
  * cloud status.
  *
  * Issues a POST to `${API_BASE}/api/v1/cloud` with a JSON body of
  * `{ enabled }`.
  *
  * @param enabled - value forwarded verbatim as the `enabled` field.
  * @returns the response normalized to `{ disabled, source }`; `disabled` is
  *   coerced with `Boolean(...)` and a missing/falsy `source` becomes "none".
  * @throws Error when the response is not ok; the message is the response
  *   body text, or "Failed to update cloud setting" if the body is empty.
  */
 export async function updateCloudSetting(
   enabled: boolean,
 ): Promise<CloudStatusResponse> {
   const response = await fetch(`${API_BASE}/api/v1/cloud`, {
     method: "POST",
     headers: {
       "Content-Type": "application/json",
     },
     body: JSON.stringify({ enabled }),
   });
   if (!response.ok) {
     // Prefer the server-provided text; fall back to a generic message.
     const error = await response.text();
     throw new Error(error || "Failed to update cloud setting");
   }
   const data = await response.json();
   // Normalize loosely-typed JSON into the CloudStatusResponse shape.
   return {
     disabled: Boolean(data.disabled),
     source: (data.source as CloudStatusSource) || "none",
   };
 }
 export async function renameChat(chatId: string, title: string): Promise<void> {
  const response = await fetch(`${API_BASE}/api/v1/chat/${chatId}/rename`, {
    method: "PUT",
@@ -312,7 +339,7 @@ export async function deleteChat(chatId: string): Promise<void> {
 // Get upstream information for model staleness checking
 export async function getModelUpstreamInfo(
  model: Model,
-): Promise<{ digest?: string; pushTime: number; error?: string }> {
+): Promise<{ stale: boolean; exists: boolean; error?: string }> {
  try {
    const response = await fetch(`${API_BASE}/api/v1/model/upstream`, {
      method: "POST",
@@ -326,22 +353,22 @@ export async function getModelUpstreamInfo(
    if (!response.ok) {
      console.warn(
-        `Failed to check upstream digest for ${model.model}: ${response.status}`,
+        `Failed to check upstream for ${model.model}: ${response.status}`,
      );
-      return { pushTime: 0 };
+      return { stale: false, exists: false };
    }
    const data = await response.json();
    if (data.error) {
-      console.warn(`Upstream digest check: ${data.error}`);
+      console.warn(`Upstream check: ${data.error}`);
-      return { error: data.error, pushTime: 0 };
+      return { stale: false, exists: false, error: data.error };
    }
-    return { digest: data.digest, pushTime: data.pushTime || 0 };
+    return { stale: !!data.stale, exists: true };
  } catch (error) {
    console.warn(`Error checking model staleness:`, error);
-    return { pushTime: 0 };
+    return { stale: false, exists: false };
  }
 }
@@ -379,7 +406,7 @@ export async function* pullModel(
  }
 }
-export async function getInferenceCompute(): Promise<InferenceCompute[]> {
+export async function getInferenceCompute(): Promise<InferenceComputeResponse> {
  const response = await fetch(`${API_BASE}/api/v1/inference-compute`);
  if (!response.ok) {
    throw new Error(
@@ -388,8 +415,7 @@ export async function getInferenceCompute(): Promise<InferenceCompute[]> {
  }
  const data = await response.json();
-  const inferenceComputeResponse = new InferenceComputeResponse(data);
+  return new InferenceComputeResponse(data);
  return inferenceComputeResponse.inferenceComputes || [];
 }
 export async function fetchHealth(): Promise<boolean> {
@@ -414,3 +440,16 @@ export async function fetchHealth(): Promise<boolean> {
    return false;
  }
 }
 /**
  * Fetches the current cloud status from `${API_BASE}/api/v1/cloud`.
  *
  * @returns the response normalized to `{ disabled, source }`; `disabled` is
  *   coerced with `Boolean(...)` and a missing/falsy `source` becomes "none".
  *   NOTE(review): the declared return type allows `null`, but no path in this
  *   body returns `null` — confirm whether callers still need the null case.
  * @throws Error containing the HTTP status when the response is not ok.
  */
 export async function getCloudStatus(): Promise<CloudStatusResponse | null> {
   const response = await fetch(`${API_BASE}/api/v1/cloud`);
   if (!response.ok) {
     throw new Error(`Failed to fetch cloud status: ${response.status}`);
   }
   const data = await response.json();
   // Normalize loosely-typed JSON into the CloudStatusResponse shape.
   return {
     disabled: Boolean(data.disabled),
     source: (data.source as CloudStatusSource) || "none",
   };
 }
--- a/app/ui/app/src/components/ChatForm.tsx
+++ b/app/ui/app/src/components/ChatForm.tsx
@@ -17,11 +17,15 @@ import {
 } from "@/hooks/useChats";
 import { useNavigate } from "@tanstack/react-router";
 import { useSelectedModel } from "@/hooks/useSelectedModel";
-import { useHasVisionCapability } from "@/hooks/useModelCapabilities";
+import {
  useHasVisionCapability,
  useHasToolsCapability,
 } from "@/hooks/useModelCapabilities";
 import { useUser } from "@/hooks/useUser";
 import { DisplayLogin } from "@/components/DisplayLogin";
 import { ErrorEvent, Message } from "@/gotypes";
 import { useSettings } from "@/hooks/useSettings";
 import { useCloudStatus } from "@/hooks/useCloudStatus";
 import { ThinkButton } from "./ThinkButton";
 import { ErrorMessage } from "./ErrorMessage";
 import { processFiles } from "@/utils/fileValidation";
@@ -141,19 +145,14 @@ function ChatForm({
  const {
    settings: {
      webSearchEnabled,
      airplaneMode,
      thinkEnabled,
      thinkLevel: settingsThinkLevel,
    },
    setSettings,
  } = useSettings();
  const { cloudDisabled } = useCloudStatus();
-  // current supported models for web search
+  const supportsWebSearch = useHasToolsCapability(selectedModel?.model);
  const modelLower = selectedModel?.model.toLowerCase() || "";
  const supportsWebSearch =
    modelLower.startsWith("gpt-oss") ||
    modelLower.startsWith("qwen3") ||
    modelLower.startsWith("deepseek-v3");
  // Use per-chat thinking level instead of global
  const thinkLevel: ThinkingLevel =
    settingsThinkLevel === "none" || !settingsThinkLevel
@@ -180,6 +179,12 @@ function ChatForm({
    setSettings,
  ]);
  useEffect(() => {
    if (cloudDisabled && webSearchEnabled) {
      setSettings({ WebSearchEnabled: false });
    }
  }, [cloudDisabled, webSearchEnabled, setSettings]);
  const removeFile = (index: number) => {
    setMessage((prev) => ({
      ...prev,
@@ -234,19 +239,19 @@ function ChatForm({
  // Determine if login banner should be shown
  const shouldShowLoginBanner =
    !cloudDisabled &&
    !isLoadingUser &&
    !isAuthenticated &&
-    ((webSearchEnabled && supportsWebSearch) ||
+    ((webSearchEnabled && supportsWebSearch) || selectedModel?.isCloud());
      (selectedModel?.isCloud() && !airplaneMode));
  // Determine which feature to highlight in the banner
  const getActiveFeatureForBanner = () => {
    if (cloudDisabled) return null;
    if (!isAuthenticated) {
      if (loginPromptFeature) return loginPromptFeature;
-      if (webSearchEnabled && selectedModel?.isCloud() && !airplaneMode)
+      if (webSearchEnabled && selectedModel?.isCloud()) return "webSearch";
        return "webSearch";
      if (webSearchEnabled) return "webSearch";
-      if (selectedModel?.isCloud() && !airplaneMode) return "turbo";
+      if (selectedModel?.isCloud()) return "turbo";
    }
    return null;
  };
@@ -269,11 +274,12 @@ function ChatForm({
  useEffect(() => {
    if (
      isAuthenticated ||
-      (!webSearchEnabled && !!selectedModel?.isCloud() && !airplaneMode)
+      cloudDisabled ||
      (!webSearchEnabled && !!selectedModel?.isCloud())
    ) {
      setLoginPromptFeature(null);
    }
-  }, [isAuthenticated, webSearchEnabled, selectedModel, airplaneMode]);
+  }, [isAuthenticated, webSearchEnabled, selectedModel, cloudDisabled]);
  // When entering edit mode, populate the composition with existing data
  useEffect(() => {
@@ -465,20 +471,27 @@ function ChatForm({
  const handleSubmit = async () => {
    if (!message.content.trim() || isStreaming || isDownloading) return;
    if (cloudDisabled && selectedModel?.isCloud()) {
      return;
    }
    // Check if cloud mode is enabled but user is not authenticated
    if (shouldShowLoginBanner) {
      return;
    }
-    // Prepare attachments for submission
+    // Prepare attachments for submission, excluding unsupported images
-    const attachmentsToSend: FileAttachment[] = message.attachments.map(
+    const attachmentsToSend: FileAttachment[] = message.attachments
-      (att) => ({
+      .filter(
        (att) => hasVisionCapability || !isImageFile(att.filename),
      )
      .map((att) => ({
        filename: att.filename,
        data: att.data || new Uint8Array(0), // Empty data for existing files
-      }),
+      }));
    );
-    const useWebSearch = supportsWebSearch && webSearchEnabled && !airplaneMode;
+    const useWebSearch =
      supportsWebSearch && webSearchEnabled && !cloudDisabled;
    const useThink = modelSupportsThinkingLevels
      ? thinkLevel
      : supportsThinkToggling
@@ -725,10 +738,17 @@ function ChatForm({
        )}
        {(message.attachments.length > 0 || message.fileErrors.length > 0) && (
          <div className="flex gap-2 overflow-x-auto px-3 pt pb-3 w-full scrollbar-hide">
-            {message.attachments.map((attachment, index) => (
+            {message.attachments.map((attachment, index) => {
              const isUnsupportedImage =
                !hasVisionCapability && isImageFile(attachment.filename);
              return (
              <div
                key={attachment.id}
-                className="group flex items-center gap-2 py-2 px-3 rounded-lg bg-neutral-50 dark:bg-neutral-700/50 hover:bg-neutral-100 dark:hover:bg-neutral-700 transition-colors flex-shrink-0"
+                className={`group flex items-center gap-2 py-2 px-3 rounded-lg transition-colors flex-shrink-0 ${
                  isUnsupportedImage
                    ? "bg-red-50 dark:bg-red-900/20 border border-red-200 dark:border-red-800"
                    : "bg-neutral-50 dark:bg-neutral-700/50 hover:bg-neutral-100 dark:hover:bg-neutral-700"
                }`}
              >
                {isImageFile(attachment.filename) ? (
                  <ImageThumbnail
@@ -753,9 +773,16 @@ function ChatForm({
                    />
                  </svg>
                )}
-                <span className="text-sm text-neutral-700 dark:text-neutral-300 max-w-[150px] truncate">
+                <div className="flex flex-col min-w-0">
-                  {attachment.filename}
+                  <span className={`text-sm max-w-36 truncate ${isUnsupportedImage ? "text-red-700 dark:text-red-300" : "text-neutral-700 dark:text-neutral-300"}`}>
-                </span>
+                    {attachment.filename}
                  </span>
                  {isUnsupportedImage && (
                    <span className="text-xs text-red-600 dark:text-red-400 opacity-75">
                      This model does not support images
                    </span>
                  )}
                </div>
                <button
                  type="button"
                  onClick={() => removeFile(index)}
@@ -777,7 +804,8 @@ function ChatForm({
                  </svg>
                </button>
              </div>
-            ))}
+              );
            })}
            {message.fileErrors.map((fileError, index) => (
              <div
                key={`error-${index}`}
@@ -899,7 +927,7 @@ function ChatForm({
                )}
                <WebSearchButton
                  ref={webSearchButtonRef}
-                  isVisible={supportsWebSearch && airplaneMode === false}
+                  isVisible={supportsWebSearch && cloudDisabled === false}
                  isActive={webSearchEnabled}
                  onToggle={() => {
                    if (!webSearchEnabled && !isAuthenticated) {
@@ -940,6 +968,7 @@ function ChatForm({
                !isDownloading &&
                (!message.content.trim() ||
                  shouldShowLoginBanner ||
                  (cloudDisabled && selectedModel?.isCloud()) ||
                  message.fileErrors.length > 0)
              }
              className={`flex items-center justify-center h-9 w-9 rounded-full disabled:cursor-default cursor-pointer bg-black text-white dark:bg-white dark:text-black disabled:opacity-10 focus:outline-none focus:ring-2 focus:ring-blue-500`}
--- a/app/ui/app/src/components/ChatSidebar.tsx
+++ b/app/ui/app/src/components/ChatSidebar.tsx
@@ -6,12 +6,13 @@ import { getChat } from "@/api";
 import { Link } from "@/components/ui/link";
 import { useState, useRef, useEffect, useCallback, useMemo } from "react";
 import { ChatsResponse } from "@/gotypes";
-import { CogIcon } from "@heroicons/react/24/outline";
+import { CogIcon, RocketLaunchIcon } from "@heroicons/react/24/outline";
 // there's a hidden debug feature to copy a chat's data to the clipboard by
 // holding shift and clicking this many times within this many seconds
 const DEBUG_SHIFT_CLICKS_REQUIRED = 5;
 const DEBUG_SHIFT_CLICK_WINDOW_MS = 7000; // 7 seconds
 const launchSidebarRequestedKey = "ollama.launchSidebarRequested";
 interface ChatSidebarProps {
  currentChatId?: string;
@@ -267,9 +268,8 @@ export function ChatSidebar({ currentChatId }: ChatSidebarProps) {
        <Link
          href="/c/new"
          mask={{ to: "/" }}
-          className={`flex w-full items-center gap-3 rounded-lg px-2 py-2 text-left text-sm text-neutral-700 hover:bg-neutral-100 dark:hover:bg-neutral-800 dark:text-neutral-100 ${
+          className={`flex w-full items-center gap-3 rounded-lg px-2 py-2 text-left text-sm text-neutral-700 hover:bg-neutral-100 dark:hover:bg-neutral-800 dark:text-neutral-100 ${currentChatId === "new" ? "bg-neutral-100 dark:bg-neutral-800" : ""
-            currentChatId === "new" ? "bg-neutral-100 dark:bg-neutral-800" : ""
+            }`}
          }`}
          draggable={false}
        >
          <svg
@@ -283,6 +283,23 @@ export function ChatSidebar({ currentChatId }: ChatSidebarProps) {
          </svg>
          <span className="truncate">New Chat</span>
        </Link>
        <Link
          to="/c/$chatId"
          params={{ chatId: "launch" }}
          onClick={() => {
            if (currentChatId !== "launch") {
              sessionStorage.setItem(launchSidebarRequestedKey, "1");
            }
          }}
          className={`flex w-full items-center gap-3 rounded-lg px-2 py-2 text-left text-sm text-neutral-700 hover:bg-neutral-100 dark:hover:bg-neutral-800 dark:text-neutral-100 cursor-pointer ${currentChatId === "launch"
            ? "bg-neutral-100 dark:bg-neutral-800"
            : ""
            }`}
          draggable={false}
        >
          <RocketLaunchIcon className="h-5 w-5 stroke-current" />
          <span className="truncate">Launch</span>
        </Link>
        {isWindows && (
          <Link
            href="/settings"
@@ -304,19 +321,18 @@ export function ChatSidebar({ currentChatId }: ChatSidebarProps) {
              {group.chats.map((chat) => (
                <div
                  key={chat.id}
-                  className={`allow-context-menu flex items-center relative text-sm text-neutral-800 dark:text-neutral-400 rounded-lg hover:bg-neutral-100 dark:hover:bg-neutral-800 ${
+                  className={`allow-context-menu flex items-center relative text-sm text-neutral-800 dark:text-neutral-400 rounded-lg hover:bg-neutral-100 dark:hover:bg-neutral-800 ${chat.id === currentChatId
-                    chat.id === currentChatId
+                    ? "bg-neutral-100 text-black dark:bg-neutral-800"
-                      ? "bg-neutral-100 text-black dark:bg-neutral-800"
+                    : ""
-                      : ""
+                    }`}
                  }`}
                  onMouseEnter={() => handleMouseEnter(chat.id)}
                  onContextMenu={(e) =>
                    handleContextMenu(
                      e,
                      chat.id,
                      chat.title ||
-                        chat.userExcerpt ||
+                      chat.userExcerpt ||
-                        chat.createdAt.toLocaleString(),
+                      chat.createdAt.toLocaleString(),
                    )
                  }
                >
--- a/app/ui/app/src/components/CopyButton.tsx
+++ b/app/ui/app/src/components/CopyButton.tsx
@@ -10,6 +10,7 @@ interface CopyButtonProps {
  showLabels?: boolean;
  className?: string;
  title?: string;
  onCopy?: () => void;
 }
 const CopyButton: React.FC<CopyButtonProps> = ({
@@ -20,6 +21,7 @@ const CopyButton: React.FC<CopyButtonProps> = ({
  showLabels = false,
  className = "",
  title = "",
  onCopy,
 }) => {
  const [isCopied, setIsCopied] = useState(false);
@@ -48,12 +50,14 @@ const CopyButton: React.FC<CopyButtonProps> = ({
      }
      setIsCopied(true);
      onCopy?.();
      setTimeout(() => setIsCopied(false), 2000);
    } catch (error) {
      console.error("Clipboard API failed, falling back to plain text", error);
      try {
        await navigator.clipboard.writeText(content);
        setIsCopied(true);
        onCopy?.();
        setTimeout(() => setIsCopied(false), 2000);
      } catch (fallbackError) {
        console.error("Fallback copy also failed:", fallbackError);
--- a/app/ui/app/src/components/LaunchCommands.tsx
+++ b/app/ui/app/src/components/LaunchCommands.tsx
@@ -0,0 +1,133 @@
 import { useSettings } from "@/hooks/useSettings";
 import CopyButton from "@/components/CopyButton";
 // Describes one card on the Launch page: an integration plus the terminal
 // command that starts it.
 interface LaunchCommand {
  // Identifier saved as the LastHomeView setting when the command is copied.
  id: string;
  // Display name rendered as the card title.
  name: string;
  // Command text shown on the card and handed to the copy button; also used
  // as the React key of the card.
  command: string;
  // Short blurb rendered under the name.
  description: string;
  // Image URL used as the default icon source.
  icon: string;
  // Optional alternate icon offered through a
  // `(prefers-color-scheme: dark)` <source> element.
  darkIcon?: string;
  // Optional sizing classes for the icon image; "h-8 w-8" is used when omitted.
  iconClassName?: string;
  // When true, the icon tile is rendered without its border/background classes.
  borderless?: boolean;
 }
 // Catalog of launchable integrations. The Launch page renders one card per
 // entry, in the order listed here.
 const LAUNCH_COMMANDS: LaunchCommand[] = [
  {
    id: "openclaw",
    name: "OpenClaw",
    command: "ollama launch openclaw",
    description: "Personal AI with 100+ skills",
    icon: "/launch-icons/openclaw.svg",
  },
  {
    id: "claude",
    name: "Claude",
    command: "ollama launch claude",
    description: "Anthropic's coding tool with subagents",
    icon: "/launch-icons/claude.svg",
    iconClassName: "h-7 w-7",
  },
  {
    id: "codex",
    name: "Codex",
    command: "ollama launch codex",
    description: "OpenAI's open-source coding agent",
    icon: "/launch-icons/codex.svg",
    darkIcon: "/launch-icons/codex-dark.svg",
    iconClassName: "h-7 w-7",
  },
  {
    id: "opencode",
    name: "OpenCode",
    command: "ollama launch opencode",
    description: "Anomaly's open-source coding agent",
    icon: "/launch-icons/opencode.svg",
    iconClassName: "h-7 w-7 rounded",
  },
  {
    id: "droid",
    name: "Droid",
    command: "ollama launch droid",
    description: "Factory's coding agent across terminal and IDEs",
    icon: "/launch-icons/droid.svg",
  },
  {
    id: "pi",
    name: "Pi",
    command: "ollama launch pi",
    description: "Minimal AI agent toolkit with plugin support",
    icon: "/launch-icons/pi.svg",
    darkIcon: "/launch-icons/pi-dark.svg",
    iconClassName: "h-7 w-7",
  },
 ];
 // Launch page: renders every LAUNCH_COMMANDS entry as a card showing its
 // icon, name, description and a copyable terminal command.
 export default function LaunchCommands() {
  const { setSettings } = useSettings();
  // Windows uses a smaller top padding on extra-large viewports.
  const sectionTopPadding = navigator.platform.toLowerCase().includes("win")
    ? "xl:pt-4"
    : "xl:pt-8";

  // Builds the card for a single launch command.
  const renderCommandCard = (item: LaunchCommand) => {
    const iconSize = item.iconClassName ?? "h-8 w-8";
    // Border and background of the icon tile are dropped for borderless items.
    const tileChrome = item.borderless
      ? ""
      : "border border-neutral-200 bg-white dark:border-neutral-700 dark:bg-neutral-900";

    // Items with a dark variant use <picture> so the browser swaps the source
    // based on the preferred color scheme; others render a plain <img>.
    const iconNode = item.darkIcon ? (
      <picture>
        <source srcSet={item.darkIcon} media="(prefers-color-scheme: dark)" />
        <img src={item.icon} alt="" className={`${iconSize} rounded-sm`} />
      </picture>
    ) : (
      <img
        src={item.icon}
        alt=""
        className={item.borderless ? "h-full w-full rounded-xl" : `${iconSize} rounded-sm`}
      />
    );

    // Records which command was copied; a failure to persist is ignored.
    const rememberCopiedCommand = () => {
      setSettings({ LastHomeView: item.id }).catch(() => { });
    };

    return (
      <div key={item.command} className="w-full text-left">
        <div className="flex items-start gap-4 sm:gap-5">
          <div
            aria-hidden="true"
            className={`flex h-10 w-10 shrink-0 items-center justify-center rounded-lg overflow-hidden ${tileChrome}`}
          >
            {iconNode}
          </div>
          <div className="min-w-0 flex-1">
            <span className="text-sm font-medium text-neutral-900 dark:text-neutral-100">
              {item.name}
            </span>
            <p className="mt-0.5 text-xs text-neutral-500 dark:text-neutral-400">
              {item.description}
            </p>
            <div className="mt-2 flex items-center gap-2 rounded-xl border-neutral-200 dark:border-neutral-700 bg-neutral-50 dark:bg-neutral-800 px-3 py-2">
              <code className="min-w-0 flex-1 truncate text-xs text-neutral-600 dark:text-neutral-300">
                {item.command}
              </code>
              <CopyButton
                content={item.command}
                size="md"
                title="Copy command to clipboard"
                className="text-neutral-500 dark:text-neutral-400 hover:text-neutral-700 dark:hover:text-neutral-200 hover:bg-neutral-200/60 dark:hover:bg-neutral-700/70"
                onCopy={rememberCopiedCommand}
              />
            </div>
          </div>
        </div>
      </div>
    );
  };

  return (
    <main className="flex h-screen w-full flex-col relative">
      <section
        className={`flex-1 overflow-y-auto overscroll-contain relative min-h-0 ${sectionTopPadding}`}
      >
        <div className="max-w-[730px] mx-auto w-full px-4 pt-4 pb-20 sm:px-6 sm:pt-6 sm:pb-24 lg:px-8 lg:pt-8 lg:pb-28">
          <h1 className="text-xl font-semibold text-neutral-900 dark:text-neutral-100">
            Launch
          </h1>
          <p className="mt-1 text-sm text-neutral-500 dark:text-neutral-400">
            Copy a command and run it in your terminal.
          </p>
          <div className="mt-6 grid gap-7">
            {LAUNCH_COMMANDS.map((entry) => renderCommandCard(entry))}
          </div>
        </div>
      </section>
    </main>
  );
 }
--- a/app/ui/app/src/components/Message.tsx
+++ b/app/ui/app/src/components/Message.tsx
@@ -536,7 +536,7 @@ function ToolCallDisplay({
    let args: Record<string, unknown> | null = null;
    try {
      args = JSON.parse(toolCall.function.arguments) as Record<string, unknown>;
-    } catch (e) {
+    } catch {
      args = null;
    }
    const query = args && typeof args.query === "string" ? args.query : "";
@@ -562,7 +562,7 @@ function ToolCallDisplay({
    let args: Record<string, unknown> | null = null;
    try {
      args = JSON.parse(toolCall.function.arguments) as Record<string, unknown>;
-    } catch (e) {
+    } catch {
      args = null;
    }
    const url = args && typeof args.url === "string" ? args.url : "";
--- a/app/ui/app/src/components/MessageList.tsx
+++ b/app/ui/app/src/components/MessageList.tsx
@@ -73,7 +73,7 @@ export default function MessageList({
                    ? String(args.url).trim()
                    : "";
              if (candidate) lastQuery = candidate;
-            } catch {}
+            } catch { /* ignored */ }
          }
        }
      }
--- a/app/ui/app/src/components/ModelPicker.tsx
+++ b/app/ui/app/src/components/ModelPicker.tsx
@@ -8,7 +8,7 @@ import {
 } from "react";
 import { Model } from "@/gotypes";
 import { useSelectedModel } from "@/hooks/useSelectedModel";
-import { useSettings } from "@/hooks/useSettings";
+import { useCloudStatus } from "@/hooks/useCloudStatus";
 import { useQueryClient } from "@tanstack/react-query";
 import { getModelUpstreamInfo } from "@/api";
 import { ArrowDownTrayIcon } from "@heroicons/react/24/outline";
@@ -34,7 +34,7 @@ export const ModelPicker = forwardRef<
    chatId,
    searchQuery,
  );
-  const { settings } = useSettings();
+  const { cloudDisabled } = useCloudStatus();
  const dropdownRef = useRef<HTMLDivElement>(null);
  const searchInputRef = useRef<HTMLInputElement>(null);
  const queryClient = useQueryClient();
@@ -61,24 +61,7 @@ export const ModelPicker = forwardRef<
    try {
      const upstreamInfo = await getModelUpstreamInfo(model);
-      // Compare local digest with upstream digest
+      if (upstreamInfo.stale) {
      let isStale =
        model.digest &&
        upstreamInfo.digest &&
        model.digest !== upstreamInfo.digest;
      // If the model has a modified time and upstream has a push time,
      // check if the model was modified after the push time - if so, it's not stale
      if (isStale && model.modified_at && upstreamInfo.pushTime > 0) {
        const modifiedAtTime =
          new Date(model.modified_at as string | number | Date).getTime() /
          1000;
        if (modifiedAtTime > upstreamInfo.pushTime) {
          isStale = false;
        }
      }
      if (isStale) {
        const currentStaleModels =
          queryClient.getQueryData<Map<string, boolean>>(["staleModels"]) ||
          new Map();
@@ -219,7 +202,7 @@ export const ModelPicker = forwardRef<
            models={models}
            selectedModel={selectedModel}
            onModelSelect={handleModelSelect}
-            airplaneMode={settings.airplaneMode}
+            cloudDisabled={cloudDisabled}
            isOpen={isOpen}
          />
        </div>
@@ -233,13 +216,13 @@ export const ModelList = forwardRef(function ModelList(
    models,
    selectedModel,
    onModelSelect,
-    airplaneMode,
+    cloudDisabled,
    isOpen,
  }: {
    models: Model[];
    selectedModel: Model | null;
    onModelSelect: (model: Model) => void;
-    airplaneMode: boolean;
+    cloudDisabled: boolean;
    isOpen: boolean;
  },
  ref,
@@ -348,7 +331,7 @@ export const ModelList = forwardRef(function ModelList(
                  </svg>
                )}
                {model.digest === undefined &&
-                  (airplaneMode || !model.isCloud()) && (
+                  (cloudDisabled || !model.isCloud()) && (
                    <ArrowDownTrayIcon
                      className="h-4 w-4 text-neutral-500 dark:text-neutral-400"
                      strokeWidth={1.75}
--- a/app/ui/app/src/components/Settings.tsx
+++ b/app/ui/app/src/components/Settings.tsx
@@ -11,15 +11,24 @@ import {
  FolderIcon,
  BoltIcon,
  WrenchIcon,
  CloudIcon,
  XMarkIcon,
  CogIcon,
  ArrowLeftIcon,
  ArrowDownTrayIcon,
 } from "@heroicons/react/20/solid";
 import { Settings as SettingsType } from "@/gotypes";
 import { useNavigate } from "@tanstack/react-router";
 import { useUser } from "@/hooks/useUser";
 import { useCloudStatus } from "@/hooks/useCloudStatus";
 import { useQuery, useMutation, useQueryClient } from "@tanstack/react-query";
-import { getSettings, updateSettings } from "@/api";
+import {
  getSettings,
  type CloudStatusResponse,
  updateCloudSetting,
  updateSettings,
  getInferenceCompute,
 } from "@/api";
 function AnimatedDots() {
  return (
@@ -53,6 +62,11 @@ export default function Settings() {
  const [connectionError, setConnectionError] = useState<string | null>(null);
  const [pollingInterval, setPollingInterval] = useState<number | null>(null);
  const navigate = useNavigate();
  const {
    cloudDisabled,
    cloudStatus,
    isLoading: cloudStatusLoading,
  } = useCloudStatus();
  const {
    data: settingsData,
@@ -65,6 +79,13 @@ export default function Settings() {
  const settings = settingsData?.settings || null;
  const { data: inferenceComputeResponse } = useQuery({
    queryKey: ["inferenceCompute"],
    queryFn: getInferenceCompute,
  });
  const defaultContextLength = inferenceComputeResponse?.defaultContextLength;
  const updateSettingsMutation = useMutation({
    mutationFn: updateSettings,
    onSuccess: () => {
@@ -74,6 +95,50 @@ export default function Settings() {
    },
  });
  // Mutation behind the Cloud toggle. It calls updateCloudSetting and keeps
  // the cached ["cloudStatus"] query in step with the toggle through an
  // optimistic update, with rollback on failure.
  const updateCloudMutation = useMutation({
    mutationFn: (enabled: boolean) => updateCloudSetting(enabled),
    onMutate: async (enabled: boolean) => {
      // Cancel in-flight cloudStatus queries before writing the optimistic value.
      await queryClient.cancelQueries({ queryKey: ["cloudStatus"] });
      // Snapshot the current cache entry so onError can restore it.
      const previous = queryClient.getQueryData<CloudStatusResponse | null>([
        "cloudStatus",
      ]);
      // A source of "env" or "both" keeps the optimistic value disabled
      // regardless of the toggle.
      const envForcesDisabled =
        previous?.source === "env" || previous?.source === "both";
      queryClient.setQueryData<CloudStatusResponse | null>(
        ["cloudStatus"],
        previous
          ? {
              ...previous,
              disabled: !enabled || envForcesDisabled,
            }
          : {
              // No cached status yet: seed one attributed to "config".
              disabled: !enabled,
              source: "config",
            },
      );
      // Returned value becomes the `context` argument of onError.
      return { previous };
    },
    onError: (_error, _enabled, context) => {
      // Roll back to the snapshot; nothing is restored when the snapshot was
      // undefined (no cache entry existed before the mutation).
      if (context?.previous !== undefined) {
        queryClient.setQueryData(["cloudStatus"], context.previous);
      }
    },
    onSuccess: (status) => {
      // Replace the optimistic value with the status the mutation resolved to.
      queryClient.setQueryData<CloudStatusResponse | null>(
        ["cloudStatus"],
        status,
      );
      // Mark both the models and cloudStatus queries as stale.
      queryClient.invalidateQueries({ queryKey: ["models"] });
      queryClient.invalidateQueries({ queryKey: ["cloudStatus"] });
      // Show the "saved" indicator for 1.5 seconds.
      setShowSaved(true);
      setTimeout(() => setShowSaved(false), 1500);
    },
  });
  useEffect(() => {
    refetchUser();
  }, []); // eslint-disable-line react-hooks/exhaustive-deps
@@ -148,13 +213,18 @@ export default function Settings() {
        Models: "",
        Agent: false,
        Tools: false,
-        ContextLength: 4096,
+        ContextLength: 0,
-        AirplaneMode: false,
+        AutoUpdateEnabled: true,
      });
      updateSettingsMutation.mutate(defaultSettings);
    }
  };
  const cloudOverriddenByEnv =
    cloudStatus?.source === "env" || cloudStatus?.source === "both";
  const cloudToggleDisabled =
    cloudStatusLoading || updateCloudMutation.isPending || cloudOverriddenByEnv;
  const handleConnectOllamaAccount = async () => {
    setConnectionError(null);
@@ -203,6 +273,10 @@ export default function Settings() {
  }
  const isWindows = navigator.platform.toLowerCase().includes("win");
  // Leaves the settings screen for the chat route matching the last home
  // view: "new" when it was "chat", otherwise "launch".
  const handleCloseSettings = () => {
    const lastViewWasChat = settings.LastHomeView === "chat";
    navigate({
      to: "/c/$chatId",
      params: { chatId: lastViewWasChat ? "new" : "launch" },
    });
  };
  return (
    <main className="flex h-screen w-full flex-col select-none dark:bg-neutral-900">
@@ -216,7 +290,7 @@ export default function Settings() {
        >
          {isWindows && (
            <button
-              onClick={() => navigate({ to: "/" })}
+              onClick={handleCloseSettings}
              className="hover:bg-neutral-100 mr-3 dark:hover:bg-neutral-800 rounded-full p-1.5"
            >
              <ArrowLeftIcon className="w-5 h-5 dark:text-white" />
@@ -226,7 +300,7 @@ export default function Settings() {
        </h1>
        {!isWindows && (
          <button
-            onClick={() => navigate({ to: "/" })}
+            onClick={handleCloseSettings}
            className="p-1 hover:bg-neutral-100 mr-3 dark:hover:bg-neutral-800 rounded-full"
          >
            <XMarkIcon className="w-6 h-6 dark:text-white" />
@@ -237,7 +311,7 @@ export default function Settings() {
        <div className="space-y-4 max-w-2xl mx-auto">
          {/* Connect Ollama Account */}
          <div className="overflow-hidden rounded-xl bg-white dark:bg-neutral-800">
-            <div className="p-4 border-b border-neutral-200 dark:border-neutral-800">
+            <div className="p-4">
              <Field>
                {isLoading ? (
                  // Loading skeleton, this will only happen if the app started recently
@@ -344,6 +418,57 @@ export default function Settings() {
          {/* Local Configuration */}
          <div className="relative overflow-hidden rounded-xl bg-white dark:bg-neutral-800">
            <div className="space-y-4 p-4">
              <Field>
                <div className="flex items-start justify-between gap-4">
                  <div className="flex items-start space-x-3 flex-1">
                    <CloudIcon className="mt-1 h-5 w-5 flex-shrink-0 text-black dark:text-neutral-100" />
                    <div>
                      <Label>Cloud</Label>
                      <Description>
                        {cloudOverriddenByEnv
                          ? "The OLLAMA_NO_CLOUD environment variable is currently forcing cloud off."
                          : "Enable cloud models and web search."}
                      </Description>
                    </div>
                  </div>
                  <div className="flex-shrink-0">
                    <Switch
                      checked={!cloudDisabled}
                      disabled={cloudToggleDisabled}
                      onChange={(checked) => {
                        if (cloudOverriddenByEnv) {
                          return;
                        }
                        updateCloudMutation.mutate(checked);
                      }}
                    />
                  </div>
                </div>
              </Field>
              {/* Auto Update */}
              <Field>
                <div className="flex items-start justify-between gap-4">
                  <div className="flex items-start space-x-3 flex-1">
                    <ArrowDownTrayIcon className="mt-1 h-5 w-5 flex-shrink-0 text-black dark:text-neutral-100" />
                    <div>
                      <Label>Auto-download updates</Label>
                      <Description>
                        {settings.AutoUpdateEnabled
                          ? "Automatically download updates when available."
                          : "Updates will not be downloaded automatically."}
                      </Description>
                    </div>
                  </div>
                  <div className="flex-shrink-0">
                    <Switch
                      checked={settings.AutoUpdateEnabled}
                      onChange={(checked) => handleChange("AutoUpdateEnabled", checked)}
                    />
                  </div>
                </div>
              </Field>
              {/* Expose Ollama */}
              <Field>
                <div className="flex items-start justify-between gap-4">
@@ -419,13 +544,11 @@ export default function Settings() {
                    </Description>
                    <div className="mt-3">
                      <Slider
-                        value={(() => {
+                        value={settings.ContextLength || defaultContextLength || 0}
                          // Otherwise use the settings value
                          return settings.ContextLength || 4096;
                        })()}
                        onChange={(value) => {
                          handleChange("ContextLength", value);
                        }}
                        disabled={!defaultContextLength}
                        options={[
                          { value: 4096, label: "4k" },
                          { value: 8192, label: "8k" },
@@ -440,35 +563,6 @@ export default function Settings() {
                  </div>
                </div>
              </Field>
              {/* Airplane Mode */}
              <Field>
                <div className="flex items-start justify-between gap-4">
                  <div className="flex items-start space-x-3 flex-1">
                    <svg
                      className="mt-1 h-5 w-5 flex-shrink-0 text-black dark:text-neutral-100"
                      viewBox="0 0 21.5508 17.9033"
                      fill="currentColor"
                    >
                      <path d="M21.5508 8.94727C21.542 7.91895 20.1445 7.17188 18.4658 7.17188L14.9238 7.17188C14.4316 7.17188 14.2471 7.09277 13.957 6.75879L8.05078 0.316406C7.86621 0.105469 7.6377 0 7.37402 0L6.35449 0C6.12598 0 5.99414 0.202148 6.1084 0.448242L9.14941 7.17188L4.68457 7.68164L3.09375 4.76367C2.97949 4.54395 2.78613 4.44727 2.49609 4.44727L2.11816 4.44727C1.88965 4.44727 1.74023 4.59668 1.74023 4.8252L1.74023 13.0693C1.74023 13.2979 1.88965 13.4385 2.11816 13.4385L2.49609 13.4385C2.78613 13.4385 2.97949 13.3418 3.09375 13.1309L4.68457 10.2129L9.14941 10.7227L6.1084 17.4463C5.99414 17.6836 6.12598 17.8945 6.35449 17.8945L7.37402 17.8945C7.6377 17.8945 7.86621 17.7803 8.05078 17.5781L13.957 11.127C14.2471 10.8018 14.4316 10.7227 14.9238 10.7227L18.4658 10.7227C20.1445 10.7227 21.542 9.9668 21.5508 8.94727Z" />
                    </svg>
                    <div>
                      <Label>Airplane mode</Label>
                      <Description>
                        Airplane mode keeps data local, disabling cloud models
                        and web search.
                      </Description>
                    </div>
                  </div>
                  <div className="flex-shrink-0">
                    <Switch
                      checked={settings.AirplaneMode}
                      onChange={(checked) =>
                        handleChange("AirplaneMode", checked)
                      }
                    />
                  </div>
                </div>
              </Field>
            </div>
          </div>
--- a/app/ui/app/src/components/ui/badge.tsx
+++ b/app/ui/app/src/components/ui/badge.tsx
@@ -65,7 +65,7 @@ export const BadgeButton = forwardRef(function BadgeButton(
    ),
  ref: React.ForwardedRef<HTMLElement>,
 ) {
-  let classes = clsx(
+  const classes = clsx(
    className,
    "group relative inline-flex rounded-md focus:not-data-focus:outline-hidden data-focus:outline-2 data-focus:outline-offset-2 data-focus:outline-blue-500",
  );
--- a/app/ui/app/src/components/ui/button.tsx
+++ b/app/ui/app/src/components/ui/button.tsx
@@ -171,7 +171,7 @@ export const Button = forwardRef(function Button(
  { color, outline, plain, className, children, ...props }: ButtonProps,
  ref: React.ForwardedRef<HTMLElement>,
 ) {
-  let classes = clsx(
+  const classes = clsx(
    className,
    styles.base,
    outline
--- a/app/ui/app/src/components/ui/slider.tsx
+++ b/app/ui/app/src/components/ui/slider.tsx
@@ -6,10 +6,11 @@ export interface SliderProps {
  value?: number;
  onChange?: (value: number) => void;
  className?: string;
  disabled?: boolean;
 }
 const Slider = React.forwardRef<HTMLDivElement, SliderProps>(
-  ({ label, options, value = 0, onChange }, ref) => {
+  ({ label, options, value = 0, onChange, disabled = false }, ref) => {
    const [selectedValue, setSelectedValue] = React.useState(value);
    const [isDragging, setIsDragging] = React.useState(false);
    const containerRef = React.useRef<HTMLDivElement>(null);
@@ -20,6 +21,7 @@ const Slider = React.forwardRef<HTMLDivElement, SliderProps>(
    }, [value]);
    const handleClick = (optionValue: number) => {
      if (disabled) return;
      setSelectedValue(optionValue);
      onChange?.(optionValue);
    };
@@ -39,6 +41,7 @@ const Slider = React.forwardRef<HTMLDivElement, SliderProps>(
    };
    const handleMouseDown = (e: React.MouseEvent) => {
      if (disabled) return;
      setIsDragging(true);
      e.preventDefault();
    };
@@ -77,7 +80,7 @@ const Slider = React.forwardRef<HTMLDivElement, SliderProps>(
    }
    return (
-      <div className="space-y-2" ref={ref}>
+      <div className={`space-y-2 ${disabled ? "opacity-50" : ""}`} ref={ref}>
        {label && <label className="text-sm font-medium">{label}</label>}
        <div className="relative">
          <div className="absolute top-[9px] left-2 right-2 h-1 bg-neutral-200 dark:bg-neutral-700 pointer-events-none rounded-full" />
@@ -88,10 +91,11 @@ const Slider = React.forwardRef<HTMLDivElement, SliderProps>(
                <button
                  onClick={() => handleClick(option.value)}
                  onMouseDown={handleMouseDown}
-                  className="relative px-3 py-6 -mx-3 -my-6 z-10 cursor-pointer"
+                  disabled={disabled}
                  className={`relative px-3 py-6 -mx-3 -my-6 z-10 ${disabled ? "cursor-not-allowed" : "cursor-pointer"}`}
                >
                  <div className="relative w-5 h-5 flex items-center justify-center">
-                    {selectedValue === option.value && (
+                    {selectedValue === option.value && !disabled && (
                      <div className="w-4 h-4 bg-white dark:bg-white border border-neutral-400 dark:border-neutral-500 rounded-full cursor-grab active:cursor-grabbing" />
                    )}
                  </div>
--- a/app/ui/app/src/hooks/useChats.ts
+++ b/app/ui/app/src/hooks/useChats.ts
@@ -6,8 +6,8 @@ import { useSelectedModel } from "./useSelectedModel";
 import { createQueryBatcher } from "./useQueryBatcher";
 import { useRefetchModels } from "./useModels";
 import { useStreamingContext } from "@/contexts/StreamingContext";
 import { useSettings } from "./useSettings";
 import { getModelCapabilities } from "@/api";
 import { useCloudStatus } from "./useCloudStatus";
 export const useChats = () => {
  return useQuery({
@@ -116,11 +116,9 @@ export const useIsModelStale = (modelName: string) => {
 export const useShouldShowStaleDisplay = (model: Model | null) => {
  const isStale = useIsModelStale(model?.model || "");
  const { data: dismissedModels } = useDismissedStaleModels();
-  const {
+  const { cloudDisabled } = useCloudStatus();
    settings: { airplaneMode },
  } = useSettings();
-  if (model?.isCloud() && !airplaneMode) {
+  if (model?.isCloud() && !cloudDisabled) {
    return false;
  }
--- a/app/ui/app/src/hooks/useCloudStatus.ts
+++ b/app/ui/app/src/hooks/useCloudStatus.ts
@@ -0,0 +1,20 @@
 import { useQuery } from "@tanstack/react-query";
 import { getCloudStatus, type CloudStatusResponse } from "@/api";
 /**
  * React hook that queries the cloud status under the "cloudStatus" key.
  *
  * The query is never retried and its result is treated as fresh for one
  * minute. Returns the raw response plus derived flags:
  * - `cloudDisabled`: the response's `disabled` field, or `false` when there
  *   is no response.
  * - `isKnown`: `true` only when a non-null response is available.
  */
 export function useCloudStatus() {
  const { data, isLoading, isError, error } = useQuery<CloudStatusResponse | null>({
    queryKey: ["cloudStatus"],
    queryFn: getCloudStatus,
    retry: false,
    staleTime: 60 * 1000,
  });
  // Loose inequality covers both null and undefined.
  const isKnown = data != null;
  const cloudDisabled = data?.disabled ?? false;
  return {
    cloudStatus: data,
    cloudDisabled,
    isKnown,
    isLoading,
    isError,
    error,
  };
 }
--- a/app/ui/app/src/hooks/useModelCapabilities.ts
+++ b/app/ui/app/src/hooks/useModelCapabilities.ts
@@ -20,3 +20,8 @@ export function useHasVisionCapability(modelName: string | undefined) {
  const { data: capabilitiesResponse } = useModelCapabilities(modelName);
  return capabilitiesResponse?.capabilities?.includes("vision") ?? false;
 }
 /**
  * Reports whether the capabilities fetched for `modelName` include "tools".
  * Returns `false` while the capabilities response (or its list) is missing.
  */
 export function useHasToolsCapability(modelName: string | undefined) {
  const { data } = useModelCapabilities(modelName);
  const capabilityList = data?.capabilities;
  return capabilityList?.includes("tools") ?? false;
 }
--- a/app/ui/app/src/hooks/useModels.ts
+++ b/app/ui/app/src/hooks/useModels.ts
@@ -2,11 +2,11 @@ import { useQuery } from "@tanstack/react-query";
 import { Model } from "@/gotypes";
 import { getModels } from "@/api";
 import { mergeModels } from "@/utils/mergeModels";
 import { useSettings } from "./useSettings";
 import { useMemo } from "react";
 import { useCloudStatus } from "./useCloudStatus";
 export function useModels(searchQuery = "") {
-  const { settings } = useSettings();
+  const { cloudDisabled } = useCloudStatus();
  const localQuery = useQuery<Model[], Error>({
    queryKey: ["models", searchQuery],
    queryFn: () => getModels(searchQuery),
@@ -20,7 +20,7 @@ export function useModels(searchQuery = "") {
  });
  const allModels = useMemo(() => {
-    const models = mergeModels(localQuery.data || [], settings.airplaneMode);
+    const models = mergeModels(localQuery.data || [], cloudDisabled);
    if (searchQuery && searchQuery.trim()) {
      const query = searchQuery.toLowerCase().trim();
@@ -40,7 +40,7 @@ export function useModels(searchQuery = "") {
    }
    return models;
-  }, [localQuery.data, searchQuery, settings.airplaneMode]);
+  }, [localQuery.data, searchQuery, cloudDisabled]);
  return {
    ...localQuery,
--- a/app/ui/app/src/hooks/useSelectedModel.ts
+++ b/app/ui/app/src/hooks/useSelectedModel.ts
@@ -7,6 +7,7 @@ import { Model } from "@/gotypes";
 import { FEATURED_MODELS } from "@/utils/mergeModels";
 import { getTotalVRAM } from "@/utils/vram.ts";
 import { getInferenceCompute } from "@/api";
 import { useCloudStatus } from "./useCloudStatus";
 export function recommendDefaultModel(totalVRAM: number): string {
  const vram = Math.max(0, Number(totalVRAM) || 0);
@@ -22,16 +23,19 @@ export function recommendDefaultModel(totalVRAM: number): string {
 export function useSelectedModel(currentChatId?: string, searchQuery?: string) {
  const { settings, setSettings } = useSettings();
  const { data: models = [], isLoading } = useModels(searchQuery || "");
  const { cloudDisabled } = useCloudStatus();
  const { data: chatData, isLoading: isChatLoading } = useChat(
    currentChatId && currentChatId !== "new" ? currentChatId : "",
  );
-  const { data: inferenceComputes = [] } = useQuery({
+  const { data: inferenceComputeResponse } = useQuery({
-    queryKey: ["inference-compute"],
+    queryKey: ["inferenceCompute"],
    queryFn: getInferenceCompute,
    enabled: !settings.selectedModel, // Only fetch if no model is selected
  });
  const inferenceComputes = inferenceComputeResponse?.inferenceComputes || [];
  const totalVRAM = useMemo(
    () => getTotalVRAM(inferenceComputes),
    [inferenceComputes],
@@ -46,12 +50,11 @@ export function useSelectedModel(currentChatId?: string, searchQuery?: string) {
  const restoredChatRef = useRef<string | null>(null);
  const selectedModel: Model | null = useMemo(() => {
-    // if airplane mode is on and selected model ends with cloud,
+    // If cloud is disabled and selected model ends with cloud, switch to a local default.
-    // switch to recommended default model
+    if (cloudDisabled && settings.selectedModel?.endsWith("cloud")) {
    if (settings.airplaneMode && settings.selectedModel?.endsWith("cloud")) {
      return (
        models.find((m) => m.model === recommendedModel) ||
-        models.find((m) => m.isCloud) ||
+        models.find((m) => !m.isCloud()) ||
        models.find((m) => m.digest === undefined || m.digest === "") ||
        models[0] ||
        null
@@ -68,7 +71,7 @@ export function useSelectedModel(currentChatId?: string, searchQuery?: string) {
      "qwen3-coder:480b",
    ];
    const shouldMigrate =
-      !settings.airplaneMode &&
+      !cloudDisabled &&
      settings.turboEnabled &&
      baseModelsToMigrate.includes(settings.selectedModel);
@@ -96,13 +99,18 @@ export function useSelectedModel(currentChatId?: string, searchQuery?: string) {
        })) ||
      null
    );
-  }, [models, settings.selectedModel, settings.airplaneMode, recommendedModel]);
+  }, [
    models,
    settings.selectedModel,
    cloudDisabled,
    recommendedModel,
  ]);
  useEffect(() => {
    if (!selectedModel) return;
    if (
-      settings.airplaneMode &&
+      cloudDisabled &&
      settings.selectedModel?.endsWith("cloud") &&
      selectedModel.model !== settings.selectedModel
    ) {
@@ -110,13 +118,17 @@ export function useSelectedModel(currentChatId?: string, searchQuery?: string) {
    }
    if (
-      !settings.airplaneMode &&
+      !cloudDisabled &&
      settings.turboEnabled &&
      selectedModel.model !== settings.selectedModel
    ) {
      setSettings({ SelectedModel: selectedModel.model, TurboEnabled: false });
    }
-  }, [selectedModel, settings.airplaneMode, settings.selectedModel]);
+  }, [
    selectedModel,
    cloudDisabled,
    settings.selectedModel,
  ]);
  // Set model from chat history when chat data loads
  useEffect(() => {
@@ -169,7 +181,9 @@ export function useSelectedModel(currentChatId?: string, searchQuery?: string) {
    const defaultModel =
      models.find((m) => m.model === recommendedModel) ||
-      models.find((m) => m.isCloud()) ||
+      (cloudDisabled
        ? models.find((m) => !m.isCloud())
        : models.find((m) => m.isCloud())) ||
      models.find((m) => m.digest === undefined || m.digest === "") ||
      models[0];
@@ -181,6 +195,7 @@ export function useSelectedModel(currentChatId?: string, searchQuery?: string) {
    inferenceComputes.length,
    models.length,
    settings.selectedModel,
    cloudDisabled,
  ]);
  // Add the selected model to the models list if it's not already there
--- a/app/ui/app/src/hooks/useSettings.ts
+++ b/app/ui/app/src/hooks/useSettings.ts
@@ -9,7 +9,7 @@ interface SettingsState {
  webSearchEnabled: boolean;
  selectedModel: string;
  sidebarOpen: boolean;
-  airplaneMode: boolean;
+  lastHomeView: string;
  thinkEnabled: boolean;
  thinkLevel: string;
 }
@@ -22,6 +22,7 @@ type SettingsUpdate = Partial<{
  ThinkLevel: string;
  SelectedModel: string;
  SidebarOpen: boolean;
  LastHomeView: string;
 }>;
 export function useSettings() {
@@ -51,7 +52,7 @@ export function useSettings() {
      thinkLevel: settingsData?.settings?.ThinkLevel ?? "none",
      selectedModel: settingsData?.settings?.SelectedModel ?? "",
      sidebarOpen: settingsData?.settings?.SidebarOpen ?? false,
-      airplaneMode: settingsData?.settings?.AirplaneMode ?? false,
+      lastHomeView: settingsData?.settings?.LastHomeView ?? "launch",
    }),
    [settingsData?.settings],
  );
--- a/app/ui/app/src/routes/__root.tsx
+++ b/app/ui/app/src/routes/__root.tsx
@@ -2,6 +2,7 @@ import type { QueryClient } from "@tanstack/react-query";
 import { createRootRouteWithContext, Outlet } from "@tanstack/react-router";
 import { getSettings } from "@/api";
 import { useQuery } from "@tanstack/react-query";
 import { useCloudStatus } from "@/hooks/useCloudStatus";
 function RootComponent() {
  // This hook ensures settings are fetched on app startup
@@ -9,6 +10,8 @@ function RootComponent() {
    queryKey: ["settings"],
    queryFn: getSettings,
  });
  // Fetch cloud status on startup (best-effort)
  useCloudStatus();
  return (
    <div>
--- a/app/ui/app/src/routes/c.$chatId.tsx
+++ b/app/ui/app/src/routes/c.$chatId.tsx
@@ -4,12 +4,37 @@ import Chat from "@/components/Chat";
 import { getChat } from "@/api";
 import { SidebarLayout } from "@/components/layout/layout";
 import { ChatSidebar } from "@/components/ChatSidebar";
 import LaunchCommands from "@/components/LaunchCommands";
 import { useEffect, useRef } from "react";
 import { useSettings } from "@/hooks/useSettings";
 const launchSidebarRequestedKey = "ollama.launchSidebarRequested";
 const launchSidebarSeenKey = "ollama.launchSidebarSeen";
 const fallbackSessionState = new Map<string, string>();
 /**
  * Returns a key/value store for per-session flags.
  *
  * Uses the global `sessionStorage` when it is defined; otherwise returns a
  * minimal stand-in (getItem / setItem / removeItem) backed by the
  * module-level `fallbackSessionState` map.
  */
 function getSessionState() {
  const storageMissing = typeof sessionStorage === "undefined";
  if (storageMissing) {
    return {
      // Mirror Storage.getItem: absent keys read back as null.
      getItem: (key: string) => fallbackSessionState.get(key) ?? null,
      setItem: (key: string, value: string) => {
        fallbackSessionState.set(key, value);
      },
      removeItem: (key: string) => {
        fallbackSessionState.delete(key);
      },
    };
  }
  return sessionStorage;
 }
 export const Route = createFileRoute("/c/$chatId")({
  component: RouteComponent,
  loader: async ({ context, params }) => {
-    // Skip loading for "new" chat
+    // Skip loading for special non-chat views
-    if (params.chatId !== "new") {
+    if (params.chatId !== "new" && params.chatId !== "launch") {
      context.queryClient.ensureQueryData({
        queryKey: ["chat", params.chatId],
        queryFn: () => getChat(params.chatId),
@@ -21,13 +46,70 @@ export const Route = createFileRoute("/c/$chatId")({
 function RouteComponent() {
  const { chatId } = Route.useParams();
  const { settingsData, setSettings } = useSettings();
  const previousChatIdRef = useRef<string | null>(null);
-  // Always call hooks at the top level - use a flag to skip data when chatId is "new"
+  // Always call hooks at the top level - use a flag to skip data when chatId is a special view
  const {
    data: chatData,
    isLoading: chatLoading,
    error: chatError,
-  } = useChat(chatId === "new" ? "" : chatId);
+  } = useChat(chatId === "new" || chatId === "launch" ? "" : chatId);
  useEffect(() => {
    if (!settingsData) {
      return;
    }
    const previousChatId = previousChatIdRef.current;
    previousChatIdRef.current = chatId;
    if (chatId === "launch") {
      const sessionState = getSessionState();
      const shouldOpenSidebar =
        previousChatId !== "launch" &&
        (() => {
          if (sessionState.getItem(launchSidebarRequestedKey) === "1") {
            sessionState.removeItem(launchSidebarRequestedKey);
            sessionState.setItem(launchSidebarSeenKey, "1");
            return true;
          }
          if (sessionState.getItem(launchSidebarSeenKey) !== "1") {
            sessionState.setItem(launchSidebarSeenKey, "1");
            return true;
          }
          return false;
        })();
      const updates: { LastHomeView?: string; SidebarOpen?: boolean } = {};
      if (settingsData.LastHomeView !== "launch") {
        updates.LastHomeView = "launch";
      }
      if (shouldOpenSidebar && !settingsData.SidebarOpen) {
        updates.SidebarOpen = true;
      }
      if (Object.keys(updates).length === 0) {
        return;
      }
      setSettings(updates).catch(() => {
        // Best effort persistence for home view preference.
      });
      return;
    }
    if (settingsData.LastHomeView === "chat") {
      return;
    }
    setSettings({ LastHomeView: "chat" }).catch(() => {
      // Best effort persistence for home view preference.
    });
  }, [chatId, settingsData, setSettings]);
  // Handle "new" chat case - just use Chat component which handles everything
  if (chatId === "new") {
@@ -38,6 +120,14 @@ function RouteComponent() {
    );
  }
  if (chatId === "launch") {
    return (
      <SidebarLayout sidebar={<ChatSidebar currentChatId={chatId} />}>
        <LaunchCommands />
      </SidebarLayout>
    );
  }
  // Handle existing chat case
  if (chatLoading) {
    return (
--- a/app/ui/app/src/routes/index.tsx
+++ b/app/ui/app/src/routes/index.tsx
@@ -1,10 +1,18 @@
 import { createFileRoute, redirect } from "@tanstack/react-router";
 import { getSettings } from "@/api";
 export const Route = createFileRoute("/")({
-  beforeLoad: () => {
+  beforeLoad: async ({ context }) => {
    const settingsData = await context.queryClient.ensureQueryData({
      queryKey: ["settings"],
      queryFn: getSettings,
    });
    const chatId =
      settingsData?.settings?.LastHomeView === "chat" ? "new" : "launch";
    throw redirect({
      to: "/c/$chatId",
-      params: { chatId: "new" },
+      params: { chatId },
      mask: {
        to: "/",
      },
--- a/app/ui/app/src/utils/clipboard.test.ts
+++ b/app/ui/app/src/utils/clipboard.test.ts
@@ -0,0 +1,57 @@
 import { describe, expect, it, vi, beforeEach } from "vitest";
 import { copyTextToClipboard } from "./clipboard";
 // Tests for copyTextToClipboard: the Clipboard API path and the
 // document.execCommand fallback path, both driven through stubbed globals.
 describe("copyTextToClipboard", () => {
  beforeEach(() => {
    vi.restoreAllMocks();
    // restoreAllMocks() does not undo vi.stubGlobal(); without this the
    // stubbed `navigator` / `document` from one test leak into the next.
    vi.unstubAllGlobals();
  });
  it("copies via Clipboard API when available", async () => {
    const writeText = vi.fn().mockResolvedValue(undefined);
    vi.stubGlobal("navigator", {
      clipboard: {
        writeText,
      },
    });
    const copied = await copyTextToClipboard("ollama launch claude");
    expect(copied).toBe(true);
    expect(writeText).toHaveBeenCalledWith("ollama launch claude");
  });
  it("falls back to execCommand when Clipboard API fails", async () => {
    // Force the primary path to reject so the fallback runs.
    const writeText = vi.fn().mockRejectedValue(new Error("not allowed"));
    vi.stubGlobal("navigator", {
      clipboard: {
        writeText,
      },
    });
    // Minimal stand-in for the textarea element the fallback creates.
    const textarea = {
      value: "",
      setAttribute: vi.fn(),
      style: {} as Record<string, string>,
      focus: vi.fn(),
      select: vi.fn(),
    };
    const appendChild = vi.fn();
    const removeChild = vi.fn();
    const execCommand = vi.fn().mockReturnValue(true);
    vi.stubGlobal("document", {
      createElement: vi.fn().mockReturnValue(textarea),
      body: {
        appendChild,
        removeChild,
      },
      execCommand,
    });
    const copied = await copyTextToClipboard("ollama launch openclaw");
    expect(copied).toBe(true);
    expect(execCommand).toHaveBeenCalledWith("copy");
    expect(appendChild).toHaveBeenCalled();
    expect(removeChild).toHaveBeenCalled();
  });
 });
--- a/app/ui/app/src/utils/clipboard.ts
+++ b/app/ui/app/src/utils/clipboard.ts
@@ -0,0 +1,30 @@
 /**
  * Copies `text` to the system clipboard.
  *
  * First tries the async Clipboard API. If that throws or rejects, the error
  * is logged and the function falls back to selecting the text in a hidden,
  * read-only textarea and issuing `document.execCommand("copy")`.
  *
  * @param text - The string to place on the clipboard.
  * @returns `true` if either path reports success; `false` if the fallback
  *   reports failure or throws.
  */
 export async function copyTextToClipboard(text: string): Promise<boolean> {
  try {
    await navigator.clipboard.writeText(text);
    return true;
  } catch (clipboardError) {
    console.error(
      "Clipboard API failed, falling back to execCommand",
      clipboardError,
    );
  }
  try {
    const textarea = document.createElement("textarea");
    textarea.value = text;
    textarea.setAttribute("readonly", "true");
    // Keep the helper element out of sight while it is attached.
    textarea.style.position = "fixed";
    textarea.style.left = "-9999px";
    textarea.style.opacity = "0";
    document.body.appendChild(textarea);
    try {
      textarea.focus();
      textarea.select();
      return document.execCommand("copy");
    } finally {
      // Always detach the helper element, even when focus/select/execCommand
      // throws, so a failed copy does not leave a stray textarea in the DOM.
      document.body.removeChild(textarea);
    }
  } catch (fallbackError) {
    console.error("Fallback copy failed", fallbackError);
    return false;
  }
 }
--- a/app/ui/app/src/utils/fileValidation.test.ts
+++ b/app/ui/app/src/utils/fileValidation.test.ts
@@ -29,13 +29,15 @@ describe("fileValidation", () => {
      expect(result.valid).toBe(true);
    });
-    it("should reject WebP images when vision capability is disabled", () => {
+    it("should accept images regardless of vision capability", () => {
      // Vision capability check is handled at the UI layer (ChatForm),
      // not at validation time, so users can switch models without
      // needing to re-upload files.
      const file = createMockFile("test.webp", 1024, "image/webp");
      const result = validateFile(file, {
        hasVisionCapability: false,
      });
-      expect(result.valid).toBe(false);
+      expect(result.valid).toBe(true);
      expect(result.error).toBe("This model does not support images");
    });
    it("should accept PNG images when vision capability is enabled", () => {
--- a/app/ui/app/src/utils/fileValidation.ts
+++ b/app/ui/app/src/utils/fileValidation.ts
@@ -63,7 +63,6 @@ export function validateFile(
  const {
    maxFileSize = 10,
    allowedExtensions = [...TEXT_FILE_EXTENSIONS, ...IMAGE_EXTENSIONS],
    hasVisionCapability = false,
    customValidator,
  } = options;
@@ -83,10 +82,6 @@ export function validateFile(
    return { valid: false, error: "File type not supported" };
  }
  if (IMAGE_EXTENSIONS.includes(fileExtension) && !hasVisionCapability) {
    return { valid: false, error: "This model does not support images" };
  }
  // File size validation
  if (file.size > MAX_FILE_SIZE) {
    return { valid: false, error: "File too large" };
--- a/app/ui/app/src/utils/mergeModels.test.ts
+++ b/app/ui/app/src/utils/mergeModels.test.ts
@@ -41,14 +41,14 @@ describe("Model merging logic", () => {
    expect(merged.length).toBe(FEATURED_MODELS.length + 2);
  });
-  it("should hide cloud models in airplane mode", () => {
+  it("should hide cloud models when cloud is disabled", () => {
    const localModels: Model[] = [
      new Model({ model: "gpt-oss:120b-cloud" }),
      new Model({ model: "llama3:latest" }),
      new Model({ model: "mistral:latest" }),
    ];
-    const merged = mergeModels(localModels, true); // airplane mode = true
+    const merged = mergeModels(localModels, true); // cloud disabled = true
    // No cloud models should be present
    const cloudModels = merged.filter((m) => m.isCloud());
--- a/app/ui/app/src/utils/mergeModels.ts
+++ b/app/ui/app/src/utils/mergeModels.ts
@@ -2,27 +2,28 @@ import { Model } from "@/gotypes";
 // Featured models list (in priority order)
 export const FEATURED_MODELS = [
  "kimi-k2.5:cloud",
  "glm-5:cloud",
  "minimax-m2.7:cloud",
  "gemma4:31b-cloud",
  "qwen3.5:397b-cloud",
  "gpt-oss:120b-cloud",
  "gpt-oss:20b-cloud",
  "deepseek-v3.1:671b-cloud",
  "qwen3-coder:480b-cloud",
  "qwen3-vl:235b-cloud",
  "minimax-m2:cloud",
  "glm-4.6:cloud",
  "gpt-oss:120b",
  "gpt-oss:20b",
-  "gemma3:27b",
+  "gemma4:31b",
-  "gemma3:12b",
+  "gemma4:26b",
-  "gemma3:4b",
+  "gemma4:e4b",
-  "gemma3:1b",
+  "gemma4:e2b",
  "deepseek-r1:8b",
  "qwen3-coder:30b",
  "qwen3-vl:30b",
  "qwen3-vl:8b",
  "qwen3-vl:4b",
-  "qwen3:30b",
+  "qwen3.5:27b",
-  "qwen3:8b",
+  "qwen3.5:9b",
-  "qwen3:4b",
+  "qwen3.5:4b",
 ];
 function alphabeticalSort(a: Model, b: Model): number {
@@ -32,7 +33,7 @@ function alphabeticalSort(a: Model, b: Model): number {
 //Merges models, sorting cloud models first, then other models
 export function mergeModels(
  localModels: Model[],
-  airplaneMode: boolean = false,
+  hideCloudModels: boolean = false,
 ): Model[] {
  const allModels = (localModels || []).map((model) => model);
@@ -95,7 +96,7 @@ export function mergeModels(
  remainingModels.sort(alphabeticalSort);
-  return airplaneMode
+  return hideCloudModels
    ? [...featuredModels, ...remainingModels]
    : [...cloudModels, ...featuredModels, ...remainingModels];
 }
--- a/app/ui/responses/types.go
+++ b/app/ui/responses/types.go
@@ -45,7 +45,8 @@ type InferenceCompute struct {
 }
 type InferenceComputeResponse struct {
-	InferenceComputes []InferenceCompute `json:"inferenceComputes"`
+	InferenceComputes    []InferenceCompute `json:"inferenceComputes"`
 	DefaultContextLength int                `json:"defaultContextLength"`
 }
 type ModelCapabilitiesResponse struct {
@@ -132,9 +133,8 @@ type Error struct {
 }
 type ModelUpstreamResponse struct {
-	Digest   string `json:"digest,omitempty"`
+	Stale bool   `json:"stale"`
-	PushTime int64  `json:"pushTime"`
+	Error string `json:"error,omitempty"`
 	Error    string `json:"error,omitempty"`
 }
 // Serializable data for the browser state
--- a/app/ui/ui.go
+++ b/app/ui/ui.go
@@ -28,9 +28,11 @@ import (
 	"github.com/ollama/ollama/app/tools"
 	"github.com/ollama/ollama/app/types/not"
 	"github.com/ollama/ollama/app/ui/responses"
 	"github.com/ollama/ollama/app/updater"
 	"github.com/ollama/ollama/app/version"
 	ollamaAuth "github.com/ollama/ollama/auth"
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/manifest"
 	"github.com/ollama/ollama/types/model"
 	_ "github.com/tkrajina/typescriptify-golang-structs/typescriptify"
 )
@@ -106,6 +108,10 @@ type Server struct {
 	// Dev is true if the server is running in development mode
 	Dev bool
 	// Updater for checking and downloading updates
 	Updater             *updater.Updater
 	UpdateAvailableFunc func()
 }
 func (s *Server) log() *slog.Logger {
@@ -150,7 +156,7 @@ func (s *Server) ollamaProxy() http.Handler {
 					return
 				}
-				target := envconfig.Host()
+				target := envconfig.ConnectableHost()
 				s.log().Info("configuring ollama proxy", "target", target.String())
 				newProxy := httputil.NewSingleHostReverseProxy(target)
@@ -188,7 +194,7 @@ func (s *Server) Handler() http.Handler {
 			if CORS() {
 				w.Header().Set("Access-Control-Allow-Origin", "*")
 				w.Header().Set("Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS")
-				w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization, X-Requested-With")
+				w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization, User-Agent, Accept, X-Requested-With")
 				w.Header().Set("Access-Control-Allow-Credentials", "true")
 				// Handle preflight requests
@@ -284,12 +290,15 @@ func (s *Server) Handler() http.Handler {
 	mux.Handle("POST /api/v1/model/upstream", handle(s.modelUpstream))
 	mux.Handle("GET /api/v1/settings", handle(s.getSettings))
 	mux.Handle("POST /api/v1/settings", handle(s.settings))
 	mux.Handle("GET /api/v1/cloud", handle(s.getCloudSetting))
 	mux.Handle("POST /api/v1/cloud", handle(s.cloudSetting))
 	// Ollama proxy endpoints
 	ollamaProxy := s.ollamaProxy()
 	mux.Handle("GET /api/tags", ollamaProxy)
 	mux.Handle("POST /api/show", ollamaProxy)
 	mux.Handle("GET /api/version", ollamaProxy)
 	mux.Handle("GET /api/status", ollamaProxy)
 	mux.Handle("HEAD /api/version", ollamaProxy)
 	mux.Handle("POST /api/me", ollamaProxy)
 	mux.Handle("POST /api/signout", ollamaProxy)
@@ -310,7 +319,7 @@ func (s *Server) handleError(w http.ResponseWriter, e error) {
 	if CORS() {
 		w.Header().Set("Access-Control-Allow-Origin", "*")
 		w.Header().Set("Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS")
-		w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization, X-Requested-With")
+		w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization, User-Agent, Accept, X-Requested-With")
 		w.Header().Set("Access-Control-Allow-Credentials", "true")
 	}
@@ -333,8 +342,18 @@ func (t *userAgentTransport) RoundTrip(req *http.Request) (*http.Response, error
 // httpClient builds the User-Agent-tagging HTTP client with a ten second
 // overall request timeout.
 func (s *Server) httpClient() *http.Client {
 	const requestTimeout = 10 * time.Second
 	return userAgentHTTPClient(requestTimeout)
 }
 // inferenceClient returns an API client for envconfig.Host() whose underlying
 // HTTP client is built like httpClient's but with a zero timeout, so long
 // running requests aren't truncated.
 func (s *Server) inferenceClient() *api.Client {
 	host := envconfig.Host()
 	return api.NewClient(host, userAgentHTTPClient(0))
 }
 func userAgentHTTPClient(timeout time.Duration) *http.Client {
 	return &http.Client{
-		Timeout: 10 * time.Second,
+		Timeout: timeout,
 		Transport: &userAgentTransport{
 			base: http.DefaultTransport,
 		},
@@ -712,11 +731,7 @@ func (s *Server) chat(w http.ResponseWriter, r *http.Request) error {
 	_, cancelLoading := context.WithCancel(ctx)
 	loading := false
-	c, err := api.ClientFromEnvironment()
+	c := s.inferenceClient()
 	if err != nil {
 		cancelLoading()
 		return err
 	}
 	// Check if the model exists locally by trying to show it
 	// TODO (jmorganca): skip this round trip and instead just act
@@ -826,8 +841,9 @@ func (s *Server) chat(w http.ResponseWriter, r *http.Request) error {
 	if !hasAttachments {
 		WebSearchEnabled := req.WebSearch != nil && *req.WebSearch
 		hasToolsCapability := slices.Contains(details.Capabilities, model.CapabilityTools)
-		if WebSearchEnabled {
+		if WebSearchEnabled && hasToolsCapability {
 			if supportsBrowserTools(req.Model) {
 				browserState, ok := s.browserState(chat)
 				if !ok {
@@ -837,7 +853,7 @@ func (s *Server) chat(w http.ResponseWriter, r *http.Request) error {
 				registry.Register(tools.NewBrowserSearch(browser))
 				registry.Register(tools.NewBrowserOpen(browser))
 				registry.Register(tools.NewBrowserFind(browser))
-			} else if supportsWebSearchTools(req.Model) {
+			} else {
 				registry.Register(&tools.WebSearch{})
 				registry.Register(&tools.WebFetch{})
 			}
@@ -1417,11 +1433,6 @@ func (s *Server) getSettings(w http.ResponseWriter, r *http.Request) error {
 		settings.Models = envconfig.Models()
 	}
 	// set default context length if not set
 	if settings.ContextLength == 0 {
 		settings.ContextLength = 4096
 	}
 	// Include current runtime settings
 	settings.Agent = s.Agent
 	settings.Tools = s.Tools
@@ -1448,6 +1459,24 @@ func (s *Server) settings(w http.ResponseWriter, r *http.Request) error {
 		return fmt.Errorf("failed to save settings: %w", err)
 	}
 	// Handle auto-update toggle changes
 	if old.AutoUpdateEnabled != settings.AutoUpdateEnabled {
 		if !settings.AutoUpdateEnabled {
 			// Auto-update disabled: cancel any ongoing download
 			if s.Updater != nil {
 				s.Updater.CancelOngoingDownload()
 			}
 		} else {
 			// Auto-update re-enabled: show notification if update is already staged, or trigger immediate check
 			if (updater.IsUpdatePending() || updater.UpdateDownloaded) && s.UpdateAvailableFunc != nil {
 				s.UpdateAvailableFunc()
 			} else if s.Updater != nil {
 				// Trigger the background checker to run immediately
 				s.Updater.TriggerImmediateCheck()
 			}
 		}
 	}
 	if old.ContextLength != settings.ContextLength ||
 		old.Models != settings.Models ||
 		old.Expose != settings.Expose {
@@ -1460,17 +1489,51 @@ func (s *Server) settings(w http.ResponseWriter, r *http.Request) error {
 	})
 }
 // cloudSetting handles POST /api/v1/cloud. It decodes a JSON body of the form
 // {"enabled": <bool>}, stores the value via the Store, calls Restart, and then
 // writes the resulting cloud status as the response.
 func (s *Server) cloudSetting(w http.ResponseWriter, r *http.Request) error {
 	payload := struct {
 		Enabled bool `json:"enabled"`
 	}{}
 	decodeErr := json.NewDecoder(r.Body).Decode(&payload)
 	if decodeErr != nil {
 		return fmt.Errorf("invalid request body: %w", decodeErr)
 	}
 	persistErr := s.Store.SetCloudEnabled(payload.Enabled)
 	if persistErr != nil {
 		return fmt.Errorf("failed to persist cloud setting: %w", persistErr)
 	}
 	// Restart only after the new value has been stored successfully.
 	s.Restart()
 	return s.writeCloudStatus(w)
 }
 // getCloudSetting handles GET /api/v1/cloud by writing the current cloud
 // status; the request itself is not inspected.
 func (s *Server) getCloudSetting(w http.ResponseWriter, _ *http.Request) error {
 	return s.writeCloudStatus(w)
 }
 // writeCloudStatus reads the cloud status from the Store and writes it to w as
 // a JSON object with the keys "disabled" and "source". It returns a wrapped
 // error if the status cannot be loaded, or the encoder's error otherwise.
 func (s *Server) writeCloudStatus(w http.ResponseWriter) error {
 	disabled, source, err := s.Store.CloudStatus()
 	if err != nil {
 		return fmt.Errorf("failed to load cloud status: %w", err)
 	}
 	payload := map[string]any{
 		"disabled": disabled,
 		"source":   source,
 	}
 	w.Header().Set("Content-Type", "application/json")
 	encoder := json.NewEncoder(w)
 	return encoder.Encode(payload)
 }
 func (s *Server) getInferenceCompute(w http.ResponseWriter, r *http.Request) error {
 	ctx, cancel := context.WithTimeout(r.Context(), 500*time.Millisecond)
 	defer cancel()
-	serverInferenceComputes, err := server.GetInferenceComputer(ctx)
+	info, err := server.GetInferenceInfo(ctx)
 	if err != nil {
-		s.log().Error("failed to get inference compute", "error", err)
+		s.log().Error("failed to get inference info", "error", err)
-		return fmt.Errorf("failed to get inference compute: %w", err)
+		return fmt.Errorf("failed to get inference info: %w", err)
 	}
-	inferenceComputes := make([]responses.InferenceCompute, len(serverInferenceComputes))
+	inferenceComputes := make([]responses.InferenceCompute, len(info.Computes))
-	for i, ic := range serverInferenceComputes {
+	for i, ic := range info.Computes {
 		inferenceComputes[i] = responses.InferenceCompute{
 			Library: ic.Library,
 			Variant: ic.Variant,
@@ -1482,7 +1545,8 @@ func (s *Server) getInferenceCompute(w http.ResponseWriter, r *http.Request) err
 	}
 	response := responses.InferenceComputeResponse{
-		InferenceComputes: inferenceComputes,
+		InferenceComputes:    inferenceComputes,
 		DefaultContextLength: info.DefaultContextLength,
 	}
 	w.Header().Set("Content-Type", "application/json")
@@ -1515,9 +1579,18 @@ func (s *Server) modelUpstream(w http.ResponseWriter, r *http.Request) error {
 		return json.NewEncoder(w).Encode(response)
 	}
 	n := model.ParseName(req.Model)
 	stale := true
 	if m, err := manifest.ParseNamedManifest(n); err == nil {
 		if m.Digest() == digest {
 			stale = false
 		} else if pushTime > 0 && m.FileInfo().ModTime().Unix() >= pushTime {
 			stale = false
 		}
 	}
 	response := responses.ModelUpstreamResponse{
-		Digest:   digest,
+		Stale: stale,
 		PushTime: pushTime,
 	}
 	w.Header().Set("Content-Type", "application/json")
@@ -1615,18 +1688,6 @@ func supportsBrowserTools(model string) bool {
 	return strings.HasPrefix(strings.ToLower(model), "gpt-oss")
 }
 // supportsWebSearchTools reports whether the model name (compared
 // case-insensitively) starts with "qwen3" or "deepseek-v3". Web search tools
 // are the simpler tool set: basic search and fetch (e.g., "web_search",
 // "web_fetch") without simulating a browser.
 func supportsWebSearchTools(model string) bool {
 	lowered := strings.ToLower(model)
 	return strings.HasPrefix(lowered, "qwen3") || strings.HasPrefix(lowered, "deepseek-v3")
 }
 // buildChatRequest converts store.Chat to api.ChatRequest
 func (s *Server) buildChatRequest(chat *store.Chat, model string, think any, availableTools []map[string]any) (*api.ChatRequest, error) {
 	var msgs []api.Message
--- a/app/ui/ui_test.go
+++ b/app/ui/ui_test.go
@@ -4,6 +4,7 @@ package ui
 import (
 	"bytes"
 	"context"
 	"encoding/json"
 	"io"
 	"net/http"
@@ -11,9 +12,12 @@ import (
 	"path/filepath"
 	"runtime"
 	"strings"
 	"sync/atomic"
 	"testing"
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/app/store"
 	"github.com/ollama/ollama/app/updater"
 )
 func TestHandlePostApiSettings(t *testing.T) {
@@ -115,6 +119,107 @@ func TestHandlePostApiSettings(t *testing.T) {
 	}
 }
 // TestHandlePostApiCloudSetting posts {"enabled": ...} bodies to the
 // cloudSetting handler and checks, for each case, the HTTP status, the
 // "disabled" field of the JSON response, and the value persisted in the
 // store. It finally expects Restart to have been invoked once per request.
 func TestHandlePostApiCloudSetting(t *testing.T) {
 	homeDir := t.TempDir()
 	t.Setenv("HOME", homeDir)
 	t.Setenv("OLLAMA_NO_CLOUD", "")
 	st := &store.Store{DBPath: filepath.Join(t.TempDir(), "db.sqlite")}
 	defer st.Close()
 	var restarts int
 	srv := &Server{
 		Store:   st,
 		Restart: func() { restarts++ },
 	}
 	type cloudCase struct {
 		name        string
 		body        string
 		wantEnabled bool
 	}
 	cases := []cloudCase{
 		{name: "disable cloud", body: `{"enabled": false}`, wantEnabled: false},
 		{name: "enable cloud", body: `{"enabled": true}`, wantEnabled: true},
 	}
 	for _, tc := range cases {
 		t.Run(tc.name, func(t *testing.T) {
 			request := httptest.NewRequest("POST", "/api/v1/cloud", bytes.NewBufferString(tc.body))
 			request.Header.Set("Content-Type", "application/json")
 			recorder := httptest.NewRecorder()
 			err := srv.cloudSetting(recorder, request)
 			if err != nil {
 				t.Fatalf("cloudSetting() error = %v", err)
 			}
 			if recorder.Code != http.StatusOK {
 				t.Fatalf("cloudSetting() status = %d, want %d", recorder.Code, http.StatusOK)
 			}
 			var payload map[string]any
 			if err := json.Unmarshal(recorder.Body.Bytes(), &payload); err != nil {
 				t.Fatalf("cloudSetting() invalid response JSON: %v", err)
 			}
 			// The API reports the inverse flag ("disabled") of what was posted.
 			wantDisabled := !tc.wantEnabled
 			if payload["disabled"] != wantDisabled {
 				t.Fatalf("response disabled = %v, want %v", payload["disabled"], wantDisabled)
 			}
 			storedDisabled, err := st.CloudDisabled()
 			if err != nil {
 				t.Fatalf("CloudDisabled() error = %v", err)
 			}
 			gotEnabled := !storedDisabled
 			if gotEnabled != tc.wantEnabled {
 				t.Fatalf("cloud enabled = %v, want %v", gotEnabled, tc.wantEnabled)
 			}
 		})
 	}
 	if restarts != 2 {
 		t.Fatalf("Restart called %d times, want 2", restarts)
 	}
 }
 // TestHandleGetApiCloudSetting disables cloud in the store, then verifies that
 // getCloudSetting answers 200 with "disabled": true and "source": "config".
 func TestHandleGetApiCloudSetting(t *testing.T) {
 	homeDir := t.TempDir()
 	t.Setenv("HOME", homeDir)
 	t.Setenv("OLLAMA_NO_CLOUD", "")
 	st := &store.Store{DBPath: filepath.Join(t.TempDir(), "db.sqlite")}
 	defer st.Close()
 	err := st.SetCloudEnabled(false)
 	if err != nil {
 		t.Fatalf("SetCloudEnabled(false) error = %v", err)
 	}
 	srv := &Server{Store: st, Restart: func() {}}
 	recorder := httptest.NewRecorder()
 	request := httptest.NewRequest("GET", "/api/v1/cloud", nil)
 	if err := srv.getCloudSetting(recorder, request); err != nil {
 		t.Fatalf("getCloudSetting() error = %v", err)
 	}
 	if status := recorder.Code; status != http.StatusOK {
 		t.Fatalf("getCloudSetting() status = %d, want %d", status, http.StatusOK)
 	}
 	var payload map[string]any
 	if err := json.Unmarshal(recorder.Body.Bytes(), &payload); err != nil {
 		t.Fatalf("getCloudSetting() invalid response JSON: %v", err)
 	}
 	// A missing or non-bool "disabled" value fails the same way as false.
 	if isDisabled, ok := payload["disabled"].(bool); !ok || !isDisabled {
 		t.Fatalf("response disabled = %v, want true", payload["disabled"])
 	}
 	// A missing or non-string "source" yields "", which never equals "config".
 	if source, _ := payload["source"].(string); source != "config" {
 		t.Fatalf("response source = %v, want config", payload["source"])
 	}
 }
 func TestAuthenticationMiddleware(t *testing.T) {
 	tests := []struct {
 		name         string
@@ -421,3 +526,317 @@ func TestUserAgentTransport(t *testing.T) {
 	t.Logf("User-Agent transport successfully set: %s", receivedUA)
 }
 // TestInferenceClientUsesUserAgent points OLLAMA_HOST at a stub HTTP server,
 // issues a Show request through the server's inference client, and checks
 // that the User-Agent header the stub received equals userAgent().
 func TestInferenceClientUsesUserAgent(t *testing.T) {
 	var seenUA atomic.Value
 	stub := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		seenUA.Store(r.Header.Get("User-Agent"))
 		w.Header().Set("Content-Type", "application/json")
 		w.Write([]byte(`{}`))
 	}))
 	defer stub.Close()
 	t.Setenv("OLLAMA_HOST", stub.URL)
 	srv := &Server{}
 	if _, err := srv.inferenceClient().Show(context.Background(), &api.ShowRequest{Model: "test"}); err != nil {
 		t.Fatalf("show request failed: %v", err)
 	}
 	// The type assertion yields "" if the handler never stored a value.
 	receivedUA, _ := seenUA.Load().(string)
 	if expectedUA := userAgent(); receivedUA != expectedUA {
 		t.Errorf("User-Agent mismatch\nExpected: %s\nReceived: %s", expectedUA, receivedUA)
 	}
 }
 // TestSupportsBrowserTools checks supportsBrowserTools against a table of
 // model names: gpt-oss prefixed names in any letter case are expected to
 // match, every other name (including the empty string) is not.
 func TestSupportsBrowserTools(t *testing.T) {
 	type browserCase struct {
 		model string
 		want  bool
 	}
 	cases := []browserCase{
 		{model: "gpt-oss", want: true},
 		{model: "gpt-oss-latest", want: true},
 		{model: "GPT-OSS", want: true},
 		{model: "Gpt-Oss-v2", want: true},
 		{model: "qwen3", want: false},
 		{model: "deepseek-v3", want: false},
 		{model: "llama3.3", want: false},
 		{model: "", want: false},
 	}
 	for _, tc := range cases {
 		t.Run(tc.model, func(t *testing.T) {
 			got := supportsBrowserTools(tc.model)
 			if got == tc.want {
 				return
 			}
 			t.Errorf("supportsBrowserTools(%q) = %v, want %v", tc.model, got, tc.want)
 		})
 	}
 }
 // TestWebSearchToolRegistration exercises a local replica of the tool-gating
 // decision: tools are only considered when web search is enabled AND the
 // model has the tools capability; in that case browser tools are chosen when
 // supportsBrowserTools matches the model, otherwise basic web search tools.
 //
 // Note that the decision is re-implemented inside the test rather than
 // invoked through the handler, so it only guards supportsBrowserTools and the
 // expectations in the table.
 func TestWebSearchToolRegistration(t *testing.T) {
 	type registrationCase struct {
 		name             string
 		webSearchEnabled bool
 		hasToolsCap      bool
 		model            string
 		wantBrowser      bool // expects browser tools (gpt-oss)
 		wantWebSearch    bool // expects basic web search/fetch tools
 		wantNone         bool // expects no tools registered
 	}
 	cases := []registrationCase{
 		{
 			name:             "web search enabled with tools capability - browser model",
 			webSearchEnabled: true,
 			hasToolsCap:      true,
 			model:            "gpt-oss-latest",
 			wantBrowser:      true,
 		},
 		{
 			name:             "web search enabled with tools capability - non-browser model",
 			webSearchEnabled: true,
 			hasToolsCap:      true,
 			model:            "qwen3",
 			wantWebSearch:    true,
 		},
 		{
 			name:             "web search enabled without tools capability",
 			webSearchEnabled: true,
 			hasToolsCap:      false,
 			model:            "llama3.3",
 			wantNone:         true,
 		},
 		{
 			name:             "web search disabled with tools capability",
 			webSearchEnabled: false,
 			hasToolsCap:      true,
 			model:            "qwen3",
 			wantNone:         true,
 		},
 		{
 			name:             "web search disabled without tools capability",
 			webSearchEnabled: false,
 			hasToolsCap:      false,
 			model:            "llama3.3",
 			wantNone:         true,
 		},
 	}
 	for _, tc := range cases {
 		t.Run(tc.name, func(t *testing.T) {
 			// Replica of the gating decision; supportsBrowserTools is only
 			// consulted when tools are eligible at all (short-circuit).
 			eligible := tc.webSearchEnabled && tc.hasToolsCap
 			gotBrowser := eligible && supportsBrowserTools(tc.model)
 			gotWebSearch := eligible && !gotBrowser
 			// Evaluated in order; every violated expectation is reported.
 			checks := []struct {
 				violated bool
 				message  string
 			}{
 				{tc.wantBrowser && !gotBrowser, "expected browser tools to be registered"},
 				{tc.wantWebSearch && !gotWebSearch, "expected web search tools to be registered"},
 				{tc.wantNone && (gotBrowser || gotWebSearch), "expected no tools to be registered"},
 				{!tc.wantBrowser && gotBrowser, "unexpected browser tools registered"},
 				{!tc.wantWebSearch && gotWebSearch, "unexpected web search tools registered"},
 			}
 			for _, check := range checks {
 				if check.violated {
 					t.Error(check.message)
 				}
 			}
 		})
 	}
 }
 // TestSettingsToggleAutoUpdateOff_CancelsDownload stores settings with
 // auto-update enabled, then posts settings with it disabled through the
 // settings handler (with an Updater attached) and verifies the request
 // succeeds and the disabled value is what ends up persisted.
 //
 // The cancellation itself is not observed: the test only proves the handler
 // flow completes without error when an Updater is present.
 func TestSettingsToggleAutoUpdateOff_CancelsDownload(t *testing.T) {
 	primary := &store.Store{DBPath: filepath.Join(t.TempDir(), "db.sqlite")}
 	defer primary.Close()
 	// Seed the store with auto-update switched on.
 	cfg, err := primary.Settings()
 	if err != nil {
 		t.Fatal(err)
 	}
 	cfg.AutoUpdateEnabled = true
 	err = primary.SetSettings(cfg)
 	if err != nil {
 		t.Fatal(err)
 	}
 	// The updater gets its own, separate database.
 	updaterStore := &store.Store{DBPath: filepath.Join(t.TempDir(), "db2.sqlite")}
 	upd := &updater.Updater{Store: updaterStore}
 	defer updaterStore.Close()
 	srv := &Server{
 		Store:   primary,
 		Restart: func() {},
 		Updater: upd,
 	}
 	// Flip auto-update off and submit through the settings API.
 	cfg.AutoUpdateEnabled = false
 	payload, err := json.Marshal(cfg)
 	if err != nil {
 		t.Fatal(err)
 	}
 	request := httptest.NewRequest("POST", "/api/v1/settings", bytes.NewReader(payload))
 	request.Header.Set("Content-Type", "application/json")
 	recorder := httptest.NewRecorder()
 	if err := srv.settings(recorder, request); err != nil {
 		t.Fatalf("settings() error = %v", err)
 	}
 	if recorder.Code != http.StatusOK {
 		t.Fatalf("settings() status = %d, want %d", recorder.Code, http.StatusOK)
 	}
 	// The persisted settings must now have auto-update disabled.
 	persisted, err := primary.Settings()
 	if err != nil {
 		t.Fatal(err)
 	}
 	if persisted.AutoUpdateEnabled {
 		t.Fatal("expected AutoUpdateEnabled to be false after toggle off")
 	}
 }
 // TestSettingsToggleAutoUpdateOn_WithPendingUpdate_ShowsNotification verifies
 // that re-enabling auto-update through the settings handler invokes
 // Server.UpdateAvailableFunc when updater.UpdateDownloaded is already true.
 //
 // The test overrides the package-level updater.UpdateDownloaded flag and
 // restores it on exit, so it must not run in parallel with tests that read or
 // write that flag. No Updater is attached to the Server in this test.
 func TestSettingsToggleAutoUpdateOn_WithPendingUpdate_ShowsNotification(t *testing.T) {
 	testStore := &store.Store{
 		DBPath: filepath.Join(t.TempDir(), "db.sqlite"),
 	}
 	defer testStore.Close()
 	// Start with auto-update disabled
 	settings, err := testStore.Settings()
 	if err != nil {
 		t.Fatal(err)
 	}
 	settings.AutoUpdateEnabled = false
 	if err := testStore.SetSettings(settings); err != nil {
 		t.Fatal(err)
 	}
 	// Simulate that an update was previously downloaded
 	// (the previous value is restored by the deferred func below).
 	oldVal := updater.UpdateDownloaded
 	updater.UpdateDownloaded = true
 	defer func() { updater.UpdateDownloaded = oldVal }()
 	// atomic.Bool because the callback may fire on another goroutine.
 	var notificationCalled atomic.Bool
 	server := &Server{
 		Store:   testStore,
 		Restart: func() {},
 		UpdateAvailableFunc: func() {
 			notificationCalled.Store(true)
 		},
 	}
 	// Re-enable auto-update via settings API
 	settings.AutoUpdateEnabled = true
 	body, err := json.Marshal(settings)
 	if err != nil {
 		t.Fatal(err)
 	}
 	req := httptest.NewRequest("POST", "/api/v1/settings", bytes.NewReader(body))
 	req.Header.Set("Content-Type", "application/json")
 	rr := httptest.NewRecorder()
 	if err := server.settings(rr, req); err != nil {
 		t.Fatalf("settings() error = %v", err)
 	}
 	if rr.Code != http.StatusOK {
 		t.Fatalf("settings() status = %d, want %d", rr.Code, http.StatusOK)
 	}
 	// The notification is checked immediately after the handler returns.
 	if !notificationCalled.Load() {
 		t.Fatal("expected UpdateAvailableFunc to be called when re-enabling with a downloaded update")
 	}
 }
 // TestSettingsToggleAutoUpdateOn_NoPendingUpdate_TriggersCheck re-enables
 // auto-update through the settings handler while no update is downloaded or
 // staged, and verifies that Server.UpdateAvailableFunc is NOT invoked.
 //
 // NOTE(review): despite the name, the test does not observe whether an
 // update check was actually triggered; it only asserts the absence of the
 // notification.
 //
 // The test overrides the package-level updater.UpdateDownloaded and
 // updater.UpdateStageDir variables and restores them on exit.
 func TestSettingsToggleAutoUpdateOn_NoPendingUpdate_TriggersCheck(t *testing.T) {
 	testStore := &store.Store{
 		DBPath: filepath.Join(t.TempDir(), "db.sqlite"),
 	}
 	defer testStore.Close()
 	// Start with auto-update disabled
 	settings, err := testStore.Settings()
 	if err != nil {
 		t.Fatal(err)
 	}
 	settings.AutoUpdateEnabled = false
 	if err := testStore.SetSettings(settings); err != nil {
 		t.Fatal(err)
 	}
 	// Ensure no pending update - clear both the downloaded flag and the stage dir
 	oldVal := updater.UpdateDownloaded
 	updater.UpdateDownloaded = false
 	defer func() { updater.UpdateDownloaded = oldVal }()
 	oldStageDir := updater.UpdateStageDir
 	updater.UpdateStageDir = t.TempDir() // empty dir means IsUpdatePending() returns false
 	defer func() { updater.UpdateStageDir = oldStageDir }()
 	upd := &updater.Updater{Store: &store.Store{
 		DBPath: filepath.Join(t.TempDir(), "db2.sqlite"),
 	}}
 	defer upd.Store.Close()
 	// Start the background checker so that the updater's checkNow channel is
 	// created; without it TriggerImmediateCheck is a silent no-op (it is
 	// guarded against a nil channel). The checker is not stopped immediately:
 	// cancel is deferred, so the context is only cancelled when the test returns.
 	ctx, cancel := context.WithCancel(t.Context())
 	upd.StartBackgroundUpdaterChecker(ctx, func(string) error { return nil })
 	defer cancel()
 	var notificationCalled atomic.Bool
 	server := &Server{
 		Store:   testStore,
 		Restart: func() {},
 		Updater: upd,
 		UpdateAvailableFunc: func() {
 			notificationCalled.Store(true)
 		},
 	}
 	// Re-enable auto-update via settings API
 	settings.AutoUpdateEnabled = true
 	body, err := json.Marshal(settings)
 	if err != nil {
 		t.Fatal(err)
 	}
 	req := httptest.NewRequest("POST", "/api/v1/settings", bytes.NewReader(body))
 	req.Header.Set("Content-Type", "application/json")
 	rr := httptest.NewRecorder()
 	if err := server.settings(rr, req); err != nil {
 		t.Fatalf("settings() error = %v", err)
 	}
 	if rr.Code != http.StatusOK {
 		t.Fatalf("settings() status = %d, want %d", rr.Code, http.StatusOK)
 	}
 	// UpdateAvailableFunc should NOT be called since there's no pending update
 	if notificationCalled.Load() {
 		t.Fatal("UpdateAvailableFunc should not be called when there is no pending update")
 	}
 }
--- a/app/updater/updater.go
+++ b/app/updater/updater.go
@@ -19,6 +19,7 @@ import (
 	"runtime"
 	"strconv"
 	"strings"
 	"sync"
 	"time"
 	"github.com/ollama/ollama/app/store"
@@ -58,7 +59,8 @@ func (u *Updater) checkForUpdate(ctx context.Context) (bool, UpdateResponse) {
 	query := requestURL.Query()
 	query.Add("os", runtime.GOOS)
 	query.Add("arch", runtime.GOARCH)
-	query.Add("version", version.Version)
+	currentVersion := version.Version
 	query.Add("version", currentVersion)
 	query.Add("ts", strconv.FormatInt(time.Now().Unix(), 10))
 	// The original macOS app used to use the device ID
@@ -131,15 +133,27 @@ func (u *Updater) checkForUpdate(ctx context.Context) (bool, UpdateResponse) {
 }
 func (u *Updater) DownloadNewRelease(ctx context.Context, updateResp UpdateResponse) error {
 	// Create a cancellable context for this download
 	downloadCtx, cancel := context.WithCancel(ctx)
 	u.cancelDownloadLock.Lock()
 	u.cancelDownload = cancel
 	u.cancelDownloadLock.Unlock()
 	defer func() {
 		u.cancelDownloadLock.Lock()
 		u.cancelDownload = nil
 		u.cancelDownloadLock.Unlock()
 		cancel()
 	}()
 	// Do a head first to check etag info
-	req, err := http.NewRequestWithContext(ctx, http.MethodHead, updateResp.UpdateURL, nil)
+	req, err := http.NewRequestWithContext(downloadCtx, http.MethodHead, updateResp.UpdateURL, nil)
 	if err != nil {
 		return err
 	}
 	// In case of slow downloads, continue the update check in the background
-	bgctx, cancel := context.WithCancel(ctx)
+	bgctx, bgcancel := context.WithCancel(downloadCtx)
-	defer cancel()
+	defer bgcancel()
 	go func() {
 		for {
 			select {
@@ -176,6 +190,7 @@ func (u *Updater) DownloadNewRelease(ctx context.Context, updateResp UpdateRespo
 	_, err = os.Stat(stageFilename)
 	if err == nil {
 		slog.Info("update already downloaded", "bundle", stageFilename)
 		UpdateDownloaded = true
 		return nil
 	}
@@ -244,33 +259,85 @@ func cleanupOldDownloads(stageDir string) {
 }
 type Updater struct {
-	Store *store.Store
+	Store              *store.Store
 	cancelDownload     context.CancelFunc
 	cancelDownloadLock sync.Mutex
 	checkNow           chan struct{}
 }
 // CancelOngoingDownload cancels any currently running download
 // CancelOngoingDownload aborts the download currently registered on the
 // updater, if there is one, by invoking its stored cancel function and then
 // clearing it. It is a no-op when no cancel function is registered. Access to
 // the stored function is serialized by cancelDownloadLock.
 func (u *Updater) CancelOngoingDownload() {
 	u.cancelDownloadLock.Lock()
 	defer u.cancelDownloadLock.Unlock()
 	stop := u.cancelDownload
 	if stop == nil {
 		return
 	}
 	slog.Info("cancelling ongoing update download")
 	stop()
 	u.cancelDownload = nil
 }
 // TriggerImmediateCheck signals the background checker to check for updates immediately
 // TriggerImmediateCheck asks the background checker to run an update check
 // right away by doing a non-blocking send on checkNow. It does nothing when
 // checkNow has not been created yet, and drops the request when the channel
 // cannot accept another value (a check is already pending).
 func (u *Updater) TriggerImmediateCheck() {
 	if u.checkNow == nil {
 		return
 	}
 	select {
 	case u.checkNow <- struct{}{}:
 	default:
 		// A check is already queued; queuing a second one adds nothing.
 	}
 }
 func (u *Updater) StartBackgroundUpdaterChecker(ctx context.Context, cb func(string) error) {
 	u.checkNow = make(chan struct{}, 1)
 	u.checkNow <- struct{}{} // Trigger first check after initial delay
 	go func() {
 		// Don't blast an update message immediately after startup
 		time.Sleep(UpdateCheckInitialDelay)
 		slog.Info("beginning update checker", "interval", UpdateCheckInterval)
 		ticker := time.NewTicker(UpdateCheckInterval)
 		defer ticker.Stop()
 		for {
 			available, resp := u.checkForUpdate(ctx)
 			if available {
 				err := u.DownloadNewRelease(ctx, resp)
 				if err != nil {
 					slog.Error(fmt.Sprintf("failed to download new release: %s", err))
 				} else {
 					err = cb(resp.UpdateVersion)
 					if err != nil {
 						slog.Warn(fmt.Sprintf("failed to register update available with tray: %s", err))
 					}
 				}
 			}
 			select {
 			case <-ctx.Done():
 				slog.Debug("stopping background update checker")
 				return
-			default:
+			case <-u.checkNow:
-				time.Sleep(UpdateCheckInterval)
+				// Immediate check triggered
 			case <-ticker.C:
 				// Regular interval check
 			}
 			// Always check for updates
 			available, resp := u.checkForUpdate(ctx)
 			if !available {
 				continue
 			}
 			// Update is available - check if auto-update is enabled for downloading
 			settings, err := u.Store.Settings()
 			if err != nil {
 				slog.Error("failed to load settings", "error", err)
 				continue
 			}
 			if !settings.AutoUpdateEnabled {
 				// Auto-update disabled - don't download, just log
 				slog.Debug("update available but auto-update disabled", "version", resp.UpdateVersion)
 				continue
 			}
 			// Auto-update is enabled - download
 			err = u.DownloadNewRelease(ctx, resp)
 			if err != nil {
 				slog.Error("failed to download new release", "error", err)
 				continue
 			}
 			// Download successful - show tray notification
 			err = cb(resp.UpdateVersion)
 			if err != nil {
 				slog.Warn("failed to register update available with tray", "error", err)
 			}
 		}
 	}()
--- a/app/updater/updater_test.go
+++ b/app/updater/updater_test.go
@@ -11,6 +11,8 @@ import (
 	"log/slog"
 	"net/http"
 	"net/http/httptest"
 	"path/filepath"
 	"sync/atomic"
 	"testing"
 	"time"
@@ -33,7 +35,7 @@ func TestIsNewReleaseAvailable(t *testing.T) {
 	defer server.Close()
 	slog.Debug("server", "url", server.URL)
-	updater := &Updater{Store: &store.Store{}}
+	updater := &Updater{Store: &store.Store{DBPath: filepath.Join(t.TempDir(), "test.db")}}
 	defer updater.Store.Close() // Ensure database is closed
 	UpdateCheckURLBase = server.URL + "/update.json"
 	updatePresent, resp := updater.checkForUpdate(t.Context())
@@ -84,8 +86,18 @@ func TestBackgoundChecker(t *testing.T) {
 	defer server.Close()
 	UpdateCheckURLBase = server.URL + "/update.json"
-	updater := &Updater{Store: &store.Store{}}
+	updater := &Updater{Store: &store.Store{DBPath: filepath.Join(t.TempDir(), "test.db")}}
-	defer updater.Store.Close() // Ensure database is closed
+	defer updater.Store.Close()
 	settings, err := updater.Store.Settings()
 	if err != nil {
 		t.Fatal(err)
 	}
 	settings.AutoUpdateEnabled = true
 	if err := updater.Store.SetSettings(settings); err != nil {
 		t.Fatal(err)
 	}
 	updater.StartBackgroundUpdaterChecker(ctx, cb)
 	select {
 	case <-stallTimer.C:
@@ -99,3 +111,267 @@ func TestBackgoundChecker(t *testing.T) {
 		}
 	}
 }
 // TestAutoUpdateDisabledSkipsDownload runs the background update checker
 // against a stub server that always advertises version 9.9.9 while
 // AutoUpdateEnabled is false, and verifies that over several check cycles
 // neither the installer URL is requested nor the update callback is invoked.
 //
 // The test overwrites the package-level UpdateStageDir,
 // UpdateCheckInitialDelay, UpdateCheckInterval, VerifyDownload and
 // UpdateCheckURLBase variables without restoring them.
 func TestAutoUpdateDisabledSkipsDownload(t *testing.T) {
 	UpdateStageDir = t.TempDir()
 	var downloadAttempted atomic.Bool
 	var callbackCalled atomic.Bool
 	ctx, cancel := context.WithCancel(t.Context())
 	defer cancel()
 	UpdateCheckInitialDelay = 5 * time.Millisecond
 	UpdateCheckInterval = 5 * time.Millisecond
 	VerifyDownload = func() error {
 		return nil
 	}
 	var server *httptest.Server
 	server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		if r.URL.Path == "/update.json" {
 			w.Write([]byte(
 				fmt.Sprintf(`{"version": "9.9.9", "url": "%s"}`,
 					server.URL+"/9.9.9/"+Installer)))
 		} else if r.URL.Path == "/9.9.9/"+Installer {
 			// Any request for the installer counts as a download attempt.
 			downloadAttempted.Store(true)
 			buf := &bytes.Buffer{}
 			zw := zip.NewWriter(buf)
 			zw.Close()
 			io.Copy(w, buf)
 		}
 	}))
 	defer server.Close()
 	UpdateCheckURLBase = server.URL + "/update.json"
 	updater := &Updater{Store: &store.Store{DBPath: filepath.Join(t.TempDir(), "test.db")}}
 	defer updater.Store.Close()
 	// Ensure auto-update is disabled
 	settings, err := updater.Store.Settings()
 	if err != nil {
 		t.Fatal(err)
 	}
 	settings.AutoUpdateEnabled = false
 	if err := updater.Store.SetSettings(settings); err != nil {
 		t.Fatal(err)
 	}
 	// The callback runs on the checker's goroutine, where t.Fatal must not be
 	// called (FailNow is only valid on the goroutine running the test). Record
 	// the call and assert on it from the test goroutine instead.
 	cb := func(ver string) error {
 		callbackCalled.Store(true)
 		return nil
 	}
 	updater.StartBackgroundUpdaterChecker(ctx, cb)
 	// Wait enough time for multiple check cycles
 	time.Sleep(50 * time.Millisecond)
 	if callbackCalled.Load() {
 		t.Fatal("callback should not be called when auto-update is disabled")
 	}
 	if downloadAttempted.Load() {
 		t.Fatal("download should not be attempted when auto-update is disabled")
 	}
 }
 // TestAutoUpdateReenabledDownloadsUpdate starts the background checker with
 // AutoUpdateEnabled set to false, confirms that no installer request is made
 // during that period, then flips the setting to true in the store and expects
 // the checker to request the installer and invoke the callback within 5s.
 //
 // The test overwrites the package-level UpdateStageDir,
 // UpdateCheckInitialDelay, UpdateCheckInterval, VerifyDownload and
 // UpdateCheckURLBase variables without restoring them.
 func TestAutoUpdateReenabledDownloadsUpdate(t *testing.T) {
 	UpdateStageDir = t.TempDir()
 	var downloadAttempted atomic.Bool
 	// Buffered so the callback's non-blocking send succeeds even before the
 	// test goroutine starts receiving.
 	callbackCalled := make(chan struct{}, 1)
 	ctx, cancel := context.WithCancel(t.Context())
 	defer cancel()
 	UpdateCheckInitialDelay = 5 * time.Millisecond
 	UpdateCheckInterval = 5 * time.Millisecond
 	VerifyDownload = func() error {
 		return nil
 	}
 	// Declared before assignment so the handler closure can refer to server.URL.
 	var server *httptest.Server
 	server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		if r.URL.Path == "/update.json" {
 			w.Write([]byte(
 				fmt.Sprintf(`{"version": "9.9.9", "url": "%s"}`,
 					server.URL+"/9.9.9/"+Installer)))
 		} else if r.URL.Path == "/9.9.9/"+Installer {
 			downloadAttempted.Store(true)
 			// Respond with an empty zip archive.
 			buf := &bytes.Buffer{}
 			zw := zip.NewWriter(buf)
 			zw.Close()
 			io.Copy(w, buf)
 		}
 	}))
 	defer server.Close()
 	UpdateCheckURLBase = server.URL + "/update.json"
 	upd := &Updater{Store: &store.Store{DBPath: filepath.Join(t.TempDir(), "test.db")}}
 	defer upd.Store.Close()
 	// Start with auto-update disabled
 	settings, err := upd.Store.Settings()
 	if err != nil {
 		t.Fatal(err)
 	}
 	settings.AutoUpdateEnabled = false
 	if err := upd.Store.SetSettings(settings); err != nil {
 		t.Fatal(err)
 	}
 	// Non-blocking send: repeated callbacks after the first are dropped.
 	cb := func(ver string) error {
 		select {
 		case callbackCalled <- struct{}{}:
 		default:
 		}
 		return nil
 	}
 	upd.StartBackgroundUpdaterChecker(ctx, cb)
 	// Wait for a few cycles with auto-update disabled - no download should happen
 	time.Sleep(50 * time.Millisecond)
 	if downloadAttempted.Load() {
 		t.Fatal("download should not happen while auto-update is disabled")
 	}
 	// Re-enable auto-update
 	settings.AutoUpdateEnabled = true
 	if err := upd.Store.SetSettings(settings); err != nil {
 		t.Fatal(err)
 	}
 	// Wait for the checker to pick it up and download
 	select {
 	case <-callbackCalled:
 		// Success: download happened and callback was called after re-enabling
 		if !downloadAttempted.Load() {
 			t.Fatal("expected download to be attempted after re-enabling")
 		}
 	case <-time.After(5 * time.Second):
 		t.Fatal("expected download and callback after re-enabling auto-update")
 	}
 }
 // TestCancelOngoingDownload starts DownloadNewRelease against a stub server
 // whose installer handler blocks, calls CancelOngoingDownload once the
 // non-HEAD request has reached the server, and verifies that the server sees
 // the request context being cancelled.
 //
 // The test overwrites the package-level UpdateStageDir, VerifyDownload and
 // UpdateCheckURLBase variables without restoring them.
 func TestCancelOngoingDownload(t *testing.T) {
 	UpdateStageDir = t.TempDir()
 	downloadStarted := make(chan struct{})
 	downloadCancelled := make(chan struct{})
 	ctx := t.Context()
 	VerifyDownload = func() error {
 		return nil
 	}
 	// Declared before assignment so the handler closure can refer to server.URL.
 	var server *httptest.Server
 	server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		if r.URL.Path == "/update.json" {
 			w.Write([]byte(
 				fmt.Sprintf(`{"version": "9.9.9", "url": "%s"}`,
 					server.URL+"/9.9.9/"+Installer)))
 		} else if r.URL.Path == "/9.9.9/"+Installer {
 			// HEAD requests are answered immediately; only other methods block.
 			if r.Method == http.MethodHead {
 				w.Header().Set("Content-Length", "1000000")
 				w.WriteHeader(http.StatusOK)
 				return
 			}
 			// Signal that download has started
 			// NOTE(review): closing an already-closed channel panics, so this
 			// handler assumes exactly one non-HEAD installer request per test.
 			close(downloadStarted)
 			// Wait for cancellation or timeout
 			select {
 			case <-r.Context().Done():
 				close(downloadCancelled)
 				return
 			case <-time.After(5 * time.Second):
 				t.Error("download was not cancelled in time")
 			}
 		}
 	}))
 	defer server.Close()
 	UpdateCheckURLBase = server.URL + "/update.json"
 	updater := &Updater{Store: &store.Store{DBPath: filepath.Join(t.TempDir(), "test.db")}}
 	defer updater.Store.Close()
 	// Only the response is used; the "update available" result is discarded.
 	_, resp := updater.checkForUpdate(ctx)
 	// Start download in goroutine
 	// (its error is deliberately ignored; the test observes the server side).
 	go func() {
 		_ = updater.DownloadNewRelease(ctx, resp)
 	}()
 	// Wait for download to start
 	select {
 	case <-downloadStarted:
 	case <-time.After(2 * time.Second):
 		t.Fatal("download did not start in time")
 	}
 	// Cancel the download
 	updater.CancelOngoingDownload()
 	// Verify cancellation was received
 	select {
 	case <-downloadCancelled:
 		// Success
 	case <-time.After(2 * time.Second):
 		t.Fatal("download cancellation was not received by server")
 	}
 }
 // TestTriggerImmediateCheck verifies that TriggerImmediateCheck causes an
 // extra request to the update-check endpoint. The regular interval is set to
 // one hour so that, after the initial check, only an explicit trigger can
 // produce another request within the test's time budget.
 //
 // The test overwrites the package-level UpdateStageDir,
 // UpdateCheckInitialDelay, UpdateCheckInterval, VerifyDownload and
 // UpdateCheckURLBase variables without restoring them.
 func TestTriggerImmediateCheck(t *testing.T) {
 	UpdateStageDir = t.TempDir()
 	checkCount := atomic.Int32{}
 	// Buffered so the handler's non-blocking send never loses the first signals.
 	checkDone := make(chan struct{}, 10)
 	ctx, cancel := context.WithCancel(t.Context())
 	defer cancel()
 	// Set a very long interval so only TriggerImmediateCheck causes checks
 	UpdateCheckInitialDelay = 1 * time.Millisecond
 	UpdateCheckInterval = 1 * time.Hour
 	VerifyDownload = func() error {
 		return nil
 	}
 	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		if r.URL.Path == "/update.json" {
 			// Count every check and notify the test goroutine.
 			checkCount.Add(1)
 			select {
 			case checkDone <- struct{}{}:
 			default:
 			}
 			// Return no update available
 			w.WriteHeader(http.StatusNoContent)
 		}
 	}))
 	defer server.Close()
 	UpdateCheckURLBase = server.URL + "/update.json"
 	updater := &Updater{Store: &store.Store{DBPath: filepath.Join(t.TempDir(), "test.db")}}
 	defer updater.Store.Close()
 	cb := func(ver string) error {
 		return nil
 	}
 	updater.StartBackgroundUpdaterChecker(ctx, cb)
 	// Wait for the initial check that fires after the initial delay
 	select {
 	case <-checkDone:
 	case <-time.After(2 * time.Second):
 		t.Fatal("initial check did not happen")
 	}
 	// Snapshot the count after the initial check has been observed.
 	initialCount := checkCount.Load()
 	// Trigger immediate check
 	updater.TriggerImmediateCheck()
 	// Wait for the triggered check
 	select {
 	case <-checkDone:
 	case <-time.After(2 * time.Second):
 		t.Fatal("triggered check did not happen")
 	}
 	finalCount := checkCount.Load()
 	if finalCount <= initialCount {
 		t.Fatalf("TriggerImmediateCheck did not cause additional check: initial=%d, final=%d", initialCount, finalCount)
 	}
 }
--- a/app/wintray/tray.go
+++ b/app/wintray/tray.go
@@ -369,25 +369,6 @@ func (t *winTray) addSeparatorMenuItem(menuItemId, parentId uint32) error {
 	return nil
 }
 // func (t *winTray) hideMenuItem(menuItemId, parentId uint32) error {
 // 	const ERROR_SUCCESS syscall.Errno = 0
 // 	t.muMenus.RLock()
 // 	menu := uintptr(t.menus[parentId])
 // 	t.muMenus.RUnlock()
 // 	res, _, err := pRemoveMenu.Call(
 // 		menu,
 // 		uintptr(menuItemId),
 // 		MF_BYCOMMAND,
 // 	)
 // 	if res == 0 && err.(syscall.Errno) != ERROR_SUCCESS {
 // 		return err
 // 	}
 // 	t.delFromVisibleItems(parentId, menuItemId)
 // 	return nil
 // }
 func (t *winTray) showMenu() error {
 	p := point{}
 	boolRet, _, err := pGetCursorPos.Call(uintptr(unsafe.Pointer(&p)))
--- a/app/wintray/w32api.go
+++ b/app/wintray/w32api.go
@@ -51,7 +51,6 @@ const (
 	IMAGE_ICON          = 1          // Loads an icon
 	LR_DEFAULTSIZE      = 0x00000040 // Loads default-size icon for windows(SM_CXICON x SM_CYICON) if cx, cy are set to zero
 	LR_LOADFROMFILE     = 0x00000010 // Loads the stand-alone image from the file
 	MF_BYCOMMAND        = 0x00000000
 	MFS_DISABLED        = 0x00000003
 	MFT_SEPARATOR       = 0x00000800
 	MFT_STRING          = 0x00000000
--- a/cmd/background_unix.go
+++ b/cmd/background_unix.go
@@ -0,0 +1,13 @@
 //go:build !windows
 package cmd
 import "syscall"
 // backgroundServerSysProcAttr builds the process attributes used when the
 // server is launched in the background on Unix-like systems.
 // Setpgid is enabled so that the server is not killed when the parent
 // process exits.
 func backgroundServerSysProcAttr() *syscall.SysProcAttr {
 	attr := new(syscall.SysProcAttr)
 	attr.Setpgid = true
 	return attr
 }
--- a/cmd/background_windows.go
+++ b/cmd/background_windows.go
@@ -0,0 +1,12 @@
 package cmd
 import "syscall"
 // backgroundServerSysProcAttr returns SysProcAttr for running the server in the background on Windows.
 // CREATE_NO_WINDOW (0x08000000) prevents a console window from appearing.
 // HideWindow is set as well so the child process starts with its window hidden.
 // This file has no explicit build tag; NOTE(review): it presumably relies on
 // the _windows.go filename suffix to restrict it to Windows builds.
 func backgroundServerSysProcAttr() *syscall.SysProcAttr {
 	return &syscall.SysProcAttr{
 		CreationFlags: 0x08000000, // CREATE_NO_WINDOW
 		HideWindow:    true,
 	}
 }
--- a/cmd/bench/README.md
+++ b/cmd/bench/README.md
@@ -1,27 +1,31 @@
 Ollama Benchmark Tool
 ---------------------
-A Go-based command-line tool for benchmarking Ollama models with configurable parameters and multiple output formats.
+A Go-based command-line tool for benchmarking Ollama models with configurable parameters, warmup phases, TTFT tracking, VRAM monitoring, and benchstat/CSV output.
 ## Features
 * Benchmark multiple models in a single run
 * Support for both text and image prompts
 * Configurable generation parameters (temperature, max tokens, seed, etc.)
- * Supports benchstat and CSV output formats
+ * Warmup phase before timed epochs to stabilize measurements
- * Detailed performance metrics (prefill, generate, load, total durations)
+ * Time-to-first-token (TTFT) tracking per epoch
 * Model metadata display (parameter size, quantization level, family)
 * VRAM and CPU memory usage tracking via running process info
 * Controlled prompt token length for reproducible benchmarks
 * Benchstat and CSV output formats
 ## Building from Source
 ```
-go build -o ollama-bench bench.go
+go build -o ollama-bench ./cmd/bench
-./ollama-bench -model gpt-oss:20b -epochs 6 -format csv
+./ollama-bench -model gemma3 -epochs 6 -format csv
 ```
 Using Go Run (without building)
 ```
-go run bench.go -model gpt-oss:20b -epochs 3
+go run ./cmd/bench -model gemma3 -epochs 3
 ```
 ## Usage
@@ -45,10 +49,16 @@ benchstat -col /name gemma.bench
 ./ollama-bench -model qwen3-vl -image photo.jpg -epochs 6 -max-tokens 100 -p "Describe this image"
 ```
 ### Controlled Prompt Length
 ```
 ./ollama-bench -model gemma3 -epochs 6 -prompt-tokens 512
 ```
 ### Advanced Example
 ```
-./ollama-bench -model llama3 -epochs 10 -temperature 0.7 -max-tokens 500 -seed 42 -format csv -output results.csv
+./ollama-bench -model llama3 -epochs 10 -temperature 0.7 -max-tokens 500 -seed 42 -warmup 2 -format csv -output results.csv
 ```
 ## Command Line Options
@@ -56,41 +66,48 @@ benchstat -col /name gemma.bench
 | Option  	| Description | Default |
 |----------|-------------|---------|
 | -model	| Comma-separated list of models to benchmark	| (required)		|
-| -epochs	| Number of iterations per model		| 1			|
+| -epochs	| Number of iterations per model		| 6			|
-| -max-tokens	| Maximum tokens for model response		| 0 (unlimited)		|
+| -max-tokens	| Maximum tokens for model response		| 200			|
 | -temperature	| Temperature parameter				| 0.0			|
 | -seed		| Random seed					| 0 (random)		|
 | -timeout	| Timeout in seconds				| 300			|
-| -p		| Prompt text					| "Write a long story."	|
+| -p		| Prompt text					| (default story prompt)	|
 | -image	| Image file to include in prompt		| 			|
 | -k		| Keep-alive duration in seconds		| 0			|
 | -format	| Output format (benchstat, csv)		| benchstat		|
 | -output	| Output file for results			| "" (stdout)		|
 | -warmup	| Number of warmup requests before timing	| 1			|
 | -prompt-tokens	| Generate prompt targeting ~N tokens (0 = use -p)	| 0		|
 | -v		| Verbose mode					| false			|
 | -debug	| Show debug information			| false			|
 ## Output Formats
-### Markdown Format
+### Benchstat Format (default)
-The default markdown format is suitable for copying and pasting into a GitHub issue and will look like:
+Compatible with Go's benchstat tool for statistical analysis. Uses one value/unit pair per line, standard `ns/op` for timing metrics, and `ns/token` for throughput. Each epoch produces one set of lines -- benchstat aggregates across repeated runs to compute statistics.
 ```
 Model | Step | Count | Duration | nsPerToken | tokensPerSec |
 |-------|------|-------|----------|------------|--------------|
 | gpt-oss:20b | prefill | 124 | 30.006458ms | 241987.56 | 4132.44 |
 | gpt-oss:20b | generate | 200 | 2.646843954s | 13234219.77 | 75.56 |
 | gpt-oss:20b | load | 1 | 121.674208ms | - | - |
 | gpt-oss:20b | total | 1 | 2.861047625s | - | - |
 ```
 ### Benchstat Format
 Compatible with Go's benchstat tool for statistical analysis:
 ```
-BenchmarkModel/name=gpt-oss:20b/step=prefill 128 78125.00 ns/token 12800.00 token/sec
+# Model: gemma3 | Params: 4.3B | Quant: Q4_K_M | Family: gemma3 | Size: 4080218931 | VRAM: 4080218931
-BenchmarkModel/name=gpt-oss:20b/step=generate 512 19531.25 ns/token 51200.00 token/sec
+BenchmarkModel/name=gemma3/step=prefill 1 78125.00 ns/token 12800.00 token/sec
-BenchmarkModel/name=gpt-oss:20b/step=load 1 1500000000 ns/request
+BenchmarkModel/name=gemma3/step=generate 1 19531.25 ns/token 51200.00 token/sec
 BenchmarkModel/name=gemma3/step=ttft 1 45123000 ns/op
 BenchmarkModel/name=gemma3/step=load 1 1500000000 ns/op
 BenchmarkModel/name=gemma3/step=total 1 2861047625 ns/op
 ```
 Use with benchstat:
 ```
 ./ollama-bench -model gemma3 -epochs 6 > gemma3.bench
 benchstat -col /step gemma3.bench
 ```
 Compare two runs:
 ```
 ./ollama-bench -model gemma3 -epochs 6 > before.bench
 # ... make changes ...
 ./ollama-bench -model gemma3 -epochs 6 > after.bench
 benchstat before.bench after.bench
 ```
 ### CSV Format
@@ -99,17 +116,28 @@ Machine-readable comma-separated values:
 ```
 NAME,STEP,COUNT,NS_PER_COUNT,TOKEN_PER_SEC
-gpt-oss:20b,prefill,128,78125.00,12800.00
+# Model: gemma3 | Params: 4.3B | Quant: Q4_K_M | Family: gemma3 | Size: 4080218931 | VRAM: 4080218931
-gpt-oss:20b,generate,512,19531.25,51200.00
+gemma3,prefill,128,78125.00,12800.00
-gpt-oss:20b,load,1,1500000000,0
+gemma3,generate,512,19531.25,51200.00
 gemma3,ttft,1,45123000,0
 gemma3,load,1,1500000000,0
 gemma3,total,1,2861047625,0
 ```
 ## Metrics Explained
-The tool reports four types of metrics for each model:
+The tool reports the following metrics for each epoch:
- * prefill: Time spent processing the prompt
+ * **prefill**: Time spent processing the prompt (ns/token)
- * generate: Time spent generating the response
+ * **generate**: Time spent generating the response (ns/token)
- * load: Model loading time (one-time cost)
+ * **ttft**: Time to first token -- latency from request start to first response content
- * total: Total request duration
+ * **load**: Model loading time (one-time cost)
 * **total**: Total request duration
 Additionally, the model info comment line (displayed once per model before epochs) includes:
 * **Params**: Model parameter count (e.g., 4.3B)
 * **Quant**: Quantization level (e.g., Q4_K_M)
 * **Family**: Model family (e.g., gemma3)
 * **Size**: Total model memory in bytes
 * **VRAM**: GPU memory used by the loaded model (when Size > VRAM, the difference is CPU spill)
--- a/cmd/bench/bench.go
+++ b/cmd/bench/bench.go
@@ -17,19 +17,22 @@ import (
 )
 type flagOptions struct {
-	models      *string
+	models       *string
-	epochs      *int
+	epochs       *int
-	maxTokens   *int
+	maxTokens    *int
-	temperature *float64
+	temperature  *float64
-	seed        *int
+	seed         *int
-	timeout     *int
+	timeout      *int
-	prompt      *string
+	prompt       *string
-	imageFile   *string
+	imageFile    *string
-	keepAlive   *float64
+	keepAlive    *float64
-	format      *string
+	format       *string
-	outputFile  *string
+	outputFile   *string
-	debug       *bool
+	debug        *bool
-	verbose     *bool
+	verbose      *bool
 	warmup       *int
 	promptTokens *int
 	numCtx       *int
 }
 type Metrics struct {
@@ -39,48 +42,203 @@ type Metrics struct {
 	Duration time.Duration
 }
-var once sync.Once
+type ModelInfo struct {
 	// Name is the model name as given to the benchmark.
 	Name              string
 	// ParameterSize is copied from the show response details (e.g. "4.3B").
 	ParameterSize     string
 	// QuantizationLevel is copied from the show response details (e.g. "Q4_K_M").
 	QuantizationLevel string
 	// Family is copied from the show response details (e.g. "gemma3").
 	Family            string
 	// SizeBytes is the model size reported by the running-model list; 0 if unknown.
 	SizeBytes         int64
 	// VRAMBytes is the VRAM size reported by the running-model list; 0 if unknown.
 	VRAMBytes         int64
 	// NumCtx is the context size: the -num-ctx flag when set, otherwise the
 	// context length reported by the running-model list; 0 if unknown.
 	NumCtx            int64
 }
 const DefaultPrompt = `Please write a descriptive story about a llama named Alonso who grows up to be President of the Land of Llamas. Include details about Alonso's childhood, adolescent years, and how he grew up to be a political mover and shaker. Write the story with a sense of whimsy.`
 // Word list for generating prompts targeting a specific token count.
 var promptWordList = []string{
 	"the", "quick", "brown", "fox", "jumps", "over", "lazy", "dog",
 	"a", "bright", "sunny", "day", "in", "the", "meadow", "where",
 	"flowers", "bloom", "and", "birds", "sing", "their", "morning",
 	"songs", "while", "gentle", "breeze", "carries", "sweet", "scent",
 	"of", "pine", "trees", "across", "rolling", "hills", "toward",
 	"distant", "mountains", "covered", "with", "fresh", "snow",
 	"beneath", "clear", "blue", "sky", "children", "play", "near",
 	"old", "stone", "bridge", "that", "crosses", "winding", "river",
 }
 // tokensPerWord is the calibrated ratio of tokens to words for the current model.
 // Initialized with a heuristic, then updated during warmup based on actual tokenization.
 var tokensPerWord = 1.3
 // generatePromptForTokenCount builds a space-separated prompt of words taken
 // cyclically from promptWordList, sized so that it should tokenize to roughly
 // targetTokens tokens given the current tokensPerWord ratio. At least one word
 // is always produced. The starting position in the word list is shifted by
 // epoch so that different epochs do not share a prompt prefix.
 func generatePromptForTokenCount(targetTokens int, epoch int) string {
 	wordTotal := max(1, int(float64(targetTokens)/tokensPerWord))
 	listLen := len(promptWordList)
 	// Stride by a prime (7) per epoch; normalize so negative epochs (used by
 	// warmup runs) still yield a valid non-negative index.
 	start := ((epoch*7)%listLen + listLen) % listLen
 	picked := make([]string, 0, wordTotal)
 	for pos := 0; pos < wordTotal; pos++ {
 		picked = append(picked, promptWordList[(start+pos)%listLen])
 	}
 	return strings.Join(picked, " ")
 }
 // calibratePromptTokens recomputes the package-level tokensPerWord ratio from
 // a measured run: actualTokens is the token count observed for a prompt of
 // wordCount words. Non-positive measurements are ignored. The new ratio and
 // the word count it implies for targetTokens are reported on stderr.
 func calibratePromptTokens(targetTokens, actualTokens, wordCount int) {
 	if !(actualTokens > 0 && wordCount > 0) {
 		return
 	}
 	ratio := float64(actualTokens) / float64(wordCount)
 	tokensPerWord = ratio
 	adjustedWords := int(float64(targetTokens) / ratio)
 	fmt.Fprintf(os.Stderr, "bench: calibrated %.2f tokens/word (target=%d, got=%d, words=%d → %d)\n",
 		ratio, targetTokens, actualTokens, wordCount, adjustedWords)
 }
 // buildGenerateRequest assembles the raw-mode generate request for one run of
 // model. Options are taken from fOpt: num_predict, seed and num_ctx are only
 // set when positive, temperature is always set, and keep-alive is only set
 // when positive. The prompt is either synthesized for the requested token
 // count or the user prompt prefixed with "[epoch] "; both vary with epoch so
 // that consecutive runs do not share a cached prompt prefix. imgData, when
 // non-nil, is attached as the single image of the request.
 func buildGenerateRequest(model string, fOpt flagOptions, imgData api.ImageData, epoch int) *api.GenerateRequest {
 	opts := map[string]interface{}{}
 	if limit := *fOpt.maxTokens; limit > 0 {
 		opts["num_predict"] = limit
 	}
 	opts["temperature"] = *fOpt.temperature
 	if fOpt.seed != nil && *fOpt.seed > 0 {
 		opts["seed"] = *fOpt.seed
 	}
 	if fOpt.numCtx != nil && *fOpt.numCtx > 0 {
 		opts["num_ctx"] = *fOpt.numCtx
 	}
 	var keepAlive *api.Duration
 	if secs := *fOpt.keepAlive; secs > 0 {
 		keepAlive = &api.Duration{Duration: time.Duration(secs * float64(time.Second))}
 	}
 	text := *fOpt.prompt
 	if *fOpt.promptTokens > 0 {
 		text = generatePromptForTokenCount(*fOpt.promptTokens, epoch)
 	} else {
 		// Tag the user prompt with the epoch so each run starts differently.
 		text = fmt.Sprintf("[%d] %s", epoch, text)
 	}
 	request := &api.GenerateRequest{
 		Model:     model,
 		Prompt:    text,
 		Raw:       true,
 		Options:   opts,
 		KeepAlive: keepAlive,
 	}
 	if imgData != nil {
 		request.Images = []api.ImageData{imgData}
 	}
 	return request
 }
 // fetchModelInfo asks the server to describe model and returns a ModelInfo
 // carrying its parameter size, quantization level and family. When the show
 // request fails, a warning is written to stderr and only Name is populated.
 func fetchModelInfo(ctx context.Context, client *api.Client, model string) ModelInfo {
 	shown, err := client.Show(ctx, &api.ShowRequest{Model: model})
 	if err != nil {
 		fmt.Fprintf(os.Stderr, "WARNING: Could not fetch model info for '%s': %v\n", model, err)
 		return ModelInfo{Name: model}
 	}
 	details := shown.Details
 	return ModelInfo{
 		Name:              model,
 		ParameterSize:     details.ParameterSize,
 		QuantizationLevel: details.QuantizationLevel,
 		Family:            details.Family,
 	}
 }
 // fetchMemoryUsage looks model up in the server's running-model list and
 // returns its reported total size and VRAM size. Entries whose name equals
 // model are preferred; if none exists, the first entry whose name starts with
 // model is used. Returns (0, 0) when the list cannot be fetched or nothing
 // matches; the fetch error is only reported when OLLAMA_DEBUG is set.
 func fetchMemoryUsage(ctx context.Context, client *api.Client, model string) (size, vram int64) {
 	running, err := client.ListRunning(ctx)
 	if err != nil {
 		if os.Getenv("OLLAMA_DEBUG") != "" {
 			fmt.Fprintf(os.Stderr, "WARNING: Could not fetch memory usage: %v\n", err)
 		}
 		return 0, 0
 	}
 	// Matchers are tried in order over the whole list: exact first, then prefix.
 	matchers := []func(candidate string) bool{
 		func(candidate string) bool { return candidate == model },
 		func(candidate string) bool { return strings.HasPrefix(candidate, model) },
 	}
 	for _, matches := range matchers {
 		for _, entry := range running.Models {
 			if matches(entry.Name) || matches(entry.Model) {
 				return entry.Size, entry.SizeVRAM
 			}
 		}
 	}
 	return 0, 0
 }
 // fetchContextLength returns the context length the server reports for model
 // in its running-model list, or 0 when the list cannot be fetched or no entry
 // matches.
 //
 // An entry whose name equals model always wins over one that merely starts
 // with model, regardless of list order. This mirrors the lookup order used by
 // fetchMemoryUsage so both report on the same running entry.
 func fetchContextLength(ctx context.Context, client *api.Client, model string) int64 {
 	resp, err := client.ListRunning(ctx)
 	if err != nil {
 		return 0
 	}
 	// First pass: exact name match.
 	for _, m := range resp.Models {
 		if m.Name == model || m.Model == model {
 			return int64(m.ContextLength)
 		}
 	}
 	// Second pass: prefix fallback (e.g. "gemma3" for "gemma3:latest").
 	for _, m := range resp.Models {
 		if strings.HasPrefix(m.Name, model) || strings.HasPrefix(m.Model, model) {
 			return int64(m.ContextLength)
 		}
 	}
 	return 0
 }
 func outputFormatHeader(w io.Writer, format string, verbose bool) {
 	switch format {
 	case "benchstat":
 		if verbose {
 			fmt.Fprintf(w, "goos: %s\n", runtime.GOOS)
 			fmt.Fprintf(w, "goarch: %s\n", runtime.GOARCH)
 		}
 	case "csv":
 		headings := []string{"NAME", "STEP", "COUNT", "NS_PER_COUNT", "TOKEN_PER_SEC"}
 		fmt.Fprintln(w, strings.Join(headings, ","))
 	}
 }
 // outputModelInfo writes a single "# Model: ..." comment line describing
 // info to w. Empty parameter size, quantization and family fields are shown
 // as "unknown". The Size/VRAM segment is appended only when SizeBytes is
 // positive, and the NumCtx segment only when NumCtx is positive. The format
 // argument is accepted but does not affect the output.
 func outputModelInfo(w io.Writer, format string, info ModelInfo) {
 	line := fmt.Sprintf("# Model: %s | Params: %s | Quant: %s | Family: %s",
 		info.Name,
 		cmp.Or(info.ParameterSize, "unknown"),
 		cmp.Or(info.QuantizationLevel, "unknown"),
 		cmp.Or(info.Family, "unknown"))
 	if info.SizeBytes > 0 {
 		line += fmt.Sprintf(" | Size: %d | VRAM: %d", info.SizeBytes, info.VRAMBytes)
 	}
 	if info.NumCtx > 0 {
 		line += fmt.Sprintf(" | NumCtx: %d", info.NumCtx)
 	}
 	fmt.Fprintln(w, line)
 }
 func OutputMetrics(w io.Writer, format string, metrics []Metrics, verbose bool) {
 	switch format {
 	case "benchstat":
 		if verbose {
 			printHeader := func() {
 				fmt.Fprintf(w, "sysname: %s\n", runtime.GOOS)
 				fmt.Fprintf(w, "machine: %s\n", runtime.GOARCH)
 			}
 			once.Do(printHeader)
 		}
 		for _, m := range metrics {
 			if m.Step == "generate" || m.Step == "prefill" {
 				if m.Count > 0 {
 					nsPerToken := float64(m.Duration.Nanoseconds()) / float64(m.Count)
 					tokensPerSec := float64(m.Count) / (float64(m.Duration.Nanoseconds()) + 1e-12) * 1e9
-
+					fmt.Fprintf(w, "BenchmarkModel/name=%s/step=%s 1 %.2f ns/token %.2f token/sec\n",
-					fmt.Fprintf(w, "BenchmarkModel/name=%s/step=%s %d %.2f ns/token %.2f token/sec\n",
+						m.Model, m.Step, nsPerToken, tokensPerSec)
 						m.Model, m.Step, m.Count, nsPerToken, tokensPerSec)
 				} else {
-					fmt.Fprintf(w, "BenchmarkModel/name=%s/step=%s %d 0 ns/token 0 token/sec\n",
+					fmt.Fprintf(w, "BenchmarkModel/name=%s/step=%s 1 0 ns/token 0 token/sec\n",
-						m.Model, m.Step, m.Count)
+						m.Model, m.Step)
 				}
 			} else if m.Step == "ttft" {
 				fmt.Fprintf(w, "BenchmarkModel/name=%s/step=ttft 1 %d ns/op\n",
 					m.Model, m.Duration.Nanoseconds())
 			} else {
-				var suffix string
+				fmt.Fprintf(w, "BenchmarkModel/name=%s/step=%s 1 %d ns/op\n",
-				if m.Step == "load" {
+					m.Model, m.Step, m.Duration.Nanoseconds())
 					suffix = "/step=load"
 				}
 				fmt.Fprintf(w, "BenchmarkModel/name=%s%s 1 %d ns/request\n",
 					m.Model, suffix, m.Duration.Nanoseconds())
 			}
 		}
 	case "csv":
 		printHeader := func() {
 			headings := []string{"NAME", "STEP", "COUNT", "NS_PER_COUNT", "TOKEN_PER_SEC"}
 			fmt.Fprintln(w, strings.Join(headings, ","))
 		}
 		once.Do(printHeader)
 		for _, m := range metrics {
 			if m.Step == "generate" || m.Step == "prefill" {
 				var nsPerToken float64
@@ -94,39 +252,14 @@ func OutputMetrics(w io.Writer, format string, metrics []Metrics, verbose bool)
 				fmt.Fprintf(w, "%s,%s,1,%d,0\n", m.Model, m.Step, m.Duration.Nanoseconds())
 			}
 		}
 	case "markdown":
 		printHeader := func() {
 			fmt.Fprintln(w, "| Model | Step | Count | Duration | nsPerToken | tokensPerSec |")
 			fmt.Fprintln(w, "|-------|------|-------|----------|------------|--------------|")
 		}
 		once.Do(printHeader)
 		for _, m := range metrics {
 			var nsPerToken, tokensPerSec float64
 			var nsPerTokenStr, tokensPerSecStr string
 			if m.Step == "generate" || m.Step == "prefill" {
 				nsPerToken = float64(m.Duration.Nanoseconds()) / float64(m.Count)
 				tokensPerSec = float64(m.Count) / (float64(m.Duration.Nanoseconds()) + 1e-12) * 1e9
 				nsPerTokenStr = fmt.Sprintf("%.2f", nsPerToken)
 				tokensPerSecStr = fmt.Sprintf("%.2f", tokensPerSec)
 			} else {
 				nsPerTokenStr = "-"
 				tokensPerSecStr = "-"
 			}
 			fmt.Fprintf(w, "| %s | %s | %d | %v | %s | %s |\n",
 				m.Model, m.Step, m.Count, m.Duration, nsPerTokenStr, tokensPerSecStr)
 		}
 	default:
 		fmt.Fprintf(os.Stderr, "Unknown output format '%s'\n", format)
 	}
 }
-func BenchmarkChat(fOpt flagOptions) error {
+func BenchmarkModel(fOpt flagOptions) error {
 	models := strings.Split(*fOpt.models, ",")
 	// todo - add multi-image support
 	var imgData api.ImageData
 	var err error
 	if *fOpt.imageFile != "" {
@@ -158,71 +291,141 @@ func BenchmarkChat(fOpt flagOptions) error {
 		out = f
 	}
 	outputFormatHeader(out, *fOpt.format, *fOpt.verbose)
 	// Log prompt-tokens info in debug mode
 	if *fOpt.debug && *fOpt.promptTokens > 0 {
 		prompt := generatePromptForTokenCount(*fOpt.promptTokens, 0)
 		wordCount := len(strings.Fields(prompt))
 		fmt.Fprintf(os.Stderr, "Generated prompt targeting ~%d tokens (%d words, varied per epoch)\n", *fOpt.promptTokens, wordCount)
 	}
 	for _, model := range models {
-		for range *fOpt.epochs {
+		// Fetch model info
-			options := make(map[string]interface{})
+		infoCtx, infoCancel := context.WithTimeout(context.Background(), 10*time.Second)
-			if *fOpt.maxTokens > 0 {
+		info := fetchModelInfo(infoCtx, client, model)
-				options["num_predict"] = *fOpt.maxTokens
+		infoCancel()
 			}
 			options["temperature"] = *fOpt.temperature
 			if fOpt.seed != nil && *fOpt.seed > 0 {
 				options["seed"] = *fOpt.seed
 			}
 			var keepAliveDuration *api.Duration
 			if *fOpt.keepAlive > 0 {
 				duration := api.Duration{Duration: time.Duration(*fOpt.keepAlive * float64(time.Second))}
 				keepAliveDuration = &duration
 			}
 			req := &api.ChatRequest{
 				Model: model,
 				Messages: []api.Message{
 					{
 						Role:    "user",
 						Content: *fOpt.prompt,
 					},
 				},
 				Options:   options,
 				KeepAlive: keepAliveDuration,
 			}
 			if imgData != nil {
 				req.Messages[0].Images = []api.ImageData{imgData}
 			}
 			var responseMetrics *api.Metrics
 		// Warmup phase (uses negative epoch numbers to avoid colliding with timed epochs)
 		for i := range *fOpt.warmup {
 			req := buildGenerateRequest(model, fOpt, imgData, -(i + 1))
 			ctx, cancel := context.WithTimeout(context.Background(), time.Duration(*fOpt.timeout)*time.Second)
 			defer cancel()
 			err = client.Chat(ctx, req, func(resp api.ChatResponse) error {
 				if *fOpt.debug {
 					fmt.Fprintf(os.Stderr, "%s", cmp.Or(resp.Message.Thinking, resp.Message.Content))
 				}
 			var warmupMetrics *api.Metrics
 			err = client.Generate(ctx, req, func(resp api.GenerateResponse) error {
 				if resp.Done {
-					responseMetrics = &resp.Metrics
+					warmupMetrics = &resp.Metrics
 				}
 				return nil
 			})
-
+			cancel()
 			if *fOpt.debug {
 				fmt.Fprintln(os.Stderr)
 			}
 			if err != nil {
-				if ctx.Err() == context.DeadlineExceeded {
+				fmt.Fprintf(os.Stderr, "WARNING: Warmup %d/%d for %s failed: %v\n", i+1, *fOpt.warmup, model, err)
-					fmt.Fprintf(os.Stderr, "ERROR: Chat request timed out with model '%s' after %vs\n", model, 1)
+			} else {
-					continue
+				if *fOpt.debug {
 					fmt.Fprintf(os.Stderr, "Warmup %d/%d for %s complete\n", i+1, *fOpt.warmup, model)
 				}
-				fmt.Fprintf(os.Stderr, "ERROR: Couldn't chat with model '%s': %v\n", model, err)
+				// Calibrate prompt token count on last warmup run
 				if i == *fOpt.warmup-1 && *fOpt.promptTokens > 0 && warmupMetrics != nil {
 					prompt := generatePromptForTokenCount(*fOpt.promptTokens, -(i + 1))
 					wordCount := len(strings.Fields(prompt))
 					calibratePromptTokens(*fOpt.promptTokens, warmupMetrics.PromptEvalCount, wordCount)
 				}
 			}
 		}
 		// Fetch memory/context info once after warmup (model is loaded and stable)
 		memCtx, memCancel := context.WithTimeout(context.Background(), 5*time.Second)
 		info.SizeBytes, info.VRAMBytes = fetchMemoryUsage(memCtx, client, model)
 		if fOpt.numCtx != nil && *fOpt.numCtx > 0 {
 			info.NumCtx = int64(*fOpt.numCtx)
 		} else {
 			info.NumCtx = fetchContextLength(memCtx, client, model)
 		}
 		memCancel()
 		outputModelInfo(out, *fOpt.format, info)
 		// Timed epoch loop
 		shortCount := 0
 		for epoch := range *fOpt.epochs {
 			var responseMetrics *api.Metrics
 			var ttft time.Duration
 			short := false
 			// Retry loop: if the model hits a stop token before max-tokens,
 			// retry with a different prompt (up to maxRetries times).
 			const maxRetries = 3
 			for attempt := range maxRetries + 1 {
 				responseMetrics = nil
 				ttft = 0
 				var ttftOnce sync.Once
 				req := buildGenerateRequest(model, fOpt, imgData, epoch+attempt*1000)
 				requestStart := time.Now()
 				ctx, cancel := context.WithTimeout(context.Background(), time.Duration(*fOpt.timeout)*time.Second)
 				err = client.Generate(ctx, req, func(resp api.GenerateResponse) error {
 					if *fOpt.debug {
 						fmt.Fprintf(os.Stderr, "%s", cmp.Or(resp.Thinking, resp.Response))
 					}
 					// Capture TTFT on first content
 					ttftOnce.Do(func() {
 						if resp.Response != "" || resp.Thinking != "" {
 							ttft = time.Since(requestStart)
 						}
 					})
 					if resp.Done {
 						responseMetrics = &resp.Metrics
 					}
 					return nil
 				})
 				cancel()
 				if *fOpt.debug {
 					fmt.Fprintln(os.Stderr)
 				}
 				if err != nil {
 					if ctx.Err() == context.DeadlineExceeded {
 						fmt.Fprintf(os.Stderr, "ERROR: Request timed out with model '%s' after %vs\n", model, *fOpt.timeout)
 					} else {
 						fmt.Fprintf(os.Stderr, "ERROR: Couldn't generate with model '%s': %v\n", model, err)
 					}
 					break
 				}
 				if responseMetrics == nil {
 					fmt.Fprintf(os.Stderr, "ERROR: No metrics received for model '%s'\n", model)
 					break
 				}
 				// Check if the response was shorter than requested
 				short = *fOpt.maxTokens > 0 && responseMetrics.EvalCount < *fOpt.maxTokens
 				if !short || attempt == maxRetries {
 					break
 				}
 				if *fOpt.debug {
 					fmt.Fprintf(os.Stderr, "Short response (%d/%d tokens), retrying with different prompt (attempt %d/%d)\n",
 						responseMetrics.EvalCount, *fOpt.maxTokens, attempt+1, maxRetries)
 				}
 			}
 			if err != nil || responseMetrics == nil {
 				continue
 			}
-			if responseMetrics == nil {
+			if short {
-				fmt.Fprintf(os.Stderr, "ERROR: No metrics received for model '%s'\n", model)
+				shortCount++
-				continue
+				if *fOpt.debug {
 					fmt.Fprintf(os.Stderr, "WARNING: Short response (%d/%d tokens) after %d retries for epoch %d\n",
 						responseMetrics.EvalCount, *fOpt.maxTokens, maxRetries, epoch+1)
 				}
 			}
 			metrics := []Metrics{
@@ -238,6 +441,12 @@ func BenchmarkChat(fOpt flagOptions) error {
 					Count:    responseMetrics.EvalCount,
 					Duration: responseMetrics.EvalDuration,
 				},
 				{
 					Model:    model,
 					Step:     "ttft",
 					Count:    1,
 					Duration: ttft,
 				},
 				{
 					Model:    model,
 					Step:     "load",
@@ -254,15 +463,42 @@ func BenchmarkChat(fOpt flagOptions) error {
 			OutputMetrics(out, *fOpt.format, metrics, *fOpt.verbose)
 			if *fOpt.debug && *fOpt.promptTokens > 0 {
 				fmt.Fprintf(os.Stderr, "Generated prompt targeting ~%d tokens (actual: %d)\n",
 					*fOpt.promptTokens, responseMetrics.PromptEvalCount)
 			}
 			if *fOpt.keepAlive > 0 {
 				time.Sleep(time.Duration(*fOpt.keepAlive*float64(time.Second)) + 200*time.Millisecond)
 			}
 		}
 		if shortCount > 0 {
 			fmt.Fprintf(os.Stderr, "WARNING: %d/%d epochs for '%s' had short responses (<%d tokens). Generation metrics may be unreliable.\n",
 				shortCount, *fOpt.epochs, model, *fOpt.maxTokens)
 		}
 		// Unload model before moving to the next one
 		unloadModel(client, model, *fOpt.timeout)
 	}
 	return nil
 }
 // unloadModel sends a prompt-less generate request for model with a zero
 // keep-alive, bounded by timeout seconds. Response chunks are discarded and
 // any error from the request is ignored (best effort).
 func unloadModel(client *api.Client, model string, timeout int) {
 	limit := time.Duration(timeout) * time.Second
 	ctx, cancel := context.WithTimeout(context.Background(), limit)
 	defer cancel()
 	unloadReq := &api.GenerateRequest{
 		Model:     model,
 		KeepAlive: &api.Duration{Duration: 0},
 	}
 	discard := func(api.GenerateResponse) error { return nil }
 	_ = client.Generate(ctx, unloadReq, discard)
 }
 func readImage(filePath string) (api.ImageData, error) {
 	file, err := os.Open(filePath)
 	if err != nil {
@@ -280,19 +516,22 @@ func readImage(filePath string) (api.ImageData, error) {
 func main() {
 	fOpt := flagOptions{
-		models:      flag.String("model", "", "Model to benchmark"),
+		models:       flag.String("model", "", "Model to benchmark"),
-		epochs:      flag.Int("epochs", 6, "Number of epochs (iterations) per model"),
+		epochs:       flag.Int("epochs", 6, "Number of epochs (iterations) per model"),
-		maxTokens:   flag.Int("max-tokens", 200, "Maximum tokens for model response"),
+		maxTokens:    flag.Int("max-tokens", 200, "Maximum tokens for model response"),
-		temperature: flag.Float64("temperature", 0, "Temperature parameter"),
+		temperature:  flag.Float64("temperature", 0, "Temperature parameter"),
-		seed:        flag.Int("seed", 0, "Random seed"),
+		seed:         flag.Int("seed", 0, "Random seed"),
-		timeout:     flag.Int("timeout", 60*5, "Timeout in seconds (default 300s)"),
+		timeout:      flag.Int("timeout", 60*5, "Timeout in seconds (default 300s)"),
-		prompt:      flag.String("p", DefaultPrompt, "Prompt to use"),
+		prompt:       flag.String("p", DefaultPrompt, "Prompt to use"),
-		imageFile:   flag.String("image", "", "Filename for an image to include"),
+		imageFile:    flag.String("image", "", "Filename for an image to include"),
-		keepAlive:   flag.Float64("k", 0, "Keep alive duration in seconds"),
+		keepAlive:    flag.Float64("k", 0, "Keep alive duration in seconds"),
-		format:      flag.String("format", "markdown", "Output format [benchstat|csv] (default benchstat)"),
+		format:       flag.String("format", "benchstat", "Output format [benchstat|csv]"),
-		outputFile:  flag.String("output", "", "Output file for results (stdout if empty)"),
+		outputFile:   flag.String("output", "", "Output file for results (stdout if empty)"),
-		verbose:     flag.Bool("v", false, "Show system information"),
+		verbose:      flag.Bool("v", false, "Show system information"),
-		debug:       flag.Bool("debug", false, "Show debug information"),
+		debug:        flag.Bool("debug", false, "Show debug information"),
 		warmup:       flag.Int("warmup", 1, "Number of warmup requests before timing"),
 		promptTokens: flag.Int("prompt-tokens", 0, "Generate prompt targeting ~N tokens (0 = use -p prompt)"),
 		numCtx:       flag.Int("num-ctx", 0, "Context size (0 = server default)"),
 	}
 	flag.Usage = func() {
@@ -302,11 +541,12 @@ func main() {
 		fmt.Fprintf(os.Stderr, "Options:\n")
 		flag.PrintDefaults()
 		fmt.Fprintf(os.Stderr, "\nExamples:\n")
-		fmt.Fprintf(os.Stderr, "  bench -model gpt-oss:20b -epochs 3 -temperature 0.7\n")
+		fmt.Fprintf(os.Stderr, "  bench -model gemma3,llama3 -epochs 6\n")
 		fmt.Fprintf(os.Stderr, "  bench -model gemma3 -epochs 6 -prompt-tokens 512 -format csv\n")
 	}
 	flag.Parse()
-	if !slices.Contains([]string{"markdown", "benchstat", "csv"}, *fOpt.format) {
+	if !slices.Contains([]string{"benchstat", "csv"}, *fOpt.format) {
 		fmt.Fprintf(os.Stderr, "ERROR: Unknown format '%s'\n", *fOpt.format)
 		os.Exit(1)
 	}
@@ -317,5 +557,5 @@ func main() {
 		return
 	}
-	BenchmarkChat(fOpt)
+	BenchmarkModel(fOpt)
 }
--- a/cmd/bench/bench_test.go
+++ b/cmd/bench/bench_test.go
--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@@ -11,10 +11,12 @@ import (
 	"fmt"
 	"io"
 	"log"
 	"log/slog"
 	"math"
 	"net"
 	"net/http"
 	"os"
 	"os/exec"
 	"os/signal"
 	"path/filepath"
 	"runtime"
@@ -29,14 +31,20 @@ import (
 	"github.com/containerd/console"
 	"github.com/mattn/go-runewidth"
 	"github.com/olekukonko/tablewriter"
 	"github.com/pkg/browser"
 	"github.com/spf13/cobra"
 	"golang.org/x/crypto/ssh"
 	"golang.org/x/sync/errgroup"
 	"golang.org/x/term"
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/cmd/config"
 	"github.com/ollama/ollama/cmd/launch"
 	"github.com/ollama/ollama/cmd/tui"
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/internal/modelref"
 	"github.com/ollama/ollama/logutil"
 	"github.com/ollama/ollama/parser"
 	"github.com/ollama/ollama/progress"
 	"github.com/ollama/ollama/readline"
@@ -46,12 +54,48 @@ import (
 	"github.com/ollama/ollama/types/syncmap"
 	"github.com/ollama/ollama/version"
 	xcmd "github.com/ollama/ollama/x/cmd"
 	"github.com/ollama/ollama/x/create"
 	xcreateclient "github.com/ollama/ollama/x/create/client"
 	"github.com/ollama/ollama/x/imagegen"
 )
-const ConnectInstructions = "To sign in, navigate to:\n    %s\n\n"
+func init() {
 	// Override default selectors to use Bubbletea TUI instead of raw terminal I/O.
 	// Single-choice picker: refuses to run unless both stdin and stdout are
 	// terminals, and translates a TUI cancel into launch.ErrCancelled.
 	launch.DefaultSingleSelector = func(title string, items []launch.ModelItem, current string) (string, error) {
 		if !term.IsTerminal(int(os.Stdin.Fd())) || !term.IsTerminal(int(os.Stdout.Fd())) {
 			return "", fmt.Errorf("model selection requires an interactive terminal; use --model to run in headless mode")
 		}
 		tuiItems := tui.ReorderItems(tui.ConvertItems(items))
 		result, err := tui.SelectSingle(title, tuiItems, current)
 		if errors.Is(err, tui.ErrCancelled) {
 			return "", launch.ErrCancelled
 		}
 		return result, err
 	}
 	// Multi-choice picker: same terminal requirement and cancel translation
 	// as the single selector; preChecked is passed through to the TUI.
 	launch.DefaultMultiSelector = func(title string, items []launch.ModelItem, preChecked []string) ([]string, error) {
 		if !term.IsTerminal(int(os.Stdin.Fd())) || !term.IsTerminal(int(os.Stdout.Fd())) {
 			return nil, fmt.Errorf("model selection requires an interactive terminal; use --model to run in headless mode")
 		}
 		tuiItems := tui.ReorderItems(tui.ConvertItems(items))
 		result, err := tui.SelectMultiple(title, tuiItems, preChecked)
 		if errors.Is(err, tui.ErrCancelled) {
 			return nil, launch.ErrCancelled
 		}
 		return result, err
 	}
 	// Sign-in flow: delegates to the TUI and translates a TUI cancel into
 	// launch.ErrCancelled. No terminal check is performed here.
 	launch.DefaultSignIn = func(modelName, signInURL string) (string, error) {
 		userName, err := tui.RunSignIn(modelName, signInURL)
 		if errors.Is(err, tui.ErrCancelled) {
 			return "", launch.ErrCancelled
 		}
 		return userName, err
 	}
 	// Confirmation prompts are handled directly by the TUI implementation.
 	launch.DefaultConfirmPrompt = tui.RunConfirmWithOptions
 }
 const ConnectInstructions = "If your browser did not open, navigate to:\n    %s\n\n"
 // ensureThinkingSupport emits a warning if the model does not advertise thinking support
 func ensureThinkingSupport(ctx context.Context, client *api.Client, name string) {
@@ -90,6 +134,17 @@ func getModelfileName(cmd *cobra.Command) (string, error) {
 	return absName, nil
 }
 // isLocalhost reports whether the configured Ollama host names this machine:
 // the literal "localhost", or an IP address that is loopback or unspecified.
 // A host that cannot be split into host and port yields false.
 func isLocalhost() bool {
 	hostname, _, _ := net.SplitHostPort(envconfig.Host().Host)
 	if hostname == "localhost" {
 		return true
 	}
 	if addr := net.ParseIP(hostname); addr != nil {
 		return addr.IsLoopback() || addr.IsUnspecified()
 	}
 	return false
 }
 func CreateHandler(cmd *cobra.Command, args []string) error {
 	p := progress.NewProgress(os.Stderr)
 	defer p.Stop()
@@ -102,8 +157,13 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 	}
 	// Check for --experimental flag for safetensors model creation
 	// This gates both safetensors LLM and imagegen model creation
 	experimental, _ := cmd.Flags().GetBool("experimental")
 	if experimental {
 		if !isLocalhost() {
 			return errors.New("remote safetensor model creation not yet supported")
 		}
 		// Get Modelfile content - either from -f flag or default to "FROM ."
 		var reader io.Reader
 		filename, err := getModelfileName(cmd)
@@ -127,25 +187,9 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 			return fmt.Errorf("failed to parse Modelfile: %w", err)
 		}
-		// Extract FROM path and configuration
+		modelDir, mfConfig, err := xcreateclient.ConfigFromModelfile(modelfile)
-		var modelDir string
+		if err != nil {
-		mfConfig := &xcreateclient.ModelfileConfig{}
+			return err
 		for _, cmd := range modelfile.Commands {
 			switch cmd.Name {
 			case "model":
 				modelDir = cmd.Args
 			case "template":
 				mfConfig.Template = cmd.Args
 			case "system":
 				mfConfig.System = cmd.Args
 			case "license":
 				mfConfig.License = cmd.Args
 			}
 		}
 		if modelDir == "" {
 			modelDir = "."
 		}
 		// Resolve relative paths based on Modelfile location
@@ -162,20 +206,12 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 		}, p)
 	}
 	// Standard Modelfile + API path
 	var reader io.Reader
 	filename, err := getModelfileName(cmd)
 	if os.IsNotExist(err) {
 		if filename == "" {
 			// No Modelfile found - check if current directory is an image gen model
 			if create.IsTensorModelDir(".") {
 				quantize, _ := cmd.Flags().GetString("quantize")
 				return xcreateclient.CreateModel(xcreateclient.CreateOptions{
 					ModelName: modelName,
 					ModelDir:  ".",
 					Quantize:  quantize,
 				}, p)
 			}
 			reader = strings.NewReader("FROM .\n")
 		} else {
 			return errModelfileNotFound
@@ -361,18 +397,35 @@ func loadOrUnloadModel(cmd *cobra.Command, opts *runOptions) error {
 		return err
 	}
 	requestedCloud := modelref.HasExplicitCloudSource(opts.Model)
 	if info, err := client.Show(cmd.Context(), &api.ShowRequest{Model: opts.Model}); err != nil {
 		return err
-	} else if info.RemoteHost != "" {
+	} else if info.RemoteHost != "" || requestedCloud {
 		// Cloud model, no need to load/unload
-		if opts.ShowConnect {
+
-			p.StopAndClear()
+		isCloud := requestedCloud || strings.HasPrefix(info.RemoteHost, "https://ollama.com")
-			if strings.HasPrefix(info.RemoteHost, "https://ollama.com") {
+
-				fmt.Fprintf(os.Stderr, "Connecting to '%s' on 'ollama.com' ⚡\n", info.RemoteModel)
+		// Check if user is signed in for ollama.com cloud models
-			} else {
+		if isCloud {
-				fmt.Fprintf(os.Stderr, "Connecting to '%s' on '%s'\n", info.RemoteModel, info.RemoteHost)
+			if _, err := client.Whoami(cmd.Context()); err != nil {
 				return err
 			}
 		}
 		if opts.ShowConnect {
 			p.StopAndClear()
 			remoteModel := info.RemoteModel
 			if remoteModel == "" {
 				remoteModel = opts.Model
 			}
 			if isCloud {
 				fmt.Fprintf(os.Stderr, "Connecting to '%s' on 'ollama.com' ⚡\n", remoteModel)
 			} else {
 				fmt.Fprintf(os.Stderr, "Connecting to '%s' on '%s'\n", remoteModel, info.RemoteHost)
 			}
 		}
 		return nil
 	}
@@ -441,6 +494,64 @@ func generateEmbedding(cmd *cobra.Command, modelName, input string, keepAlive *a
 	return nil
 }
 // TODO(parthsareen): consolidate with TUI signin flow
 //
 // handleCloudAuthorizationError reports whether err is an authorization error
 // with HTTP status 401. When it is, a sign-in notice is printed to stdout,
 // followed by the sign-in URL instructions if the error carries a URL.
 func handleCloudAuthorizationError(err error) bool {
 	var authErr api.AuthorizationError
 	if !errors.As(err, &authErr) || authErr.StatusCode != http.StatusUnauthorized {
 		return false
 	}
 	fmt.Printf("You need to be signed in to Ollama to run Cloud models.\n\n")
 	if signinURL := authErr.SigninURL; signinURL != "" {
 		fmt.Printf(ConnectInstructions, signinURL)
 	}
 	return true
 }
 // TEMP(drifkin): To match legacy `ollama run some-model:cloud` behavior, we
 // best-effort pull cloud stub files for any explicit cloud source models.
 // Remove this once `/api/tags` is cloud-aware.
 //
 // ensureCloudStub does nothing unless modelName has an explicit cloud source.
 // Otherwise it pulls the normalized name when neither the given name nor the
 // normalized name appears in the server's model list. Every failure along the
 // way is logged as a warning and otherwise ignored.
 func ensureCloudStub(ctx context.Context, client *api.Client, modelName string) {
 	if !modelref.HasExplicitCloudSource(modelName) {
 		return
 	}
 	pullName, _, err := modelref.NormalizePullName(modelName)
 	if err != nil {
 		slog.Warn("failed to normalize pull name", "model", modelName, "error", err, "normalizedName", pullName)
 		return
 	}
 	installed, err := client.List(ctx)
 	if err != nil {
 		slog.Warn("failed to list models", "error", err)
 		return
 	}
 	// Already listed under either spelling: nothing to pull.
 	for _, candidate := range []string{modelName, pullName} {
 		if hasListedModelName(installed.Models, candidate) {
 			return
 		}
 	}
 	logutil.Trace("pulling cloud stub", "model", modelName, "normalizedName", pullName)
 	ignoreProgress := func(api.ProgressResponse) error { return nil }
 	if err := client.Pull(ctx, &api.PullRequest{Model: pullName}, ignoreProgress); err != nil {
 		slog.Warn("failed to pull cloud stub", "model", modelName, "error", err)
 	}
 }
 // hasListedModelName reports whether any entry in models has a Name or Model
 // field equal to name, compared case-insensitively.
 func hasListedModelName(models []api.ListModelResponse, name string) bool {
 	return slices.ContainsFunc(models, func(entry api.ListModelResponse) bool {
 		return strings.EqualFold(entry.Name, name) || strings.EqualFold(entry.Model, name)
 	})
 }
 func RunHandler(cmd *cobra.Command, args []string) error {
 	interactive := true
@@ -537,12 +648,16 @@ func RunHandler(cmd *cobra.Command, args []string) error {
 	}
 	name := args[0]
 	requestedCloud := modelref.HasExplicitCloudSource(name)
 	info, err := func() (*api.ShowResponse, error) {
 		showReq := &api.ShowRequest{Name: name}
 		info, err := client.Show(cmd.Context(), showReq)
 		var se api.StatusError
 		if errors.As(err, &se) && se.StatusCode == http.StatusNotFound {
 			if requestedCloud {
 				return nil, err
 			}
 			if err := PullHandler(cmd, []string{name}); err != nil {
 				return nil, err
 			}
@@ -551,15 +666,21 @@ func RunHandler(cmd *cobra.Command, args []string) error {
 		return info, err
 	}()
 	if err != nil {
 		if handleCloudAuthorizationError(err) {
 			return nil
 		}
 		return err
 	}
 	ensureCloudStub(cmd.Context(), client, name)
 	opts.Think, err = inferThinkingOption(&info.Capabilities, &opts, thinkFlag.Changed)
 	if err != nil {
 		return err
 	}
-	opts.MultiModal = slices.Contains(info.Capabilities, model.CapabilityVision)
+	audioCapable := slices.Contains(info.Capabilities, model.CapabilityAudio)
 	opts.MultiModal = slices.Contains(info.Capabilities, model.CapabilityVision) || audioCapable
 	// TODO: remove the projector info and vision info checks below,
 	// these are left in for backwards compatibility with older servers
@@ -574,7 +695,7 @@ func RunHandler(cmd *cobra.Command, args []string) error {
 		}
 	}
-	opts.ParentModel = info.Details.ParentModel
+	applyShowResponseToRunOptions(&opts, info)
 	// Check if this is an embedding model
 	isEmbeddingModel := slices.Contains(info.Capabilities, model.CapabilityEmbedding)
@@ -645,7 +766,13 @@ func RunHandler(cmd *cobra.Command, args []string) error {
 		return generateInteractive(cmd, opts)
 	}
-	return generate(cmd, opts)
+	if err := generate(cmd, opts); err != nil {
 		if handleCloudAuthorizationError(err) {
 			return nil
 		}
 		return err
 	}
 	return nil
 }
 func SigninHandler(cmd *cobra.Command, args []string) error {
@@ -662,6 +789,7 @@ func SigninHandler(cmd *cobra.Command, args []string) error {
 			fmt.Println()
 			if aErr.SigninURL != "" {
 				_ = browser.OpenURL(aErr.SigninURL)
 				fmt.Printf(ConnectInstructions, aErr.SigninURL)
 			}
 			return nil
@@ -1018,8 +1146,10 @@ func showInfo(resp *api.ShowResponse, verbose bool, w io.Writer) error {
 		}
 		if resp.ModelInfo != nil {
-			arch := resp.ModelInfo["general.architecture"].(string)
+			arch, _ := resp.ModelInfo["general.architecture"].(string)
-			rows = append(rows, []string{"", "architecture", arch})
+			if arch != "" {
 				rows = append(rows, []string{"", "architecture", arch})
 			}
 			var paramStr string
 			if resp.Details.ParameterSize != "" {
@@ -1029,7 +1159,9 @@ func showInfo(resp *api.ShowResponse, verbose bool, w io.Writer) error {
 					paramStr = format.HumanNumber(uint64(f))
 				}
 			}
-			rows = append(rows, []string{"", "parameters", paramStr})
+			if paramStr != "" {
 				rows = append(rows, []string{"", "parameters", paramStr})
 			}
 			if v, ok := resp.ModelInfo[fmt.Sprintf("%s.context_length", arch)]; ok {
 				if f, ok := v.(float64); ok {
@@ -1279,23 +1411,30 @@ func PullHandler(cmd *cobra.Command, args []string) error {
 type generateContextKey string
 type runOptions struct {
-	Model        string
+	Model          string
-	ParentModel  string
+	ParentModel    string
-	Prompt       string
+	LoadedMessages []api.Message
-	Messages     []api.Message
+	Prompt         string
-	WordWrap     bool
+	Messages       []api.Message
-	Format       string
+	WordWrap       bool
-	System       string
+	Format         string
-	Images       []api.ImageData
+	System         string
-	Options      map[string]any
+	Images         []api.ImageData
-	MultiModal   bool
+	Options        map[string]any
-	KeepAlive    *api.Duration
+	MultiModal     bool
-	Think        *api.ThinkValue
+	KeepAlive      *api.Duration
-	HideThinking bool
+	Think          *api.ThinkValue
-	ShowConnect  bool
+	HideThinking   bool
 	ShowConnect    bool
 }
 func (r runOptions) Copy() runOptions {
 	var loadedMessages []api.Message
 	if r.LoadedMessages != nil {
 		loadedMessages = make([]api.Message, len(r.LoadedMessages))
 		copy(loadedMessages, r.LoadedMessages)
 	}
 	var messages []api.Message
 	if r.Messages != nil {
 		messages = make([]api.Message, len(r.Messages))
@@ -1323,23 +1462,29 @@ func (r runOptions) Copy() runOptions {
 	}
 	return runOptions{
-		Model:        r.Model,
+		Model:          r.Model,
-		ParentModel:  r.ParentModel,
+		ParentModel:    r.ParentModel,
-		Prompt:       r.Prompt,
+		LoadedMessages: loadedMessages,
-		Messages:     messages,
+		Prompt:         r.Prompt,
-		WordWrap:     r.WordWrap,
+		Messages:       messages,
-		Format:       r.Format,
+		WordWrap:       r.WordWrap,
-		System:       r.System,
+		Format:         r.Format,
-		Images:       images,
+		System:         r.System,
-		Options:      opts,
+		Images:         images,
-		MultiModal:   r.MultiModal,
+		Options:        opts,
-		KeepAlive:    r.KeepAlive,
+		MultiModal:     r.MultiModal,
-		Think:        think,
+		KeepAlive:      r.KeepAlive,
-		HideThinking: r.HideThinking,
+		Think:          think,
-		ShowConnect:  r.ShowConnect,
+		HideThinking:   r.HideThinking,
 		ShowConnect:    r.ShowConnect,
 	}
 }
 // applyShowResponseToRunOptions copies the parent model name and a clone of
 // the show response's messages into opts.
 func applyShowResponseToRunOptions(opts *runOptions, info *api.ShowResponse) {
 	// Clone so later writes to info.Messages do not show up in opts.
 	opts.LoadedMessages = slices.Clone(info.Messages)
 	opts.ParentModel = info.Details.ParentModel
 }
 type displayResponseState struct {
 	lineLength int
 	wordBuffer string
@@ -1347,6 +1492,9 @@ type displayResponseState struct {
 func displayResponse(content string, wordWrap bool, state *displayResponseState) {
 	termWidth, _, _ := term.GetSize(int(os.Stdout.Fd()))
 	if termWidth == 0 {
 		termWidth = 80
 	}
 	if wordWrap && termWidth >= 10 {
 		for _, ch := range content {
 			if state.lineLength+1 > termWidth-5 {
@@ -1745,7 +1893,7 @@ func checkServerHeartbeat(cmd *cobra.Command, _ []string) error {
 			return err
 		}
 		if err := startApp(cmd.Context(), client); err != nil {
-			return fmt.Errorf("ollama server not responding - %w", err)
+			return err
 		}
 	}
 	return nil
@@ -1786,6 +1934,148 @@ Environment Variables:
 	cmd.SetUsageTemplate(cmd.UsageTemplate() + envUsage)
 }
 // ensureServerRunning checks if the ollama server is running and starts it in
 // the background if not. After starting it, the heartbeat endpoint is polled
 // every 500ms until the server responds.
 //
 // It returns an error when the client cannot be built from the environment,
 // the current executable cannot be located, the server process fails to
 // start, or ctx is done before the server answers a heartbeat.
 func ensureServerRunning(ctx context.Context) error {
 	client, err := api.ClientFromEnvironment()
 	if err != nil {
 		return err
 	}
 	// Check if server is already running
 	if err := client.Heartbeat(ctx); err == nil {
 		return nil // server is already running
 	}
 	// Server not running, start it in the background
 	exe, err := os.Executable()
 	if err != nil {
 		return fmt.Errorf("could not find executable: %w", err)
 	}
 	serverCmd := exec.CommandContext(ctx, exe, "serve")
 	serverCmd.Env = os.Environ()
 	serverCmd.SysProcAttr = backgroundServerSysProcAttr()
 	if err := serverCmd.Start(); err != nil {
 		return fmt.Errorf("failed to start server: %w", err)
 	}
 	// Wait for the server to be ready, but give up as soon as ctx is done so a
 	// server that never comes up cannot leave the caller polling forever.
 	ticker := time.NewTicker(500 * time.Millisecond)
 	defer ticker.Stop()
 	for {
 		select {
 		case <-ctx.Done():
 			return fmt.Errorf("waiting for server to start: %w", ctx.Err())
 		case <-ticker.C:
 			if err := client.Heartbeat(ctx); err == nil {
 				return nil // server has started
 			}
 		}
 	}
 }
 // launchInteractiveModel loads modelName and then starts an interactive
 // session with it, wrapping the error from whichever step fails.
 func launchInteractiveModel(cmd *cobra.Command, modelName string) error {
 	wrapForTerm := os.Getenv("TERM") == "xterm-256color"
 	opts := runOptions{
 		Model:       modelName,
 		WordWrap:    wrapForTerm,
 		Options:     make(map[string]any),
 		ShowConnect: true,
 	}
 	// loadOrUnloadModel is cloud-safe here: remote/cloud models skip local preload
 	// and only validate auth/connectivity before interactive chat starts.
 	err := loadOrUnloadModel(cmd, &opts)
 	if err != nil {
 		return fmt.Errorf("error loading model: %w", err)
 	}
 	err = generateInteractive(cmd, opts)
 	if err != nil {
 		return fmt.Errorf("error running model: %w", err)
 	}
 	return nil
 }
 // runInteractiveTUI runs the main interactive TUI menu. It first makes sure
 // the server is up, then repeats launcher steps until one asks to stop.
 // Errors are printed to stderr rather than returned.
 func runInteractiveTUI(cmd *cobra.Command) {
 	// Ensure the server is running before showing the TUI
 	if startErr := ensureServerRunning(cmd.Context()); startErr != nil {
 		fmt.Fprintf(os.Stderr, "Error starting server: %v\n", startErr)
 		return
 	}
 	deps := launcherDeps{
 		runModel:          launchInteractiveModel,
 		launchIntegration: launch.LaunchIntegration,
 		resolveRunModel:   launch.ResolveRunModel,
 		runMenu:           tui.RunMenu,
 		buildState:        launch.BuildLauncherState,
 	}
 	for {
 		keepGoing, stepErr := runInteractiveTUIStep(cmd, deps)
 		if stepErr != nil {
 			// A failed step is reported but does not by itself end the loop.
 			fmt.Fprintf(os.Stderr, "Error: %v\n", stepErr)
 		}
 		if keepGoing {
 			continue
 		}
 		return
 	}
 }
 // launcherDeps bundles the functions the interactive launcher calls, so they
 // can be passed as a set to runInteractiveTUIStep and runLauncherAction.
 //
 //   - buildState produces the launcher state that is handed to runMenu.
 //   - runMenu takes that state and returns the action to carry out.
 //   - resolveRunModel turns a run-model request into a model name.
 //   - launchIntegration carries out an integration launch request.
 //   - runModel runs the model with the given name.
 type launcherDeps struct {
 	buildState        func(context.Context) (*launch.LauncherState, error)
 	runMenu           func(*launch.LauncherState) (tui.TUIAction, error)
 	resolveRunModel   func(context.Context, launch.RunModelRequest) (string, error)
 	launchIntegration func(context.Context, launch.IntegrationLaunchRequest) error
 	runModel          func(*cobra.Command, string) error
 }
 // runInteractiveTUIStep performs one pass of the launcher: build the state,
 // run the menu on it, and dispatch the resulting action. The boolean result
 // reports whether the caller should run another step; it is false whenever
 // building the state or running the menu fails.
 func runInteractiveTUIStep(cmd *cobra.Command, deps launcherDeps) (bool, error) {
 	var (
 		state  *launch.LauncherState
 		action tui.TUIAction
 		err    error
 	)
 	if state, err = deps.buildState(cmd.Context()); err != nil {
 		return false, fmt.Errorf("build launcher state: %w", err)
 	}
 	if action, err = deps.runMenu(state); err != nil {
 		return false, fmt.Errorf("run launcher menu: %w", err)
 	}
 	return runLauncherAction(cmd, action, deps)
 }
 // saveLauncherSelection stores the action's last selection in the config and
 // ignores any error from doing so.
 func saveLauncherSelection(action tui.TUIAction) {
 	selection := action.LastSelection()
 	// Best effort only: this affects menu recall, not launch correctness.
 	_ = config.SetLastSelection(selection)
 }
 // runLauncherAction carries out a single menu action using deps. The boolean
 // result reports whether the menu loop should continue: it is false for
 // TUIActionNone, for a successful "vscode" integration launch, and for an
 // unknown action kind. A launch.ErrCancelled from resolving a model or
 // launching an integration is treated as "continue without error".
 func runLauncherAction(cmd *cobra.Command, action tui.TUIAction, deps launcherDeps) (bool, error) {
 	switch action.Kind {
 	case tui.TUIActionNone:
 		return false, nil
 	case tui.TUIActionRunModel:
 		saveLauncherSelection(action)
 		modelName, resolveErr := deps.resolveRunModel(cmd.Context(), action.RunModelRequest())
 		switch {
 		case errors.Is(resolveErr, launch.ErrCancelled):
 			return true, nil
 		case resolveErr != nil:
 			return true, fmt.Errorf("selecting model: %w", resolveErr)
 		}
 		// Whatever runModel returns, the menu loop keeps going.
 		return true, deps.runModel(cmd, modelName)
 	case tui.TUIActionLaunchIntegration:
 		saveLauncherSelection(action)
 		launchErr := deps.launchIntegration(cmd.Context(), action.IntegrationLaunchRequest())
 		if launchErr != nil && !errors.Is(launchErr, launch.ErrCancelled) {
 			return true, fmt.Errorf("launching %s: %w", action.Integration, launchErr)
 		}
 		// VS Code is a GUI app — exit the TUI loop after launching
 		leaveLoop := launchErr == nil && action.Integration == "vscode"
 		return !leaveLoop, nil
 	default:
 		return false, fmt.Errorf("unknown launcher action: %d", action.Kind)
 	}
 }
 func NewCLI() *cobra.Command {
 	log.SetFlags(log.LstdFlags | log.Lshortfile)
 	cobra.EnableCommandSorting = false
@@ -1808,11 +2098,13 @@ func NewCLI() *cobra.Command {
 				return
 			}
-			cmd.Print(cmd.UsageString())
+			runInteractiveTUI(cmd)
 		},
 	}
 	rootCmd.Flags().BoolP("version", "v", false, "Show version information")
 	rootCmd.Flags().Bool("verbose", false, "Show timings for response")
 	rootCmd.Flags().Bool("nowordwrap", false, "Don't wrap words to the next line automatically")
 	createCmd := &cobra.Command{
 		Use:   "create MODEL",
@@ -1872,6 +2164,9 @@ func NewCLI() *cobra.Command {
 	// Image generation flags (width, height, steps, seed, etc.)
 	imagegen.RegisterFlags(runCmd)
 	runCmd.Flags().Bool("imagegen", false, "Use the imagegen runner for LLM inference")
 	runCmd.Flags().MarkHidden("imagegen")
 	stopCmd := &cobra.Command{
 		Use:     "stop MODEL",
 		Short:   "Stop a running model",
@@ -1883,7 +2178,7 @@ func NewCLI() *cobra.Command {
 	serveCmd := &cobra.Command{
 		Use:     "serve",
 		Aliases: []string{"start"},
-		Short:   "Start ollama",
+		Short:   "Start Ollama",
 		Args:    cobra.ExactArgs(0),
 		RunE:    RunServer,
 	}
@@ -1916,6 +2211,15 @@ func NewCLI() *cobra.Command {
 		RunE:    SigninHandler,
 	}
 	loginCmd := &cobra.Command{
 		Use:     "login",
 		Short:   "Sign in to ollama.com",
 		Hidden:  true,
 		Args:    cobra.ExactArgs(0),
 		PreRunE: checkServerHeartbeat,
 		RunE:    SigninHandler,
 	}
 	signoutCmd := &cobra.Command{
 		Use:     "signout",
 		Short:   "Sign out from ollama.com",
@@ -1924,6 +2228,15 @@ func NewCLI() *cobra.Command {
 		RunE:    SignoutHandler,
 	}
 	logoutCmd := &cobra.Command{
 		Use:     "logout",
 		Short:   "Sign out from ollama.com",
 		Hidden:  true,
 		Args:    cobra.ExactArgs(0),
 		PreRunE: checkServerHeartbeat,
 		RunE:    SignoutHandler,
 	}
 	listCmd := &cobra.Command{
 		Use:     "list",
 		Aliases: []string{"ls"},
@@ -1986,7 +2299,7 @@ func NewCLI() *cobra.Command {
 		switch cmd {
 		case runCmd:
 			imagegen.AppendFlagsDocs(cmd)
-			appendEnvDocs(cmd, []envconfig.EnvVar{envVars["OLLAMA_HOST"], envVars["OLLAMA_NOHISTORY"]})
+			appendEnvDocs(cmd, []envconfig.EnvVar{envVars["OLLAMA_EDITOR"], envVars["OLLAMA_HOST"], envVars["OLLAMA_NOHISTORY"]})
 		case serveCmd:
 			appendEnvDocs(cmd, []envconfig.EnvVar{
 				envVars["OLLAMA_DEBUG"],
@@ -1997,6 +2310,7 @@ func NewCLI() *cobra.Command {
 				envVars["OLLAMA_MAX_QUEUE"],
 				envVars["OLLAMA_MODELS"],
 				envVars["OLLAMA_NUM_PARALLEL"],
 				envVars["OLLAMA_NO_CLOUD"],
 				envVars["OLLAMA_NOPRUNE"],
 				envVars["OLLAMA_ORIGINS"],
 				envVars["OLLAMA_SCHED_SPREAD"],
@@ -2020,12 +2334,15 @@ func NewCLI() *cobra.Command {
 		pullCmd,
 		pushCmd,
 		signinCmd,
 		loginCmd,
 		signoutCmd,
 		logoutCmd,
 		listCmd,
 		psCmd,
 		copyCmd,
 		deleteCmd,
 		runnerCmd,
 		launch.LaunchCmd(checkServerHeartbeat, runInteractiveTUI),
 	)
 	return rootCmd
--- a/cmd/cmd_launcher_test.go
+++ b/cmd/cmd_launcher_test.go
@@ -0,0 +1,270 @@
 package cmd
 import (
 	"context"
 	"testing"
 	"github.com/spf13/cobra"
 	"github.com/ollama/ollama/cmd/config"
 	"github.com/ollama/ollama/cmd/launch"
 	"github.com/ollama/ollama/cmd/tui"
 )
 // setCmdTestHome points both HOME and USERPROFILE at dir for the duration of
 // the test.
 func setCmdTestHome(t *testing.T, dir string) {
 	t.Helper()
 	for _, key := range []string{"HOME", "USERPROFILE"} {
 		t.Setenv(key, dir)
 	}
 }
 // unexpectedRunModelResolution returns a run-model resolver that fails the
 // test as soon as it is called.
 func unexpectedRunModelResolution(t *testing.T) func(context.Context, launch.RunModelRequest) (string, error) {
 	t.Helper()
 	failIfCalled := func(_ context.Context, req launch.RunModelRequest) (string, error) {
 		t.Fatalf("did not expect run-model resolution: %+v", req)
 		return "", nil
 	}
 	return failIfCalled
 }
 // unexpectedIntegrationLaunch returns an integration launcher that fails the
 // test as soon as it is called.
 func unexpectedIntegrationLaunch(t *testing.T) func(context.Context, launch.IntegrationLaunchRequest) error {
 	t.Helper()
 	failIfCalled := func(_ context.Context, req launch.IntegrationLaunchRequest) error {
 		t.Fatalf("did not expect integration launch: %+v", req)
 		return nil
 	}
 	return failIfCalled
 }
 // unexpectedModelLaunch returns a model runner that fails the test as soon as
 // it is called.
 func unexpectedModelLaunch(t *testing.T) func(*cobra.Command, string) error {
 	t.Helper()
 	failIfCalled := func(_ *cobra.Command, name string) error {
 		t.Fatalf("did not expect chat launch: %s", name)
 		return nil
 	}
 	return failIfCalled
 }
 // TestRunInteractiveTUI_RunModelActionsUseResolveRunModel drives the launcher
 // step loop with a run-model action and checks that the request reaches
 // resolveRunModel with the expected ForcePicker value, that the resolved model
 // is the one run, and that "run" is recorded as the last selection.
 func TestRunInteractiveTUI_RunModelActionsUseResolveRunModel(t *testing.T) {
 	type testCase struct {
 		name      string
 		action    tui.TUIAction
 		wantForce bool
 		wantModel string
 	}
 	cases := []testCase{
 		{
 			name:      "enter uses saved model flow",
 			action:    tui.TUIAction{Kind: tui.TUIActionRunModel},
 			wantModel: "qwen3:8b",
 		},
 		{
 			name:      "right forces picker",
 			action:    tui.TUIAction{Kind: tui.TUIActionRunModel, ForceConfigure: true},
 			wantForce: true,
 			wantModel: "glm-5:cloud",
 		},
 	}
 	for _, tc := range cases {
 		t.Run(tc.name, func(t *testing.T) {
 			setCmdTestHome(t, t.TempDir())
 			var (
 				menuCalls int
 				gotReq    launch.RunModelRequest
 				launched  string
 			)
 			deps := launcherDeps{
 				buildState: func(context.Context) (*launch.LauncherState, error) {
 					return &launch.LauncherState{}, nil
 				},
 				runMenu: func(*launch.LauncherState) (tui.TUIAction, error) {
 					// Only the first menu call yields the action under test;
 					// later calls return TUIActionNone, which ends the loop.
 					menuCalls++
 					if menuCalls > 1 {
 						return tui.TUIAction{Kind: tui.TUIActionNone}, nil
 					}
 					return tc.action, nil
 				},
 				resolveRunModel: func(_ context.Context, req launch.RunModelRequest) (string, error) {
 					gotReq = req
 					return tc.wantModel, nil
 				},
 				launchIntegration: unexpectedIntegrationLaunch(t),
 				runModel: func(_ *cobra.Command, name string) error {
 					launched = name
 					return nil
 				},
 			}
 			cmd := &cobra.Command{}
 			cmd.SetContext(context.Background())
 			keepGoing := true
 			for keepGoing {
 				var stepErr error
 				keepGoing, stepErr = runInteractiveTUIStep(cmd, deps)
 				if stepErr != nil {
 					t.Fatalf("unexpected step error: %v", stepErr)
 				}
 			}
 			if gotReq.ForcePicker != tc.wantForce {
 				t.Fatalf("expected ForcePicker=%v, got %v", tc.wantForce, gotReq.ForcePicker)
 			}
 			if launched != tc.wantModel {
 				t.Fatalf("expected interactive launcher to run %q, got %q", tc.wantModel, launched)
 			}
 			if selection := config.LastSelection(); selection != "run" {
 				t.Fatalf("expected last selection to be run, got %q", selection)
 			}
 		})
 	}
 }
 // TestRunInteractiveTUI_IntegrationActionsUseLaunchIntegration drives the
 // launcher step loop with an integration action and checks that the request
 // reaches launchIntegration with the integration name and the expected
 // ForceConfigure value, and that the name is recorded as the last selection.
 func TestRunInteractiveTUI_IntegrationActionsUseLaunchIntegration(t *testing.T) {
 	type testCase struct {
 		name      string
 		action    tui.TUIAction
 		wantForce bool
 	}
 	cases := []testCase{
 		{
 			name:   "enter launches integration",
 			action: tui.TUIAction{Kind: tui.TUIActionLaunchIntegration, Integration: "claude"},
 		},
 		{
 			name:      "right forces configure",
 			action:    tui.TUIAction{Kind: tui.TUIActionLaunchIntegration, Integration: "claude", ForceConfigure: true},
 			wantForce: true,
 		},
 	}
 	for _, tc := range cases {
 		t.Run(tc.name, func(t *testing.T) {
 			setCmdTestHome(t, t.TempDir())
 			var (
 				menuCalls int
 				gotReq    launch.IntegrationLaunchRequest
 			)
 			deps := launcherDeps{
 				buildState: func(context.Context) (*launch.LauncherState, error) {
 					return &launch.LauncherState{}, nil
 				},
 				runMenu: func(*launch.LauncherState) (tui.TUIAction, error) {
 					// Only the first menu call yields the action under test;
 					// later calls return TUIActionNone, which ends the loop.
 					menuCalls++
 					if menuCalls > 1 {
 						return tui.TUIAction{Kind: tui.TUIActionNone}, nil
 					}
 					return tc.action, nil
 				},
 				resolveRunModel: unexpectedRunModelResolution(t),
 				launchIntegration: func(_ context.Context, req launch.IntegrationLaunchRequest) error {
 					gotReq = req
 					return nil
 				},
 				runModel: unexpectedModelLaunch(t),
 			}
 			cmd := &cobra.Command{}
 			cmd.SetContext(context.Background())
 			keepGoing := true
 			for keepGoing {
 				var stepErr error
 				keepGoing, stepErr = runInteractiveTUIStep(cmd, deps)
 				if stepErr != nil {
 					t.Fatalf("unexpected step error: %v", stepErr)
 				}
 			}
 			if gotReq.Name != "claude" {
 				t.Fatalf("expected integration name to be passed through, got %q", gotReq.Name)
 			}
 			if gotReq.ForceConfigure != tc.wantForce {
 				t.Fatalf("expected ForceConfigure=%v, got %v", tc.wantForce, gotReq.ForceConfigure)
 			}
 			if selection := config.LastSelection(); selection != "claude" {
 				t.Fatalf("expected last selection to be claude, got %q", selection)
 			}
 		})
 	}
 }
 // TestRunLauncherAction_RunModelContinuesAfterCancellation checks that a
 // launch.ErrCancelled from model resolution yields no error and keeps the
 // menu loop going.
 func TestRunLauncherAction_RunModelContinuesAfterCancellation(t *testing.T) {
 	setCmdTestHome(t, t.TempDir())
 	cmd := &cobra.Command{}
 	cmd.SetContext(context.Background())
 	// buildState and runMenu are left unset, as runLauncherAction is called directly.
 	deps := launcherDeps{
 		resolveRunModel: func(context.Context, launch.RunModelRequest) (string, error) {
 			return "", launch.ErrCancelled
 		},
 		launchIntegration: unexpectedIntegrationLaunch(t),
 		runModel:          unexpectedModelLaunch(t),
 	}
 	action := tui.TUIAction{Kind: tui.TUIActionRunModel}
 	keepGoing, err := runLauncherAction(cmd, action, deps)
 	switch {
 	case err != nil:
 		t.Fatalf("expected nil error on cancellation, got %v", err)
 	case !keepGoing:
 		t.Fatal("expected cancellation to continue the menu loop")
 	}
 }
 // TestRunLauncherAction_VSCodeExitsTUILoop checks that a successful "vscode"
 // integration launch stops the menu loop while a successful "claude" launch
 // keeps it going.
 func TestRunLauncherAction_VSCodeExitsTUILoop(t *testing.T) {
 	setCmdTestHome(t, t.TempDir())
 	cmd := &cobra.Command{}
 	cmd.SetContext(context.Background())
 	// launchOK dispatches action with an integration launcher that always succeeds.
 	launchOK := func(action tui.TUIAction) (bool, error) {
 		return runLauncherAction(cmd, action, launcherDeps{
 			resolveRunModel: unexpectedRunModelResolution(t),
 			launchIntegration: func(context.Context, launch.IntegrationLaunchRequest) error {
 				return nil
 			},
 			runModel: unexpectedModelLaunch(t),
 		})
 	}
 	// VS Code should exit the TUI loop (return false) after a successful launch.
 	keepGoing, err := launchOK(tui.TUIAction{Kind: tui.TUIActionLaunchIntegration, Integration: "vscode"})
 	if err != nil {
 		t.Fatalf("expected nil error, got %v", err)
 	}
 	if keepGoing {
 		t.Fatal("expected vscode launch to exit the TUI loop (return false)")
 	}
 	// Other integrations should continue the TUI loop (return true).
 	keepGoing, err = launchOK(tui.TUIAction{Kind: tui.TUIActionLaunchIntegration, Integration: "claude"})
 	if err != nil {
 		t.Fatalf("expected nil error, got %v", err)
 	}
 	if !keepGoing {
 		t.Fatal("expected non-vscode integration to continue the TUI loop (return true)")
 	}
 }
 // TestRunLauncherAction_IntegrationContinuesAfterCancellation checks that a
 // launch.ErrCancelled from an integration launch yields no error and keeps
 // the menu loop going.
 func TestRunLauncherAction_IntegrationContinuesAfterCancellation(t *testing.T) {
 	setCmdTestHome(t, t.TempDir())
 	cmd := &cobra.Command{}
 	cmd.SetContext(context.Background())
 	// buildState and runMenu are left unset, as runLauncherAction is called directly.
 	deps := launcherDeps{
 		resolveRunModel: unexpectedRunModelResolution(t),
 		launchIntegration: func(context.Context, launch.IntegrationLaunchRequest) error {
 			return launch.ErrCancelled
 		},
 		runModel: unexpectedModelLaunch(t),
 	}
 	action := tui.TUIAction{Kind: tui.TUIActionLaunchIntegration, Integration: "claude"}
 	keepGoing, err := runLauncherAction(cmd, action, deps)
 	switch {
 	case err != nil:
 		t.Fatalf("expected nil error on cancellation, got %v", err)
 	case !keepGoing:
 		t.Fatal("expected cancellation to continue the menu loop")
 	}
 }
--- a/cmd/cmd_test.go
+++ b/cmd/cmd_test.go
@@ -3,6 +3,7 @@ package cmd
 import (
 	"bytes"
 	"encoding/json"
 	"errors"
 	"fmt"
 	"io"
 	"net/http"
@@ -300,7 +301,7 @@ Weigh anchor!
 				ParameterSize:     "7B",
 				QuantizationLevel: "FP16",
 			},
-			Requires: "0.14.0",
+			Requires: "0.19.0",
 		}, false, &b); err != nil {
 			t.Fatal(err)
 		}
@@ -309,10 +310,17 @@ Weigh anchor!
    architecture    test      
    parameters      7B        
    quantization    FP16      
-    requires        0.14.0    
+    requires        0.19.0
 `
-		if diff := cmp.Diff(expect, b.String()); diff != "" {
+		trimLinePadding := func(s string) string {
 			lines := strings.Split(s, "\n")
 			for i, line := range lines {
 				lines[i] = strings.TrimRight(line, " \t\r")
 			}
 			return strings.Join(lines, "\n")
 		}
 		if diff := cmp.Diff(trimLinePadding(expect), trimLinePadding(b.String())); diff != "" {
 			t.Errorf("unexpected output (-want +got):\n%s", diff)
 		}
 	})
@@ -704,6 +712,347 @@ func TestRunEmbeddingModelNoInput(t *testing.T) {
 	}
 }
 // TestRunHandler_CloudAuthErrorOnShow_PrintsSigninMessage checks that when
 // /api/show answers 401 with a signin_url, RunHandler returns nil, never
 // reaches /api/generate, and writes the sign-in guidance and URL to stdout.
 func TestRunHandler_CloudAuthErrorOnShow_PrintsSigninMessage(t *testing.T) {
 	var generateCalled bool
 	mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		switch {
 		case r.URL.Path == "/api/show" && r.Method == http.MethodPost:
 			// Reject the show request as unauthorized and include a sign-in URL.
 			w.WriteHeader(http.StatusUnauthorized)
 			if err := json.NewEncoder(w).Encode(map[string]string{
 				"error":      "unauthorized",
 				"signin_url": "https://ollama.com/signin",
 			}); err != nil {
 				http.Error(w, err.Error(), http.StatusInternalServerError)
 			}
 			return
 		case r.URL.Path == "/api/generate" && r.Method == http.MethodPost:
 			// Recorded so the test can assert this endpoint is never hit.
 			generateCalled = true
 			w.WriteHeader(http.StatusOK)
 			if err := json.NewEncoder(w).Encode(api.GenerateResponse{Done: true}); err != nil {
 				http.Error(w, err.Error(), http.StatusInternalServerError)
 			}
 			return
 		default:
 			http.NotFound(w, r)
 		}
 	}))
 	t.Setenv("OLLAMA_HOST", mockServer.URL)
 	t.Cleanup(mockServer.Close)
 	cmd := &cobra.Command{}
 	cmd.SetContext(t.Context())
 	cmd.Flags().String("keepalive", "", "")
 	cmd.Flags().Bool("truncate", false, "")
 	cmd.Flags().Int("dimensions", 0, "")
 	cmd.Flags().Bool("verbose", false, "")
 	cmd.Flags().Bool("insecure", false, "")
 	cmd.Flags().Bool("nowordwrap", false, "")
 	cmd.Flags().String("format", "", "")
 	cmd.Flags().String("think", "", "")
 	cmd.Flags().Bool("hidethinking", false, "")
 	// Swap os.Stdout for a pipe so the printed message can be inspected; the
 	// original is restored on cleanup.
 	oldStdout := os.Stdout
 	readOut, writeOut, _ := os.Pipe()
 	os.Stdout = writeOut
 	t.Cleanup(func() { os.Stdout = oldStdout })
 	err := RunHandler(cmd, []string{"gpt-oss:20b:cloud", "hi"})
 	// Close the write end first so io.Copy below sees EOF.
 	_ = writeOut.Close()
 	var out bytes.Buffer
 	_, _ = io.Copy(&out, readOut)
 	if err != nil {
 		t.Fatalf("RunHandler returned error: %v", err)
 	}
 	if generateCalled {
 		t.Fatal("expected run to stop before /api/generate after unauthorized /api/show")
 	}
 	if !strings.Contains(out.String(), "You need to be signed in to Ollama to run Cloud models.") {
 		t.Fatalf("expected sign-in guidance message, got %q", out.String())
 	}
 	if !strings.Contains(out.String(), "https://ollama.com/signin") {
 		t.Fatalf("expected signin_url in output, got %q", out.String())
 	}
 }
 // TestRunHandler_CloudAuthErrorOnGenerate_PrintsSigninMessage checks that when
 // /api/show succeeds but /api/generate answers 401 with a signin_url,
 // RunHandler returns nil and writes the sign-in guidance and URL to stdout.
 func TestRunHandler_CloudAuthErrorOnGenerate_PrintsSigninMessage(t *testing.T) {
 	mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		switch {
 		case r.URL.Path == "/api/show" && r.Method == http.MethodPost:
 			w.WriteHeader(http.StatusOK)
 			if err := json.NewEncoder(w).Encode(api.ShowResponse{
 				Capabilities: []model.Capability{model.CapabilityCompletion},
 			}); err != nil {
 				http.Error(w, err.Error(), http.StatusInternalServerError)
 			}
 			return
 		case r.URL.Path == "/api/generate" && r.Method == http.MethodPost:
 			// Reject generation as unauthorized and include a sign-in URL.
 			w.WriteHeader(http.StatusUnauthorized)
 			if err := json.NewEncoder(w).Encode(map[string]string{
 				"error":      "unauthorized",
 				"signin_url": "https://ollama.com/signin",
 			}); err != nil {
 				http.Error(w, err.Error(), http.StatusInternalServerError)
 			}
 			return
 		default:
 			http.NotFound(w, r)
 		}
 	}))
 	t.Setenv("OLLAMA_HOST", mockServer.URL)
 	t.Cleanup(mockServer.Close)
 	cmd := &cobra.Command{}
 	cmd.SetContext(t.Context())
 	cmd.Flags().String("keepalive", "", "")
 	cmd.Flags().Bool("truncate", false, "")
 	cmd.Flags().Int("dimensions", 0, "")
 	cmd.Flags().Bool("verbose", false, "")
 	cmd.Flags().Bool("insecure", false, "")
 	cmd.Flags().Bool("nowordwrap", false, "")
 	cmd.Flags().String("format", "", "")
 	cmd.Flags().String("think", "", "")
 	cmd.Flags().Bool("hidethinking", false, "")
 	// Swap os.Stdout for a pipe so the printed message can be inspected; the
 	// original is restored on cleanup.
 	oldStdout := os.Stdout
 	readOut, writeOut, _ := os.Pipe()
 	os.Stdout = writeOut
 	t.Cleanup(func() { os.Stdout = oldStdout })
 	err := RunHandler(cmd, []string{"gpt-oss:20b:cloud", "hi"})
 	// Close the write end first so io.Copy below sees EOF.
 	_ = writeOut.Close()
 	var out bytes.Buffer
 	_, _ = io.Copy(&out, readOut)
 	if err != nil {
 		t.Fatalf("RunHandler returned error: %v", err)
 	}
 	if !strings.Contains(out.String(), "You need to be signed in to Ollama to run Cloud models.") {
 		t.Fatalf("expected sign-in guidance message, got %q", out.String())
 	}
 	if !strings.Contains(out.String(), "https://ollama.com/signin") {
 		t.Fatalf("expected signin_url in output, got %q", out.String())
 	}
 }
 // TestRunHandler_ExplicitCloudStubMissing_PullsNormalizedNameTEMP checks that
 // running "gpt-oss:20b:cloud" while /api/tags lists no models sends a pull for
 // "gpt-oss:20b-cloud" and still reaches /api/generate.
 func TestRunHandler_ExplicitCloudStubMissing_PullsNormalizedNameTEMP(t *testing.T) {
 	var pulledModel string
 	var generateCalled bool
 	mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		switch {
 		case r.URL.Path == "/api/show" && r.Method == http.MethodPost:
 			w.WriteHeader(http.StatusOK)
 			if err := json.NewEncoder(w).Encode(api.ShowResponse{
 				Capabilities: []model.Capability{model.CapabilityCompletion},
 				RemoteModel:  "gpt-oss:20b",
 			}); err != nil {
 				http.Error(w, err.Error(), http.StatusInternalServerError)
 			}
 			return
 		case r.URL.Path == "/api/tags" && r.Method == http.MethodGet:
 			// An empty model list means the stub is not present yet.
 			w.WriteHeader(http.StatusOK)
 			if err := json.NewEncoder(w).Encode(api.ListResponse{Models: nil}); err != nil {
 				http.Error(w, err.Error(), http.StatusInternalServerError)
 			}
 			return
 		case r.URL.Path == "/api/pull" && r.Method == http.MethodPost:
 			// Capture the requested model name for the assertion below.
 			var req api.PullRequest
 			if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
 				http.Error(w, err.Error(), http.StatusBadRequest)
 				return
 			}
 			pulledModel = req.Model
 			w.WriteHeader(http.StatusOK)
 			if err := json.NewEncoder(w).Encode(api.ProgressResponse{Status: "success"}); err != nil {
 				http.Error(w, err.Error(), http.StatusInternalServerError)
 			}
 			return
 		case r.URL.Path == "/api/generate" && r.Method == http.MethodPost:
 			generateCalled = true
 			w.WriteHeader(http.StatusOK)
 			if err := json.NewEncoder(w).Encode(api.GenerateResponse{Done: true}); err != nil {
 				http.Error(w, err.Error(), http.StatusInternalServerError)
 			}
 			return
 		default:
 			http.NotFound(w, r)
 		}
 	}))
 	t.Setenv("OLLAMA_HOST", mockServer.URL)
 	t.Cleanup(mockServer.Close)
 	cmd := &cobra.Command{}
 	cmd.SetContext(t.Context())
 	cmd.Flags().String("keepalive", "", "")
 	cmd.Flags().Bool("truncate", false, "")
 	cmd.Flags().Int("dimensions", 0, "")
 	cmd.Flags().Bool("verbose", false, "")
 	cmd.Flags().Bool("insecure", false, "")
 	cmd.Flags().Bool("nowordwrap", false, "")
 	cmd.Flags().String("format", "", "")
 	cmd.Flags().String("think", "", "")
 	cmd.Flags().Bool("hidethinking", false, "")
 	err := RunHandler(cmd, []string{"gpt-oss:20b:cloud", "hi"})
 	if err != nil {
 		t.Fatalf("RunHandler returned error: %v", err)
 	}
 	if pulledModel != "gpt-oss:20b-cloud" {
 		t.Fatalf("expected normalized pull model %q, got %q", "gpt-oss:20b-cloud", pulledModel)
 	}
 	if !generateCalled {
 		t.Fatal("expected /api/generate to be called")
 	}
 }
 // TestRunHandler_ExplicitCloudStubPresent_SkipsPullTEMP checks that running
 // "gpt-oss:20b:cloud" while /api/tags already lists "gpt-oss:20b-cloud" does
 // not call /api/pull and still reaches /api/generate.
 func TestRunHandler_ExplicitCloudStubPresent_SkipsPullTEMP(t *testing.T) {
 	var pullCalled bool
 	var generateCalled bool
 	mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		switch {
 		case r.URL.Path == "/api/show" && r.Method == http.MethodPost:
 			w.WriteHeader(http.StatusOK)
 			if err := json.NewEncoder(w).Encode(api.ShowResponse{
 				Capabilities: []model.Capability{model.CapabilityCompletion},
 				RemoteModel:  "gpt-oss:20b",
 			}); err != nil {
 				http.Error(w, err.Error(), http.StatusInternalServerError)
 			}
 			return
 		case r.URL.Path == "/api/tags" && r.Method == http.MethodGet:
 			// The listing already contains the normalized stub name.
 			w.WriteHeader(http.StatusOK)
 			if err := json.NewEncoder(w).Encode(api.ListResponse{
 				Models: []api.ListModelResponse{{Name: "gpt-oss:20b-cloud"}},
 			}); err != nil {
 				http.Error(w, err.Error(), http.StatusInternalServerError)
 			}
 			return
 		case r.URL.Path == "/api/pull" && r.Method == http.MethodPost:
 			// Recorded so the test can assert this endpoint is never hit.
 			pullCalled = true
 			w.WriteHeader(http.StatusOK)
 			if err := json.NewEncoder(w).Encode(api.ProgressResponse{Status: "success"}); err != nil {
 				http.Error(w, err.Error(), http.StatusInternalServerError)
 			}
 			return
 		case r.URL.Path == "/api/generate" && r.Method == http.MethodPost:
 			generateCalled = true
 			w.WriteHeader(http.StatusOK)
 			if err := json.NewEncoder(w).Encode(api.GenerateResponse{Done: true}); err != nil {
 				http.Error(w, err.Error(), http.StatusInternalServerError)
 			}
 			return
 		default:
 			http.NotFound(w, r)
 		}
 	}))
 	t.Setenv("OLLAMA_HOST", mockServer.URL)
 	t.Cleanup(mockServer.Close)
 	cmd := &cobra.Command{}
 	cmd.SetContext(t.Context())
 	cmd.Flags().String("keepalive", "", "")
 	cmd.Flags().Bool("truncate", false, "")
 	cmd.Flags().Int("dimensions", 0, "")
 	cmd.Flags().Bool("verbose", false, "")
 	cmd.Flags().Bool("insecure", false, "")
 	cmd.Flags().Bool("nowordwrap", false, "")
 	cmd.Flags().String("format", "", "")
 	cmd.Flags().String("think", "", "")
 	cmd.Flags().Bool("hidethinking", false, "")
 	err := RunHandler(cmd, []string{"gpt-oss:20b:cloud", "hi"})
 	if err != nil {
 		t.Fatalf("RunHandler returned error: %v", err)
 	}
 	if pullCalled {
 		t.Fatal("expected /api/pull not to be called when cloud stub already exists")
 	}
 	if !generateCalled {
 		t.Fatal("expected /api/generate to be called")
 	}
 }
 // TestRunHandler_ExplicitCloudStubPullFailure_IsBestEffortTEMP checks that a
 // 500 from /api/pull does not fail the run: RunHandler still returns nil and
 // /api/generate is still called.
 func TestRunHandler_ExplicitCloudStubPullFailure_IsBestEffortTEMP(t *testing.T) {
 	var generateCalled bool
 	mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		switch {
 		case r.URL.Path == "/api/show" && r.Method == http.MethodPost:
 			w.WriteHeader(http.StatusOK)
 			if err := json.NewEncoder(w).Encode(api.ShowResponse{
 				Capabilities: []model.Capability{model.CapabilityCompletion},
 				RemoteModel:  "gpt-oss:20b",
 			}); err != nil {
 				http.Error(w, err.Error(), http.StatusInternalServerError)
 			}
 			return
 		case r.URL.Path == "/api/tags" && r.Method == http.MethodGet:
 			// An empty model list means the stub is not present yet.
 			w.WriteHeader(http.StatusOK)
 			if err := json.NewEncoder(w).Encode(api.ListResponse{Models: nil}); err != nil {
 				http.Error(w, err.Error(), http.StatusInternalServerError)
 			}
 			return
 		case r.URL.Path == "/api/pull" && r.Method == http.MethodPost:
 			// Simulate a failing pull.
 			w.WriteHeader(http.StatusInternalServerError)
 			if err := json.NewEncoder(w).Encode(map[string]string{"error": "pull failed"}); err != nil {
 				http.Error(w, err.Error(), http.StatusInternalServerError)
 			}
 			return
 		case r.URL.Path == "/api/generate" && r.Method == http.MethodPost:
 			generateCalled = true
 			w.WriteHeader(http.StatusOK)
 			if err := json.NewEncoder(w).Encode(api.GenerateResponse{Done: true}); err != nil {
 				http.Error(w, err.Error(), http.StatusInternalServerError)
 			}
 			return
 		default:
 			http.NotFound(w, r)
 		}
 	}))
 	t.Setenv("OLLAMA_HOST", mockServer.URL)
 	t.Cleanup(mockServer.Close)
 	cmd := &cobra.Command{}
 	cmd.SetContext(t.Context())
 	cmd.Flags().String("keepalive", "", "")
 	cmd.Flags().Bool("truncate", false, "")
 	cmd.Flags().Int("dimensions", 0, "")
 	cmd.Flags().Bool("verbose", false, "")
 	cmd.Flags().Bool("insecure", false, "")
 	cmd.Flags().Bool("nowordwrap", false, "")
 	cmd.Flags().String("format", "", "")
 	cmd.Flags().String("think", "", "")
 	cmd.Flags().Bool("hidethinking", false, "")
 	err := RunHandler(cmd, []string{"gpt-oss:20b:cloud", "hi"})
 	if err != nil {
 		t.Fatalf("RunHandler returned error: %v", err)
 	}
 	if !generateCalled {
 		t.Fatal("expected /api/generate to be called despite pull failure")
 	}
 }
 func TestGetModelfileName(t *testing.T) {
 	tests := []struct {
 		name          string
@@ -1211,6 +1560,20 @@ func TestNewCreateRequest(t *testing.T) {
 				Model: "newmodel",
 			},
 		},
 		{
 			"explicit cloud model preserves source when parent lacks it",
 			"newmodel",
 			runOptions{
 				Model:       "qwen3.5:cloud",
 				ParentModel: "qwen3.5",
 				Messages:    []api.Message{},
 				WordWrap:    true,
 			},
 			&api.CreateRequest{
 				From:  "qwen3.5:cloud",
 				Model: "newmodel",
 			},
 		},
 		{
 			"parent model as filepath test",
 			"newmodel",
@@ -1292,6 +1655,24 @@ func TestNewCreateRequest(t *testing.T) {
 				},
 			},
 		},
 		{
 			"loaded messages are preserved when saving",
 			"newmodel",
 			runOptions{
 				Model:          "mymodel",
 				ParentModel:    "parentmodel",
 				LoadedMessages: []api.Message{{Role: "assistant", Content: "loaded"}},
 				Messages:       []api.Message{{Role: "user", Content: "new"}},
 			},
 			&api.CreateRequest{
 				From:  "parentmodel",
 				Model: "newmodel",
 				Messages: []api.Message{
 					{Role: "assistant", Content: "loaded"},
 					{Role: "user", Content: "new"},
 				},
 			},
 		},
 	}
 	for _, tt := range tests {
@@ -1304,15 +1685,43 @@ func TestNewCreateRequest(t *testing.T) {
 	}
 }
 func TestApplyShowResponseToRunOptions(t *testing.T) {
 	opts := runOptions{}
 	info := &api.ShowResponse{
 		Details: api.ModelDetails{
 			ParentModel: "parentmodel",
 		},
 		Messages: []api.Message{
 			{Role: "assistant", Content: "loaded"},
 		},
 	}
 	applyShowResponseToRunOptions(&opts, info)
 	if opts.ParentModel != "parentmodel" {
 		t.Fatalf("ParentModel = %q, want %q", opts.ParentModel, "parentmodel")
 	}
 	if !cmp.Equal(opts.LoadedMessages, info.Messages) {
 		t.Fatalf("LoadedMessages = %#v, want %#v", opts.LoadedMessages, info.Messages)
 	}
 	info.Messages[0].Content = "modified"
 	if opts.LoadedMessages[0].Content == "modified" {
 		t.Fatal("LoadedMessages should be copied independently from ShowResponse")
 	}
 }
 func TestRunOptions_Copy(t *testing.T) {
 	// Setup test data
 	originalKeepAlive := &api.Duration{Duration: 5 * time.Minute}
 	originalThink := &api.ThinkValue{Value: "test reasoning"}
 	original := runOptions{
-		Model:       "test-model",
+		Model:          "test-model",
-		ParentModel: "parent-model",
+		ParentModel:    "parent-model",
-		Prompt:      "test prompt",
+		LoadedMessages: []api.Message{{Role: "assistant", Content: "loaded hello"}},
 		Prompt:         "test prompt",
 		Messages: []api.Message{
 			{Role: "user", Content: "hello"},
 			{Role: "assistant", Content: "hi there"},
@@ -1352,6 +1761,7 @@ func TestRunOptions_Copy(t *testing.T) {
 	}{
 		{"Model", copied.Model, original.Model},
 		{"ParentModel", copied.ParentModel, original.ParentModel},
 		{"LoadedMessages", copied.LoadedMessages, original.LoadedMessages},
 		{"Prompt", copied.Prompt, original.Prompt},
 		{"WordWrap", copied.WordWrap, original.WordWrap},
 		{"Format", copied.Format, original.Format},
@@ -1456,13 +1866,18 @@ func TestRunOptions_Copy(t *testing.T) {
 func TestRunOptions_Copy_EmptySlicesAndMaps(t *testing.T) {
 	// Test with empty slices and maps
 	original := runOptions{
-		Messages: []api.Message{},
+		LoadedMessages: []api.Message{},
-		Images:   []api.ImageData{},
+		Messages:       []api.Message{},
-		Options:  map[string]any{},
+		Images:         []api.ImageData{},
 		Options:        map[string]any{},
 	}
 	copied := original.Copy()
 	if copied.LoadedMessages == nil {
 		t.Error("Empty LoadedMessages slice should remain empty, not nil")
 	}
 	if copied.Messages == nil {
 		t.Error("Empty Messages slice should remain empty, not nil")
 	}
@@ -1479,6 +1894,10 @@ func TestRunOptions_Copy_EmptySlicesAndMaps(t *testing.T) {
 		t.Error("Empty Messages slice should remain empty")
 	}
 	if len(copied.LoadedMessages) != 0 {
 		t.Error("Empty LoadedMessages slice should remain empty")
 	}
 	if len(copied.Images) != 0 {
 		t.Error("Empty Images slice should remain empty")
 	}
@@ -1553,10 +1972,10 @@ func TestShowInfoImageGen(t *testing.T) {
 		Details: api.ModelDetails{
 			Family:            "ZImagePipeline",
 			ParameterSize:     "10.3B",
-			QuantizationLevel: "FP8",
+			QuantizationLevel: "Q8",
 		},
 		Capabilities: []model.Capability{model.CapabilityImage},
-		Requires:     "0.14.0",
+		Requires:     "0.19.0",
 	}, false, &b)
 	if err != nil {
 		t.Fatal(err)
@@ -1565,8 +1984,8 @@ func TestShowInfoImageGen(t *testing.T) {
 	expect := "  Model\n" +
 		"    architecture    ZImagePipeline    \n" +
 		"    parameters      10.3B             \n" +
-		"    quantization    FP8               \n" +
+		"    quantization    Q8                \n" +
-		"    requires        0.14.0            \n" +
+		"    requires        0.19.0            \n" +
 		"\n" +
 		"  Capabilities\n" +
 		"    image    \n" +
@@ -1624,16 +2043,20 @@ func TestRunOptions_Copy_Independence(t *testing.T) {
 	// Test that modifications to original don't affect copy
 	originalThink := &api.ThinkValue{Value: "original"}
 	original := runOptions{
-		Model:    "original-model",
+		Model:          "original-model",
-		Messages: []api.Message{{Role: "user", Content: "original"}},
+		LoadedMessages: []api.Message{{Role: "assistant", Content: "loaded"}},
-		Options:  map[string]any{"key": "value"},
+		Messages:       []api.Message{{Role: "user", Content: "original"}},
-		Think:    originalThink,
+		Options:        map[string]any{"key": "value"},
 		Think:          originalThink,
 	}
 	copied := original.Copy()
 	// Modify original
 	original.Model = "modified-model"
 	if len(original.LoadedMessages) > 0 {
 		original.LoadedMessages[0].Content = "modified loaded"
 	}
 	if len(original.Messages) > 0 {
 		original.Messages[0].Content = "modified"
 	}
@@ -1647,6 +2070,10 @@ func TestRunOptions_Copy_Independence(t *testing.T) {
 		t.Error("Copy Model should not be affected by original modification")
 	}
 	if len(copied.LoadedMessages) > 0 && copied.LoadedMessages[0].Content == "modified loaded" {
 		t.Error("Copy LoadedMessages should not be affected by original modification")
 	}
 	if len(copied.Messages) > 0 && copied.Messages[0].Content == "modified" {
 		t.Error("Copy Messages should not be affected by original modification")
 	}
@@ -1659,3 +2086,194 @@ func TestRunOptions_Copy_Independence(t *testing.T) {
 		t.Error("Copy Think should not be affected by original modification")
 	}
 }
 // TestLoadOrUnloadModel_CloudModelAuth drives loadOrUnloadModel against a mock
 // server and checks, per case, whether the /api/me (whoami) endpoint was hit
 // and which error, if any, was returned.
 //
 // The mock server answers /api/show with the case's remote host/model (or with
 // showStatus and a "not found" body when showStatus is set to a non-200 code),
 // /api/me with whoamiStatus/whoamiResp, and /api/generate with 200.
 func TestLoadOrUnloadModel_CloudModelAuth(t *testing.T) {
 	tests := []struct {
 		name            string
 		model           string
 		showStatus      int
 		remoteHost      string
 		remoteModel     string
 		whoamiStatus    int
 		whoamiResp      any
 		expectWhoami    bool
 		expectedError   string
 		expectAuthError bool
 	}{
 		{
 			name:         "ollama.com cloud model - user signed in",
 			model:        "test-cloud-model",
 			remoteHost:   "https://ollama.com",
 			remoteModel:  "test-model",
 			whoamiStatus: http.StatusOK,
 			whoamiResp:   api.UserResponse{Name: "testuser"},
 			expectWhoami: true,
 		},
 		{
 			name:         "ollama.com cloud model - user not signed in",
 			model:        "test-cloud-model",
 			remoteHost:   "https://ollama.com",
 			remoteModel:  "test-model",
 			whoamiStatus: http.StatusUnauthorized,
 			whoamiResp: map[string]string{
 				"error":      "unauthorized",
 				"signin_url": "https://ollama.com/signin",
 			},
 			expectWhoami:    true,
 			expectedError:   "unauthorized",
 			expectAuthError: true,
 		},
 		{
 			name:         "non-ollama.com remote - no auth check",
 			model:        "test-cloud-model",
 			remoteHost:   "https://other-remote.com",
 			remoteModel:  "test-model",
 			whoamiStatus: http.StatusUnauthorized, // should not be called
 			whoamiResp:   nil,
 		},
 		{
 			name:         "explicit :cloud model - auth check without remote metadata",
 			model:        "kimi-k2.5:cloud",
 			remoteHost:   "",
 			remoteModel:  "",
 			whoamiStatus: http.StatusOK,
 			whoamiResp:   api.UserResponse{Name: "testuser"},
 			expectWhoami: true,
 		},
 		{
 			name:            "explicit :cloud model without local stub returns not found by default",
 			model:           "minimax-m2.7:cloud",
 			showStatus:      http.StatusNotFound,
 			whoamiStatus:    http.StatusOK,
 			whoamiResp:      api.UserResponse{Name: "testuser"},
 			expectedError:   "not found",
 			expectWhoami:    false,
 			expectAuthError: false,
 		},
 		{
 			name:         "explicit -cloud model - auth check without remote metadata",
 			model:        "kimi-k2.5:latest-cloud",
 			remoteHost:   "",
 			remoteModel:  "",
 			whoamiStatus: http.StatusOK,
 			whoamiResp:   api.UserResponse{Name: "testuser"},
 			expectWhoami: true,
 		},
 		{
 			name:         "dash cloud-like name without explicit source does not require auth",
 			model:        "test-cloud-model",
 			remoteHost:   "",
 			remoteModel:  "",
 			whoamiStatus: http.StatusUnauthorized, // should not be called
 			whoamiResp:   nil,
 		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			// Set by the /api/me handler; compared with tt.expectWhoami below.
 			whoamiCalled := false
 			mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 				switch r.URL.Path {
 				case "/api/show":
 					// A zero showStatus means "use the default 200 response".
 					if tt.showStatus != 0 && tt.showStatus != http.StatusOK {
 						w.WriteHeader(tt.showStatus)
 						_ = json.NewEncoder(w).Encode(map[string]string{"error": "not found"})
 						return
 					}
 					w.Header().Set("Content-Type", "application/json")
 					if err := json.NewEncoder(w).Encode(api.ShowResponse{
 						RemoteHost:  tt.remoteHost,
 						RemoteModel: tt.remoteModel,
 					}); err != nil {
 						http.Error(w, err.Error(), http.StatusInternalServerError)
 					}
 				case "/api/me":
 					whoamiCalled = true
 					w.Header().Set("Content-Type", "application/json")
 					w.WriteHeader(tt.whoamiStatus)
 					// A nil whoamiResp sends the status code with an empty body.
 					if tt.whoamiResp != nil {
 						if err := json.NewEncoder(w).Encode(tt.whoamiResp); err != nil {
 							http.Error(w, err.Error(), http.StatusInternalServerError)
 						}
 					}
 				case "/api/generate":
 					w.WriteHeader(http.StatusOK)
 				default:
 					http.NotFound(w, r)
 				}
 			}))
 			defer mockServer.Close()
 			// Point the client under test at the mock server.
 			t.Setenv("OLLAMA_HOST", mockServer.URL)
 			cmd := &cobra.Command{}
 			cmd.SetContext(t.Context())
 			opts := &runOptions{
 				Model:       tt.model,
 				ShowConnect: false,
 			}
 			err := loadOrUnloadModel(cmd, opts)
 			if whoamiCalled != tt.expectWhoami {
 				t.Errorf("whoami called = %v, want %v", whoamiCalled, tt.expectWhoami)
 			}
 			if tt.expectedError != "" {
 				if err == nil {
 					t.Errorf("expected error containing %q, got nil", tt.expectedError)
 				} else {
 					// The case-insensitive substring check only applies to
 					// non-auth errors; auth errors are matched by type below.
 					if !tt.expectAuthError && !strings.Contains(strings.ToLower(err.Error()), strings.ToLower(tt.expectedError)) {
 						t.Errorf("expected error containing %q, got %v", tt.expectedError, err)
 					}
 					if tt.expectAuthError {
 						var authErr api.AuthorizationError
 						if !errors.As(err, &authErr) {
 							t.Errorf("expected AuthorizationError, got %T: %v", err, err)
 						}
 					}
 				}
 			} else {
 				if err != nil {
 					t.Errorf("expected no error, got %v", err)
 				}
 			}
 		})
 	}
 }
 // TestIsLocalhost sets OLLAMA_HOST to a series of values (bare hosts, host:port
 // pairs, IPv6 forms and URLs with a scheme) and checks what isLocalhost
 // reports for each.
 func TestIsLocalhost(t *testing.T) {
 	type hostCase struct {
 		name     string
 		host     string
 		expected bool
 	}
 	cases := []hostCase{
 		{name: "default empty", host: "", expected: true},
 		{name: "localhost no port", host: "localhost", expected: true},
 		{name: "localhost with port", host: "localhost:11435", expected: true},
 		{name: "127.0.0.1 no port", host: "127.0.0.1", expected: true},
 		{name: "127.0.0.1 with port", host: "127.0.0.1:11434", expected: true},
 		{name: "0.0.0.0 no port", host: "0.0.0.0", expected: true},
 		{name: "0.0.0.0 with port", host: "0.0.0.0:11434", expected: true},
 		{name: "::1 no port", host: "::1", expected: true},
 		{name: "[::1] with port", host: "[::1]:11434", expected: true},
 		{name: "loopback with scheme", host: "http://localhost:11434", expected: true},
 		{name: "remote hostname", host: "example.com", expected: false},
 		{name: "remote hostname with port", host: "example.com:11434", expected: false},
 		{name: "remote IP", host: "192.168.1.1", expected: false},
 		{name: "remote IP with port", host: "192.168.1.1:11434", expected: false},
 		{name: "remote with scheme", host: "http://example.com:11434", expected: false},
 		{name: "https remote", host: "https://example.com:443", expected: false},
 	}
 	for _, tc := range cases {
 		t.Run(tc.name, func(t *testing.T) {
 			t.Setenv("OLLAMA_HOST", tc.host)
 			if got := isLocalhost(); got != tc.expected {
 				t.Errorf("isLocalhost() with OLLAMA_HOST=%q = %v, want %v", tc.host, got, tc.expected)
 			}
 		})
 	}
 }
--- a/cmd/config/config.go
+++ b/cmd/config/config.go
@@ -0,0 +1,284 @@
 // Package config provides integration configuration for external coding tools
 // (Claude Code, Codex, Droid, OpenCode) to use Ollama models.
 package config
 import (
 	"encoding/json"
 	"errors"
 	"fmt"
 	"os"
 	"path/filepath"
 	"strings"
 	"github.com/ollama/ollama/cmd/internal/fileutil"
 )
 // integration holds the settings persisted for a single integration.
 type integration struct {
 	// Models is the list of configured model names; IntegrationModel returns
 	// the first entry.
 	Models    []string          `json:"models"`
 	// Aliases maps an alias name to a model name; omitted from JSON when empty.
 	Aliases   map[string]string `json:"aliases,omitempty"`
 	// Onboarded is set by MarkIntegrationOnboarded; omitted from JSON when false.
 	Onboarded bool              `json:"onboarded,omitempty"`
 }
 // IntegrationConfig is the persisted config for one integration.
 // It is a type alias for the unexported integration struct, so both names
 // denote the same type.
 type IntegrationConfig = integration
 // config is the top-level document serialized to and from the config file.
 type config struct {
 	// Integrations maps an integration key to its settings. The Save* helpers
 	// in this file use the lower-cased app name as the key.
 	Integrations  map[string]*integration `json:"integrations"`
 	// LastModel is the value recorded by SetLastModel.
 	LastModel     string                  `json:"last_model,omitempty"`
 	// LastSelection is the value recorded by SetLastSelection.
 	LastSelection string                  `json:"last_selection,omitempty"` // "run" or integration name
 }
 func configPath() (string, error) {
 	home, err := os.UserHomeDir()
 	if err != nil {
 		return "", err
 	}
 	return filepath.Join(home, ".ollama", "config.json"), nil
 }
 func legacyConfigPath() (string, error) {
 	home, err := os.UserHomeDir()
 	if err != nil {
 		return "", err
 	}
 	return filepath.Join(home, ".ollama", "config", "config.json"), nil
 }
 // migrateConfig moves the config from the legacy path
 // (~/.ollama/config/config.json) to ~/.ollama/config.json.
 //
 // It returns true only when the legacy file's contents were written to the
 // new location. A missing legacy file, or one that does not hold valid JSON,
 // is not an error: (false, nil) is returned so startup can continue.
 func migrateConfig() (bool, error) {
 	oldPath, err := legacyConfigPath()
 	if err != nil {
 		return false, err
 	}
 	oldData, err := os.ReadFile(oldPath)
 	if err != nil {
 		// errors.Is is the recommended replacement for os.IsNotExist and also
 		// matches wrapped not-exist errors.
 		if errors.Is(err, os.ErrNotExist) {
 			return false, nil
 		}
 		return false, err
 	}
 	// Ignore legacy files with invalid JSON and continue startup.
 	if !json.Valid(oldData) {
 		return false, nil
 	}
 	newPath, err := configPath()
 	if err != nil {
 		return false, err
 	}
 	if err := os.MkdirAll(filepath.Dir(newPath), 0o755); err != nil {
 		return false, err
 	}
 	if err := os.WriteFile(newPath, oldData, 0o644); err != nil {
 		return false, fmt.Errorf("write new config: %w", err)
 	}
 	// Cleanup is best-effort: the new file is already in place, so a failure
 	// to remove the legacy file or its directory is deliberately ignored.
 	_ = os.Remove(oldPath)
 	_ = os.Remove(filepath.Dir(oldPath)) // clean up empty directory
 	return true, nil
 }
 // load reads and parses the config file.
 //
 // When the file does not exist, load first attempts a one-time migration
 // from the legacy location and re-reads the file if that succeeded. If there
 // is still no file, an empty config with an initialized Integrations map is
 // returned. A file that exists but cannot be parsed yields an error that
 // includes the file path. The returned config always has a non-nil
 // Integrations map.
 func load() (*config, error) {
 	path, err := configPath()
 	if err != nil {
 		return nil, err
 	}
 	data, err := os.ReadFile(path)
 	// errors.Is is the recommended replacement for os.IsNotExist and reports
 	// false for a nil error, so no separate nil check is needed.
 	if errors.Is(err, os.ErrNotExist) {
 		// Migration errors are intentionally not surfaced; on failure the
 		// original not-exist error is kept and handled below.
 		if migrated, merr := migrateConfig(); merr == nil && migrated {
 			data, err = os.ReadFile(path)
 		}
 	}
 	if err != nil {
 		if errors.Is(err, os.ErrNotExist) {
 			return &config{Integrations: make(map[string]*integration)}, nil
 		}
 		return nil, err
 	}
 	var cfg config
 	if err := json.Unmarshal(data, &cfg); err != nil {
 		return nil, fmt.Errorf("failed to parse config: %w, at: %s", err, path)
 	}
 	// A file without an "integrations" key leaves the map nil; callers
 	// assign into it, so make sure it is usable.
 	if cfg.Integrations == nil {
 		cfg.Integrations = make(map[string]*integration)
 	}
 	return &cfg, nil
 }
 // save encodes cfg as two-space-indented JSON and hands it to
 // fileutil.WriteWithBackup for the config path, creating the parent
 // directory first if needed.
 func save(cfg *config) error {
 	target, err := configPath()
 	if err != nil {
 		return err
 	}
 	if mkErr := os.MkdirAll(filepath.Dir(target), 0o755); mkErr != nil {
 		return mkErr
 	}
 	encoded, err := json.MarshalIndent(cfg, "", "  ")
 	if err != nil {
 		return err
 	}
 	return fileutil.WriteWithBackup(target, encoded)
 }
 // SaveIntegration stores models as the model list for appName, keyed by the
 // lower-cased name. Aliases and the onboarded flag already saved for that
 // integration are carried over. An empty appName is rejected.
 func SaveIntegration(appName string, models []string) error {
 	if appName == "" {
 		return errors.New("app name cannot be empty")
 	}
 	cfg, err := load()
 	if err != nil {
 		return err
 	}
 	key := strings.ToLower(appName)
 	updated := &integration{Models: models}
 	if prev := cfg.Integrations[key]; prev != nil {
 		updated.Aliases = prev.Aliases
 		updated.Onboarded = prev.Onboarded
 	}
 	cfg.Integrations[key] = updated
 	return save(cfg)
 }
 // MarkIntegrationOnboarded marks an integration as onboarded in Ollama's config.
 // The entry is keyed by the lower-cased appName and is created if it does not
 // exist yet; models and aliases already saved are left untouched.
 // An empty appName is rejected, matching SaveIntegration and SaveAliases, so
 // that no entry is ever persisted under an empty key.
 func MarkIntegrationOnboarded(appName string) error {
 	if appName == "" {
 		return errors.New("app name cannot be empty")
 	}
 	cfg, err := load()
 	if err != nil {
 		return err
 	}
 	key := strings.ToLower(appName)
 	existing := cfg.Integrations[key]
 	if existing == nil {
 		existing = &integration{}
 	}
 	existing.Onboarded = true
 	cfg.Integrations[key] = existing
 	return save(cfg)
 }
 // IntegrationModel returns the first configured model for an integration.
 // The empty string is returned when the integration cannot be loaded or has
 // no models.
 func IntegrationModel(appName string) string {
 	ic, err := LoadIntegration(appName)
 	if err == nil && len(ic.Models) > 0 {
 		return ic.Models[0]
 	}
 	return ""
 }
 // IntegrationModels returns every configured model for an integration.
 // nil is returned when the integration cannot be loaded or has no models.
 func IntegrationModels(appName string) []string {
 	ic, err := LoadIntegration(appName)
 	if err != nil {
 		return nil
 	}
 	if len(ic.Models) == 0 {
 		return nil
 	}
 	return ic.Models
 }
 // LastModel returns the last model that was run. It returns the empty string
 // when none is recorded or the config cannot be loaded.
 func LastModel() string {
 	if cfg, err := load(); err == nil {
 		return cfg.LastModel
 	}
 	return ""
 }
 // SetLastModel records model as the last model that was run and writes the
 // config back to disk.
 func SetLastModel(model string) error {
 	cfg, loadErr := load()
 	if loadErr != nil {
 		return loadErr
 	}
 	cfg.LastModel = model
 	return save(cfg)
 }
 // LastSelection returns the last menu selection ("run" or integration name).
 // It returns the empty string when none is recorded or the config cannot be
 // loaded.
 func LastSelection() string {
 	if cfg, err := load(); err == nil {
 		return cfg.LastSelection
 	}
 	return ""
 }
 // SetLastSelection records the last menu selection ("run" or integration
 // name) and writes the config back to disk.
 func SetLastSelection(selection string) error {
 	cfg, loadErr := load()
 	if loadErr != nil {
 		return loadErr
 	}
 	cfg.LastSelection = selection
 	return save(cfg)
 }
 // LoadIntegration returns the saved config for one integration.
 // The lookup uses the lower-cased appName. os.ErrNotExist is returned when
 // there is no usable entry: either the key is absent or the stored value is
 // JSON null, which decodes to a nil pointer. Treating the null case as
 // "not found" guarantees callers never receive a nil config with a nil error.
 func LoadIntegration(appName string) (*integration, error) {
 	cfg, err := load()
 	if err != nil {
 		return nil, err
 	}
 	integrationConfig, ok := cfg.Integrations[strings.ToLower(appName)]
 	if !ok || integrationConfig == nil {
 		return nil, os.ErrNotExist
 	}
 	return integrationConfig, nil
 }
 // SaveAliases replaces the saved aliases for one integration, creating the
 // integration entry (keyed by lower-cased appName) when it does not exist.
 // Other fields of an existing entry are kept. An empty appName is rejected.
 func SaveAliases(appName string, aliases map[string]string) error {
 	if appName == "" {
 		return errors.New("app name cannot be empty")
 	}
 	cfg, err := load()
 	if err != nil {
 		return err
 	}
 	key := strings.ToLower(appName)
 	entry := cfg.Integrations[key]
 	if entry == nil {
 		entry = new(integration)
 	}
 	// Assign rather than merge: an alias missing from the new map must also
 	// disappear from the saved config.
 	entry.Aliases = aliases
 	cfg.Integrations[key] = entry
 	return save(cfg)
 }
 // listIntegrations returns a copy of every saved integration config.
 // The order of the result follows Go map iteration and is therefore not
 // stable between calls. Entries stored as JSON null decode to nil pointers
 // and are skipped, since dereferencing them would panic.
 func listIntegrations() ([]integration, error) {
 	cfg, err := load()
 	if err != nil {
 		return nil, err
 	}
 	result := make([]integration, 0, len(cfg.Integrations))
 	for _, integrationConfig := range cfg.Integrations {
 		if integrationConfig == nil {
 			continue
 		}
 		result = append(result, *integrationConfig)
 	}
 	return result, nil
 }
--- a/cmd/config/config_cloud_test.go
+++ b/cmd/config/config_cloud_test.go
@@ -0,0 +1,641 @@
 package config
 import (
 	"errors"
 	"os"
 	"path/filepath"
 	"testing"
 )
 // TestSetAliases_CloudModel checks the alias-map shape expected for a cloud
 // model: "fast" is present and equal to "primary". It inspects a local map
 // literal only; no package function is called.
 func TestSetAliases_CloudModel(t *testing.T) {
 	const cloudModel = "kimi-k2.5:cloud"
 	aliases := map[string]string{"primary": cloudModel, "fast": cloudModel}
 	fast, primary := aliases["fast"], aliases["primary"]
 	// Cloud model behavior: fast must be set.
 	if fast == "" {
 		t.Error("cloud model should have fast alias set")
 	}
 	if fast != primary {
 		t.Errorf("fast should equal primary for auto-set, got fast=%q primary=%q", fast, primary)
 	}
 }
 // TestSetAliases_LocalModel checks the alias-map shape expected for a local
 // model: after deleting "fast", looking it up yields the empty string.
 func TestSetAliases_LocalModel(t *testing.T) {
 	aliases := map[string]string{"primary": "llama3.2:latest"}
 	// Local model behavior: the fast alias is removed.
 	delete(aliases, "fast")
 	if fast := aliases["fast"]; fast != "" {
 		t.Error("local model should have empty fast alias")
 	}
 }
 // TestSaveAliases_ReplacesNotMerges verifies that a second SaveAliases call
 // overwrites the stored alias map instead of merging into it, so an alias
 // left out of the new map disappears from the saved config.
 func TestSaveAliases_ReplacesNotMerges(t *testing.T) {
 	setTestHome(t, t.TempDir())
 	// Step 1: persist a map containing both primary and fast.
 	withFast := map[string]string{
 		"primary": "cloud-model",
 		"fast":    "cloud-model",
 	}
 	if err := SaveAliases("claude", withFast); err != nil {
 		t.Fatalf("failed to save initial aliases: %v", err)
 	}
 	got, err := LoadIntegration("claude")
 	if err != nil {
 		t.Fatalf("failed to load: %v", err)
 	}
 	if fast := got.Aliases["fast"]; fast != "cloud-model" {
 		t.Errorf("expected fast=cloud-model, got %q", fast)
 	}
 	// Step 2: persist a map without fast, as when switching to a local model.
 	withoutFast := map[string]string{"primary": "local-model"}
 	if err := SaveAliases("claude", withoutFast); err != nil {
 		t.Fatalf("failed to save updated aliases: %v", err)
 	}
 	// fast must be gone, not carried over from the first save.
 	got, err = LoadIntegration("claude")
 	if err != nil {
 		t.Fatalf("failed to load after update: %v", err)
 	}
 	if fast := got.Aliases["fast"]; fast != "" {
 		t.Errorf("fast should be removed after saving without it, got %q", fast)
 	}
 	if primary := got.Aliases["primary"]; primary != "local-model" {
 		t.Errorf("primary should be updated to local-model, got %q", primary)
 	}
 }
 // TestSaveAliases_PreservesModels verifies that updating aliases with
 // SaveAliases leaves the model list written by SaveIntegration intact.
 func TestSaveAliases_PreservesModels(t *testing.T) {
 	setTestHome(t, t.TempDir())
 	// Seed the integration with two models.
 	if err := SaveIntegration("claude", []string{"model1", "model2"}); err != nil {
 		t.Fatalf("failed to save integration: %v", err)
 	}
 	// Update only the aliases.
 	if err := SaveAliases("claude", map[string]string{"primary": "new-model"}); err != nil {
 		t.Fatalf("failed to save aliases: %v", err)
 	}
 	got, err := LoadIntegration("claude")
 	if err != nil {
 		t.Fatalf("failed to load: %v", err)
 	}
 	modelsKept := len(got.Models) == 2 && got.Models[0] == "model1"
 	if !modelsKept {
 		t.Errorf("models should be preserved, got %v", got.Models)
 	}
 }
 // TestSaveAliases_EmptyMap verifies that saving an empty alias map clears
 // all previously saved aliases.
 func TestSaveAliases_EmptyMap(t *testing.T) {
 	setTestHome(t, t.TempDir())
 	seeded := map[string]string{"primary": "model", "fast": "model"}
 	if err := SaveAliases("claude", seeded); err != nil {
 		t.Fatalf("failed to save: %v", err)
 	}
 	// Overwrite with an empty map.
 	if err := SaveAliases("claude", map[string]string{}); err != nil {
 		t.Fatalf("failed to save empty: %v", err)
 	}
 	got, err := LoadIntegration("claude")
 	if err != nil {
 		t.Fatalf("failed to load: %v", err)
 	}
 	if n := len(got.Aliases); n != 0 {
 		t.Errorf("aliases should be empty, got %v", got.Aliases)
 	}
 }
 // TestSaveAliases_NilMap verifies that saving a nil alias map succeeds and
 // leaves the integration without aliases.
 func TestSaveAliases_NilMap(t *testing.T) {
 	setTestHome(t, t.TempDir())
 	// Seed one alias so there is something to clear.
 	if err := SaveAliases("claude", map[string]string{"primary": "model"}); err != nil {
 		t.Fatalf("failed to save: %v", err)
 	}
 	// A nil map should clear the aliases.
 	var none map[string]string
 	if err := SaveAliases("claude", none); err != nil {
 		t.Fatalf("failed to save nil: %v", err)
 	}
 	got, err := LoadIntegration("claude")
 	if err != nil {
 		t.Fatalf("failed to load: %v", err)
 	}
 	if len(got.Aliases) != 0 {
 		t.Errorf("aliases should be nil or empty, got %v", got.Aliases)
 	}
 }
 // TestSaveAliases_EmptyAppName verifies that SaveAliases rejects an empty
 // app name with an error.
 func TestSaveAliases_EmptyAppName(t *testing.T) {
 	if err := SaveAliases("", map[string]string{"primary": "model"}); err == nil {
 		t.Error("expected error for empty app name")
 	}
 }
 // TestSaveAliases_CaseInsensitive verifies that the app name is matched
 // without regard to case: aliases saved as "Claude" or "CLAUDE" are read back
 // through "claude".
 func TestSaveAliases_CaseInsensitive(t *testing.T) {
 	setTestHome(t, t.TempDir())
 	if err := SaveAliases("Claude", map[string]string{"primary": "model1"}); err != nil {
 		t.Fatalf("failed to save: %v", err)
 	}
 	// Read back using a different case.
 	got, err := LoadIntegration("claude")
 	if err != nil {
 		t.Fatalf("failed to load: %v", err)
 	}
 	if primary := got.Aliases["primary"]; primary != "model1" {
 		t.Errorf("expected primary=model1, got %q", primary)
 	}
 	// Overwrite using yet another case.
 	if err := SaveAliases("CLAUDE", map[string]string{"primary": "model2"}); err != nil {
 		t.Fatalf("failed to update: %v", err)
 	}
 	got, err = LoadIntegration("claude")
 	if err != nil {
 		t.Fatalf("failed to load after update: %v", err)
 	}
 	if primary := got.Aliases["primary"]; primary != "model2" {
 		t.Errorf("expected primary=model2, got %q", primary)
 	}
 }
 // TestSaveAliases_CreatesIntegration verifies that SaveAliases creates the
 // integration entry when none exists yet.
 func TestSaveAliases_CreatesIntegration(t *testing.T) {
 	setTestHome(t, t.TempDir())
 	const app = "newintegration"
 	// Nothing has been saved for this integration before.
 	if err := SaveAliases(app, map[string]string{"primary": "model"}); err != nil {
 		t.Fatalf("failed to save: %v", err)
 	}
 	got, err := LoadIntegration(app)
 	if err != nil {
 		t.Fatalf("failed to load: %v", err)
 	}
 	if primary := got.Aliases["primary"]; primary != "model" {
 		t.Errorf("expected primary=model, got %q", primary)
 	}
 }
 // TestConfigureAliases_AliasMap walks through the alias-map rules for cloud
 // and local models using plain maps. The rules are simulated inside the test;
 // no package function is called.
 func TestConfigureAliases_AliasMap(t *testing.T) {
 	// cloudRule simulates cloud behavior: fill fast from primary only when unset.
 	cloudRule := func(m map[string]string) {
 		if m["fast"] == "" {
 			m["fast"] = m["primary"]
 		}
 	}
 	// localRule simulates local behavior: drop fast entirely.
 	localRule := func(m map[string]string) {
 		delete(m, "fast")
 	}
 	t.Run("cloud model auto-sets fast to primary", func(t *testing.T) {
 		aliases := map[string]string{"primary": "cloud-model"}
 		cloudRule(aliases)
 		if got := aliases["fast"]; got != "cloud-model" {
 			t.Errorf("expected fast=cloud-model, got %q", got)
 		}
 	})
 	t.Run("cloud model preserves custom fast", func(t *testing.T) {
 		aliases := map[string]string{
 			"primary": "cloud-model",
 			"fast":    "custom-fast-model",
 		}
 		// An existing fast alias must survive the cloud rule.
 		cloudRule(aliases)
 		if got := aliases["fast"]; got != "custom-fast-model" {
 			t.Errorf("expected fast=custom-fast-model (preserved), got %q", got)
 		}
 	})
 	t.Run("local model clears fast", func(t *testing.T) {
 		aliases := map[string]string{
 			"primary": "local-model",
 			"fast":    "should-be-cleared",
 		}
 		localRule(aliases)
 		if got := aliases["fast"]; got != "" {
 			t.Errorf("expected fast to be cleared, got %q", got)
 		}
 	})
 	t.Run("switching cloud to local clears fast", func(t *testing.T) {
 		// Begin from a cloud configuration.
 		aliases := map[string]string{
 			"primary": "cloud-model",
 			"fast":    "cloud-model",
 		}
 		// Move to a local model.
 		aliases["primary"] = "local-model"
 		localRule(aliases)
 		if got := aliases["fast"]; got != "" {
 			t.Errorf("fast should be cleared when switching to local, got %q", got)
 		}
 		if got := aliases["primary"]; got != "local-model" {
 			t.Errorf("primary should be updated, got %q", got)
 		}
 	})
 	t.Run("switching local to cloud sets fast", func(t *testing.T) {
 		// Begin from a local configuration, which has no fast alias.
 		aliases := map[string]string{"primary": "local-model"}
 		// Move to a cloud model.
 		aliases["primary"] = "cloud-model"
 		cloudRule(aliases)
 		if got := aliases["fast"]; got != "cloud-model" {
 			t.Errorf("fast should be set when switching to cloud, got %q", got)
 		}
 	})
 }
 // TestSetAliases_PrefixMapping spells out the expected prefix-to-alias
 // mapping ("claude-sonnet-" to primary, "claude-haiku-" to fast) using local
 // maps only, so no real client is needed.
 func TestSetAliases_PrefixMapping(t *testing.T) {
 	aliases := map[string]string{
 		"primary": "my-cloud-model",
 		"fast":    "my-fast-model",
 	}
 	expectedMappings := map[string]string{}
 	expectedMappings["claude-sonnet-"] = aliases["primary"]
 	expectedMappings["claude-haiku-"] = aliases["fast"]
 	if got := expectedMappings["claude-sonnet-"]; got != "my-cloud-model" {
 		t.Errorf("claude-sonnet- should map to primary")
 	}
 	if got := expectedMappings["claude-haiku-"]; got != "my-fast-model" {
 		t.Errorf("claude-haiku- should map to fast")
 	}
 }
 // TestSetAliases_LocalDeletesPrefixes records the local-model expectations:
 // the alias map has no "fast" entry and there are exactly two prefixes to
 // delete. It inspects local values only.
 func TestSetAliases_LocalDeletesPrefixes(t *testing.T) {
 	// fast is absent, which indicates a local model.
 	aliases := map[string]string{"primary": "local-model"}
 	prefixesToDelete := []string{"claude-sonnet-", "claude-haiku-"}
 	if fast := aliases["fast"]; fast != "" {
 		t.Error("fast should be empty for local model")
 	}
 	// Both prefixes must be listed for deletion.
 	if n := len(prefixesToDelete); n != 2 {
 		t.Errorf("expected 2 prefixes to delete, got %d", n)
 	}
 }
 // TestAtomicUpdate_ServerFailsConfigNotSaved simulates the "server fails, so
 // the config must not be saved" scenario.
 // NOTE(review): serverErr is always non-nil here, so the check below can
 // never fail; the test does not exercise any save path.
 func TestAtomicUpdate_ServerFailsConfigNotSaved(t *testing.T) {
 	setTestHome(t, t.TempDir())
 	// Simulated server failure.
 	serverErr := errors.New("server unavailable")
 	if serverErr == nil {
 		t.Error("config should NOT be saved when server fails")
 	}
 }
 // TestAtomicUpdate_ServerSucceedsConfigSaved simulates the "server succeeds,
 // so the config is saved" scenario and confirms the aliases can be read back.
 func TestAtomicUpdate_ServerSucceedsConfigSaved(t *testing.T) {
 	setTestHome(t, t.TempDir())
 	// Simulated server success: the error stays nil.
 	var serverErr error
 	if serverErr != nil {
 		t.Fatal("server should succeed")
 	}
 	want := map[string]string{"primary": "model"}
 	if err := SaveAliases("claude", want); err != nil {
 		t.Fatalf("saveAliases failed: %v", err)
 	}
 	// Confirm the save reached disk by loading it again.
 	got, err := LoadIntegration("claude")
 	if err != nil {
 		t.Fatalf("failed to load: %v", err)
 	}
 	if primary := got.Aliases["primary"]; primary != "model" {
 		t.Errorf("expected primary=model, got %q", primary)
 	}
 }
 // TestConfigFile_PreservesUnknownFields writes a config file that contains
 // fields unknown to the config structs, updates the aliases, and checks the
 // raw file afterwards: the known model list must survive and the primary
 // alias must be updated. Setup and read-back errors fail the test immediately
 // so that a broken fixture cannot be mistaken for a passing or failing check.
 func TestConfigFile_PreservesUnknownFields(t *testing.T) {
 	tmpDir := t.TempDir()
 	setTestHome(t, tmpDir)
 	// Write config with extra fields
 	configPath := filepath.Join(tmpDir, ".ollama", "config.json")
 	if err := os.MkdirAll(filepath.Dir(configPath), 0o755); err != nil {
 		t.Fatalf("failed to create config dir: %v", err)
 	}
 	// Note: fields that the config structs do not declare (both top-level and
 	// per-integration) are not preserved by the current implementation. This
 	// test documents that and only asserts on the known fields.
 	initialConfig := `{
  "integrations": {
    "claude": {
      "models": ["model1"],
      "aliases": {"primary": "model1"},
      "unknownField": "should be lost"
    }
  },
  "topLevelUnknown": "will be lost"
 }`
 	if err := os.WriteFile(configPath, []byte(initialConfig), 0o644); err != nil {
 		t.Fatalf("failed to write initial config: %v", err)
 	}
 	// Update aliases
 	if err := SaveAliases("claude", map[string]string{"primary": "model2"}); err != nil {
 		t.Fatalf("failed to save: %v", err)
 	}
 	// Read raw file to check
 	data, err := os.ReadFile(configPath)
 	if err != nil {
 		t.Fatalf("failed to read config back: %v", err)
 	}
 	content := string(data)
 	// models should be preserved
 	if !contains(content, "model1") {
 		t.Error("models should be preserved")
 	}
 	// primary should be updated
 	if !contains(content, "model2") {
 		t.Error("primary should be updated to model2")
 	}
 }
 // contains reports whether substr occurs anywhere within s. An empty substr
 // is contained in every string, including the empty string.
 func contains(s, substr string) bool {
 	if len(substr) > len(s) {
 		return false
 	}
 	if s == substr {
 		return true
 	}
 	if len(s) == 0 {
 		return false
 	}
 	return containsHelper(s, substr)
 }
 // containsHelper scans s from left to right and reports whether any window of
 // len(substr) bytes equals substr.
 func containsHelper(s, substr string) bool {
 	width := len(substr)
 	for start := 0; start+width <= len(s); start++ {
 		if s[start:start+width] == substr {
 			return true
 		}
 	}
 	return false
 }
 // TestModelNameEdgeCases saves and reloads a primary alias for a variety of
 // model-name shapes (tags, namespaces, dots, digits) and checks each name
 // comes back unchanged. Every case runs against its own temporary home.
 func TestModelNameEdgeCases(t *testing.T) {
 	type nameCase struct {
 		name  string
 		model string
 	}
 	cases := []nameCase{
 		{name: "simple", model: "llama3.2"},
 		{name: "with tag", model: "llama3.2:latest"},
 		{name: "with cloud tag", model: "kimi-k2.5:cloud"},
 		{name: "with namespace", model: "library/llama3.2"},
 		{name: "with dots", model: "glm-4.7-flash"},
 		{name: "with numbers", model: "qwen3:8b"},
 	}
 	for _, c := range cases {
 		t.Run(c.name, func(t *testing.T) {
 			setTestHome(t, t.TempDir())
 			if err := SaveAliases("claude", map[string]string{"primary": c.model}); err != nil {
 				t.Fatalf("failed to save model %q: %v", c.model, err)
 			}
 			got, err := LoadIntegration("claude")
 			if err != nil {
 				t.Fatalf("failed to load: %v", err)
 			}
 			if primary := got.Aliases["primary"]; primary != c.model {
 				t.Errorf("expected primary=%q, got %q", c.model, primary)
 			}
 		})
 	}
 }
 // TestSwitchingScenarios saves successive alias sets for "claude" and checks
 // what LoadIntegration reports after each switch between cloud-style
 // (primary + fast) and local-style (primary only) configurations.
 func TestSwitchingScenarios(t *testing.T) {
 	t.Run("cloud to local removes fast", func(t *testing.T) {
 		tmpDir := t.TempDir()
 		setTestHome(t, tmpDir)
 		// Initial cloud config
 		if err := SaveAliases("claude", map[string]string{
 			"primary": "cloud-model",
 			"fast":    "cloud-model",
 		}); err != nil {
 			t.Fatal(err)
 		}
 		// Switch to local (no fast)
 		if err := SaveAliases("claude", map[string]string{
 			"primary": "local-model",
 		}); err != nil {
 			t.Fatal(err)
 		}
 		// Check the load error instead of discarding it, so a failed load is
 		// reported directly rather than surfacing as a misleading assertion.
 		loaded, err := LoadIntegration("claude")
 		if err != nil {
 			t.Fatalf("failed to load: %v", err)
 		}
 		if loaded.Aliases["fast"] != "" {
 			t.Errorf("fast should be removed, got %q", loaded.Aliases["fast"])
 		}
 		if loaded.Aliases["primary"] != "local-model" {
 			t.Errorf("primary should be local-model, got %q", loaded.Aliases["primary"])
 		}
 	})
 	t.Run("local to cloud adds fast", func(t *testing.T) {
 		tmpDir := t.TempDir()
 		setTestHome(t, tmpDir)
 		// Initial local config
 		if err := SaveAliases("claude", map[string]string{
 			"primary": "local-model",
 		}); err != nil {
 			t.Fatal(err)
 		}
 		// Switch to cloud (with fast)
 		if err := SaveAliases("claude", map[string]string{
 			"primary": "cloud-model",
 			"fast":    "cloud-model",
 		}); err != nil {
 			t.Fatal(err)
 		}
 		loaded, err := LoadIntegration("claude")
 		if err != nil {
 			t.Fatalf("failed to load: %v", err)
 		}
 		if loaded.Aliases["fast"] != "cloud-model" {
 			t.Errorf("fast should be cloud-model, got %q", loaded.Aliases["fast"])
 		}
 	})
 	t.Run("cloud to different cloud updates both", func(t *testing.T) {
 		tmpDir := t.TempDir()
 		setTestHome(t, tmpDir)
 		// Initial cloud config
 		if err := SaveAliases("claude", map[string]string{
 			"primary": "cloud-model-1",
 			"fast":    "cloud-model-1",
 		}); err != nil {
 			t.Fatal(err)
 		}
 		// Switch to different cloud
 		if err := SaveAliases("claude", map[string]string{
 			"primary": "cloud-model-2",
 			"fast":    "cloud-model-2",
 		}); err != nil {
 			t.Fatal(err)
 		}
 		loaded, err := LoadIntegration("claude")
 		if err != nil {
 			t.Fatalf("failed to load: %v", err)
 		}
 		if loaded.Aliases["primary"] != "cloud-model-2" {
 			t.Errorf("primary should be cloud-model-2, got %q", loaded.Aliases["primary"])
 		}
 		if loaded.Aliases["fast"] != "cloud-model-2" {
 			t.Errorf("fast should be cloud-model-2, got %q", loaded.Aliases["fast"])
 		}
 	})
 }
 func TestModelsAndAliasesMustStayInSync(t *testing.T) {
 	t.Run("saveAliases followed by saveIntegration keeps them in sync", func(t *testing.T) {
 		tmpDir := t.TempDir()
 		setTestHome(t, tmpDir)
 		// Save aliases with one model
 		if err := SaveAliases("claude", map[string]string{"primary": "model-a"}); err != nil {
 			t.Fatal(err)
 		}
 		// Save integration with same model (this is the pattern we use)
 		if err := SaveIntegration("claude", []string{"model-a"}); err != nil {
 			t.Fatal(err)
 		}
 		loaded, _ := LoadIntegration("claude")
 		if loaded.Aliases["primary"] != loaded.Models[0] {
 			t.Errorf("aliases.primary (%q) != models[0] (%q)", loaded.Aliases["primary"], loaded.Models[0])
 		}
 	})
 	t.Run("out of sync config is detectable", func(t *testing.T) {
 		tmpDir := t.TempDir()
 		setTestHome(t, tmpDir)
 		// Simulate out-of-sync state (like manual edit or bug)
 		if err := SaveIntegration("claude", []string{"old-model"}); err != nil {
 			t.Fatal(err)
 		}
 		if err := SaveAliases("claude", map[string]string{"primary": "new-model"}); err != nil {
 			t.Fatal(err)
 		}
 		loaded, _ := LoadIntegration("claude")
 		// They should be different (this is the bug state)
 		if loaded.Models[0] == loaded.Aliases["primary"] {
 			t.Error("expected out-of-sync state for this test")
 		}
 		// The fix: when updating aliases, also update models
 		if err := SaveIntegration("claude", []string{loaded.Aliases["primary"]}); err != nil {
 			t.Fatal(err)
 		}
 		loaded, _ = LoadIntegration("claude")
 		if loaded.Models[0] != loaded.Aliases["primary"] {
 			t.Errorf("after fix: models[0] (%q) should equal aliases.primary (%q)",
 				loaded.Models[0], loaded.Aliases["primary"])
 		}
 	})
 	t.Run("updating primary alias updates models too", func(t *testing.T) {
 		tmpDir := t.TempDir()
 		setTestHome(t, tmpDir)
 		// Initial state
 		if err := SaveIntegration("claude", []string{"initial-model"}); err != nil {
 			t.Fatal(err)
 		}
 		if err := SaveAliases("claude", map[string]string{"primary": "initial-model"}); err != nil {
 			t.Fatal(err)
 		}
 		// Update aliases AND models together
 		newAliases := map[string]string{"primary": "updated-model"}
 		if err := SaveAliases("claude", newAliases); err != nil {
 			t.Fatal(err)
 		}
 		if err := SaveIntegration("claude", []string{newAliases["primary"]}); err != nil {
 			t.Fatal(err)
 		}
 		loaded, _ := LoadIntegration("claude")
 		if loaded.Models[0] != "updated-model" {
 			t.Errorf("models[0] should be updated-model, got %q", loaded.Models[0])
 		}
 		if loaded.Aliases["primary"] != "updated-model" {
 			t.Errorf("aliases.primary should be updated-model, got %q", loaded.Aliases["primary"])
 		}
 	})
 }
--- a/cmd/config/config_test.go
+++ b/cmd/config/config_test.go
@@ -0,0 +1,530 @@
 package config
 import (
 	"os"
 	"path/filepath"
 	"strings"
 	"testing"
 )
 // setTestHome points HOME (Unix), USERPROFILE (Windows) and TMPDIR at dir for
 // the duration of the test, so home- and temp-relative paths resolve inside it.
 func setTestHome(t *testing.T, dir string) {
 	for _, key := range []string{"HOME", "TMPDIR", "USERPROFILE"} {
 		t.Setenv(key, dir)
 	}
 }
 // TestIntegrationConfig covers SaveIntegration / SaveAliases / LoadIntegration
 // round-trips. All subtests share one home directory, so state written by an
 // earlier subtest is visible to later ones.
 func TestIntegrationConfig(t *testing.T) {
 	tmpDir := t.TempDir()
 	setTestHome(t, tmpDir)
 	t.Run("save and load round-trip", func(t *testing.T) {
 		models := []string{"llama3.2", "mistral", "qwen2.5"}
 		if err := SaveIntegration("claude", models); err != nil {
 			t.Fatal(err)
 		}
 		config, err := LoadIntegration("claude")
 		if err != nil {
 			t.Fatal(err)
 		}
 		// Fatal, not Error: the loop below indexes config.Models by position
 		// and would go out of range if fewer models came back.
 		if len(config.Models) != len(models) {
 			t.Fatalf("expected %d models, got %d", len(models), len(config.Models))
 		}
 		for i, m := range models {
 			if config.Models[i] != m {
 				t.Errorf("model %d: expected %s, got %s", i, m, config.Models[i])
 			}
 		}
 	})
 	t.Run("save and load aliases", func(t *testing.T) {
 		models := []string{"llama3.2"}
 		if err := SaveIntegration("claude", models); err != nil {
 			t.Fatal(err)
 		}
 		aliases := map[string]string{
 			"primary": "llama3.2:70b",
 			"fast":    "llama3.2:8b",
 		}
 		if err := SaveAliases("claude", aliases); err != nil {
 			t.Fatal(err)
 		}
 		config, err := LoadIntegration("claude")
 		if err != nil {
 			t.Fatal(err)
 		}
 		if config.Aliases == nil {
 			t.Fatal("expected aliases to be saved")
 		}
 		for k, v := range aliases {
 			if config.Aliases[k] != v {
 				t.Errorf("alias %s: expected %s, got %s", k, v, config.Aliases[k])
 			}
 		}
 	})
 	t.Run("saveIntegration preserves aliases", func(t *testing.T) {
 		if err := SaveIntegration("claude", []string{"model-a"}); err != nil {
 			t.Fatal(err)
 		}
 		if err := SaveAliases("claude", map[string]string{"primary": "model-a", "fast": "model-small"}); err != nil {
 			t.Fatal(err)
 		}
 		if err := SaveIntegration("claude", []string{"model-b"}); err != nil {
 			t.Fatal(err)
 		}
 		config, err := LoadIntegration("claude")
 		if err != nil {
 			t.Fatal(err)
 		}
 		if config.Aliases["primary"] != "model-a" {
 			t.Errorf("expected aliases to be preserved, got %v", config.Aliases)
 		}
 	})
 	t.Run("defaultModel returns first model", func(t *testing.T) {
 		if err := SaveIntegration("codex", []string{"model-a", "model-b"}); err != nil {
 			t.Fatal(err)
 		}
 		config, err := LoadIntegration("codex")
 		if err != nil {
 			t.Fatal(err)
 		}
 		defaultModel := ""
 		if len(config.Models) > 0 {
 			defaultModel = config.Models[0]
 		}
 		if defaultModel != "model-a" {
 			t.Errorf("expected model-a, got %s", defaultModel)
 		}
 	})
 	t.Run("defaultModel returns empty for no models", func(t *testing.T) {
 		config := &integration{Models: []string{}}
 		defaultModel := ""
 		if len(config.Models) > 0 {
 			defaultModel = config.Models[0]
 		}
 		if defaultModel != "" {
 			t.Errorf("expected empty string, got %s", defaultModel)
 		}
 	})
 	t.Run("app name is case-insensitive", func(t *testing.T) {
 		if err := SaveIntegration("Claude", []string{"model-x"}); err != nil {
 			t.Fatal(err)
 		}
 		config, err := LoadIntegration("claude")
 		if err != nil {
 			t.Fatal(err)
 		}
 		defaultModel := ""
 		if len(config.Models) > 0 {
 			defaultModel = config.Models[0]
 		}
 		if defaultModel != "model-x" {
 			t.Errorf("expected model-x, got %s", defaultModel)
 		}
 	})
 	t.Run("multiple integrations in single file", func(t *testing.T) {
 		if err := SaveIntegration("app1", []string{"model-1"}); err != nil {
 			t.Fatal(err)
 		}
 		if err := SaveIntegration("app2", []string{"model-2"}); err != nil {
 			t.Fatal(err)
 		}
 		config1, err := LoadIntegration("app1")
 		if err != nil {
 			t.Fatal(err)
 		}
 		config2, err := LoadIntegration("app2")
 		if err != nil {
 			t.Fatal(err)
 		}
 		defaultModel1 := ""
 		if len(config1.Models) > 0 {
 			defaultModel1 = config1.Models[0]
 		}
 		defaultModel2 := ""
 		if len(config2.Models) > 0 {
 			defaultModel2 = config2.Models[0]
 		}
 		if defaultModel1 != "model-1" {
 			t.Errorf("expected model-1, got %s", defaultModel1)
 		}
 		if defaultModel2 != "model-2" {
 			t.Errorf("expected model-2, got %s", defaultModel2)
 		}
 	})
 }
 // TestListIntegrations checks listIntegrations on an empty home directory and
 // again after two integrations have been saved. The subtests share one home
 // directory and run in order.
 func TestListIntegrations(t *testing.T) {
 	tmpDir := t.TempDir()
 	setTestHome(t, tmpDir)
 	t.Run("returns empty when no integrations", func(t *testing.T) {
 		configs, err := listIntegrations()
 		if err != nil {
 			t.Fatal(err)
 		}
 		if len(configs) != 0 {
 			t.Errorf("expected 0 integrations, got %d", len(configs))
 		}
 	})
 	t.Run("returns all saved integrations", func(t *testing.T) {
 		// Report a failed save here rather than as a wrong count below.
 		if err := SaveIntegration("claude", []string{"model-1"}); err != nil {
 			t.Fatal(err)
 		}
 		if err := SaveIntegration("droid", []string{"model-2"}); err != nil {
 			t.Fatal(err)
 		}
 		configs, err := listIntegrations()
 		if err != nil {
 			t.Fatal(err)
 		}
 		if len(configs) != 2 {
 			t.Errorf("expected 2 integrations, got %d", len(configs))
 		}
 	})
 }
 // TestLoadIntegration_CorruptedJSON writes an unparseable config.json under
 // <home>/.ollama and expects LoadIntegration to return an error.
 func TestLoadIntegration_CorruptedJSON(t *testing.T) {
 	tmpDir := t.TempDir()
 	setTestHome(t, tmpDir)
 	dir := filepath.Join(tmpDir, ".ollama")
 	// The assertion below is only meaningful if the corrupt file really exists,
 	// so fixture errors fail the test immediately.
 	if err := os.MkdirAll(dir, 0o755); err != nil {
 		t.Fatalf("creating config dir: %v", err)
 	}
 	if err := os.WriteFile(filepath.Join(dir, "config.json"), []byte(`{corrupted json`), 0o644); err != nil {
 		t.Fatalf("writing corrupt config: %v", err)
 	}
 	_, err := LoadIntegration("test")
 	if err == nil {
 		t.Error("expected error for nonexistent integration in corrupted file")
 	}
 }
 // TestSaveIntegration_NilModels checks that saving a nil model list succeeds
 // and loads back as either a nil or an empty slice.
 func TestSaveIntegration_NilModels(t *testing.T) {
 	tmpDir := t.TempDir()
 	setTestHome(t, tmpDir)
 	if err := SaveIntegration("test", nil); err != nil {
 		t.Fatalf("saveIntegration with nil models failed: %v", err)
 	}
 	config, err := LoadIntegration("test")
 	if err != nil {
 		t.Fatalf("loadIntegration failed: %v", err)
 	}
 	// len of a nil slice is 0, so this one check accepts both nil and empty.
 	if len(config.Models) != 0 {
 		t.Errorf("expected empty or nil models, got %v", config.Models)
 	}
 }
 // TestSaveIntegration_EmptyAppName expects SaveIntegration to reject an empty
 // app name with an error mentioning "app name cannot be empty".
 func TestSaveIntegration_EmptyAppName(t *testing.T) {
 	setTestHome(t, t.TempDir())
 	switch err := SaveIntegration("", []string{"model"}); {
 	case err == nil:
 		t.Error("expected error for empty app name, got nil")
 	case !strings.Contains(err.Error(), "app name cannot be empty"):
 		t.Errorf("expected 'app name cannot be empty' error, got: %v", err)
 	}
 }
 // TestLoadIntegration_NonexistentIntegration expects LoadIntegration to
 // return an error for an app that was never saved.
 func TestLoadIntegration_NonexistentIntegration(t *testing.T) {
 	tmpDir := t.TempDir()
 	setTestHome(t, tmpDir)
 	_, err := LoadIntegration("nonexistent")
 	if err == nil {
 		t.Fatal("expected error for nonexistent integration, got nil")
 	}
 	// The original condition was inverted: it logged "as expected" only when
 	// the error was NOT a not-exist error.
 	// NOTE(review): the error kind is only logged, not asserted, because
 	// LoadIntegration's error contract is not visible here. Promote the else
 	// branch to t.Errorf once os.ErrNotExist is confirmed as the contract.
 	if os.IsNotExist(err) {
 		t.Logf("error type is os.ErrNotExist as expected: %v", err)
 	} else {
 		t.Logf("error is not os.ErrNotExist: %v", err)
 	}
 }
 // TestConfigPath checks that configPath resolves to <home>/.ollama/config.json.
 func TestConfigPath(t *testing.T) {
 	home := t.TempDir()
 	setTestHome(t, home)
 	want := filepath.Join(home, ".ollama", "config.json")
 	got, err := configPath()
 	if err != nil {
 		t.Fatal(err)
 	}
 	if got != want {
 		t.Errorf("expected %s, got %s", want, got)
 	}
 }
 // TestLoad exercises load() against a missing file, a valid file and a
 // corrupted file. The subtests share one home directory and run in order, each
 // overwriting the config file left by the previous one.
 func TestLoad(t *testing.T) {
 	tmpDir := t.TempDir()
 	setTestHome(t, tmpDir)
 	// writeConfig replaces the config file with contents, failing the test on
 	// any filesystem error so load() is never judged against a stale file.
 	writeConfig := func(t *testing.T, contents string) {
 		t.Helper()
 		path, err := configPath()
 		if err != nil {
 			t.Fatal(err)
 		}
 		if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
 			t.Fatalf("creating config dir: %v", err)
 		}
 		if err := os.WriteFile(path, []byte(contents), 0o644); err != nil {
 			t.Fatalf("writing config: %v", err)
 		}
 	}
 	t.Run("returns empty config when file does not exist", func(t *testing.T) {
 		cfg, err := load()
 		if err != nil {
 			t.Fatal(err)
 		}
 		if cfg == nil {
 			t.Fatal("expected non-nil config")
 		}
 		if cfg.Integrations == nil {
 			t.Error("expected non-nil Integrations map")
 		}
 		if len(cfg.Integrations) != 0 {
 			t.Errorf("expected empty Integrations, got %d", len(cfg.Integrations))
 		}
 	})
 	t.Run("loads existing config", func(t *testing.T) {
 		writeConfig(t, `{"integrations":{"test":{"models":["model-a"]}}}`)
 		cfg, err := load()
 		if err != nil {
 			t.Fatal(err)
 		}
 		if cfg.Integrations["test"] == nil {
 			t.Fatal("expected test integration")
 		}
 		if len(cfg.Integrations["test"].Models) != 1 {
 			t.Errorf("expected 1 model, got %d", len(cfg.Integrations["test"].Models))
 		}
 	})
 	t.Run("returns error for corrupted JSON", func(t *testing.T) {
 		writeConfig(t, `{corrupted`)
 		_, err := load()
 		if err == nil {
 			t.Error("expected error for corrupted JSON")
 		}
 	})
 }
 // TestMigrateConfig covers moving the legacy config file
 // (<home>/.ollama/config/config.json) to the new location
 // (<home>/.ollama/config.json), both through migrateConfig directly and
 // through load() and SaveIntegration. Each subtest uses its own home directory.
 func TestMigrateConfig(t *testing.T) {
 	// seedLegacy writes contents to the legacy config path under home and
 	// returns the legacy directory. Filesystem errors fail the test at once so
 	// no assertion runs against a half-built fixture.
 	seedLegacy := func(t *testing.T, home string, contents []byte) string {
 		t.Helper()
 		legacyDir := filepath.Join(home, ".ollama", "config")
 		if err := os.MkdirAll(legacyDir, 0o755); err != nil {
 			t.Fatalf("creating legacy dir: %v", err)
 		}
 		if err := os.WriteFile(filepath.Join(legacyDir, "config.json"), contents, 0o644); err != nil {
 			t.Fatalf("writing legacy config: %v", err)
 		}
 		return legacyDir
 	}
 	t.Run("migrates legacy file to new location", func(t *testing.T) {
 		tmpDir := t.TempDir()
 		setTestHome(t, tmpDir)
 		data := []byte(`{"integrations":{"claude":{"models":["llama3.2"]}}}`)
 		legacyDir := seedLegacy(t, tmpDir, data)
 		migrated, err := migrateConfig()
 		if err != nil {
 			t.Fatal(err)
 		}
 		if !migrated {
 			t.Fatal("expected migration to occur")
 		}
 		newPath, err := configPath()
 		if err != nil {
 			t.Fatal(err)
 		}
 		got, err := os.ReadFile(newPath)
 		if err != nil {
 			t.Fatalf("new config not found: %v", err)
 		}
 		if string(got) != string(data) {
 			t.Errorf("content mismatch: got %s", got)
 		}
 		if _, err := os.Stat(filepath.Join(legacyDir, "config.json")); !os.IsNotExist(err) {
 			t.Error("legacy file should have been removed")
 		}
 		if _, err := os.Stat(legacyDir); !os.IsNotExist(err) {
 			t.Error("legacy directory should have been removed")
 		}
 	})
 	t.Run("no-op when no legacy file exists", func(t *testing.T) {
 		tmpDir := t.TempDir()
 		setTestHome(t, tmpDir)
 		migrated, err := migrateConfig()
 		if err != nil {
 			t.Fatal(err)
 		}
 		if migrated {
 			t.Error("expected no migration")
 		}
 	})
 	t.Run("skips corrupt legacy file", func(t *testing.T) {
 		tmpDir := t.TempDir()
 		setTestHome(t, tmpDir)
 		legacyDir := seedLegacy(t, tmpDir, []byte(`{corrupt`))
 		migrated, err := migrateConfig()
 		if err != nil {
 			t.Fatal(err)
 		}
 		if migrated {
 			t.Error("should not migrate corrupt file")
 		}
 		if _, err := os.Stat(filepath.Join(legacyDir, "config.json")); os.IsNotExist(err) {
 			t.Error("corrupt legacy file should not have been deleted")
 		}
 	})
 	t.Run("new path takes precedence over legacy", func(t *testing.T) {
 		tmpDir := t.TempDir()
 		setTestHome(t, tmpDir)
 		seedLegacy(t, tmpDir, []byte(`{"integrations":{"old":{"models":["old-model"]}}}`))
 		// The parent .ollama directory already exists because seedLegacy
 		// created the nested legacy directory.
 		newDir := filepath.Join(tmpDir, ".ollama")
 		if err := os.WriteFile(filepath.Join(newDir, "config.json"), []byte(`{"integrations":{"new":{"models":["new-model"]}}}`), 0o644); err != nil {
 			t.Fatalf("writing new config: %v", err)
 		}
 		cfg, err := load()
 		if err != nil {
 			t.Fatal(err)
 		}
 		if _, ok := cfg.Integrations["new"]; !ok {
 			t.Error("expected new-path integration to be loaded")
 		}
 		if _, ok := cfg.Integrations["old"]; ok {
 			t.Error("legacy integration should not have been loaded")
 		}
 	})
 	t.Run("idempotent when called twice", func(t *testing.T) {
 		tmpDir := t.TempDir()
 		setTestHome(t, tmpDir)
 		seedLegacy(t, tmpDir, []byte(`{"integrations":{}}`))
 		if _, err := migrateConfig(); err != nil {
 			t.Fatal(err)
 		}
 		migrated, err := migrateConfig()
 		if err != nil {
 			t.Fatal(err)
 		}
 		if migrated {
 			t.Error("second migration should be a no-op")
 		}
 	})
 	t.Run("legacy directory preserved if not empty", func(t *testing.T) {
 		tmpDir := t.TempDir()
 		setTestHome(t, tmpDir)
 		legacyDir := seedLegacy(t, tmpDir, []byte(`{"integrations":{}}`))
 		if err := os.WriteFile(filepath.Join(legacyDir, "other-file.txt"), []byte("keep me"), 0o644); err != nil {
 			t.Fatalf("writing sibling file: %v", err)
 		}
 		if _, err := migrateConfig(); err != nil {
 			t.Fatal(err)
 		}
 		if _, err := os.Stat(legacyDir); os.IsNotExist(err) {
 			t.Error("directory with other files should not have been removed")
 		}
 		if _, err := os.Stat(filepath.Join(legacyDir, "other-file.txt")); os.IsNotExist(err) {
 			t.Error("other files in legacy directory should be untouched")
 		}
 	})
 	t.Run("save writes to new path after migration", func(t *testing.T) {
 		tmpDir := t.TempDir()
 		setTestHome(t, tmpDir)
 		legacyDir := seedLegacy(t, tmpDir, []byte(`{"integrations":{"claude":{"models":["llama3.2"]}}}`))
 		// load triggers migration, then save should write to new path
 		if err := SaveIntegration("codex", []string{"qwen2.5"}); err != nil {
 			t.Fatal(err)
 		}
 		newPath := filepath.Join(tmpDir, ".ollama", "config.json")
 		if _, err := os.Stat(newPath); os.IsNotExist(err) {
 			t.Error("save should write to new path")
 		}
 		// old path should not be recreated
 		if _, err := os.Stat(filepath.Join(legacyDir, "config.json")); !os.IsNotExist(err) {
 			t.Error("save should not recreate legacy path")
 		}
 	})
 	t.Run("load triggers migration transparently", func(t *testing.T) {
 		tmpDir := t.TempDir()
 		setTestHome(t, tmpDir)
 		seedLegacy(t, tmpDir, []byte(`{"integrations":{"claude":{"models":["llama3.2"]}}}`))
 		cfg, err := load()
 		if err != nil {
 			t.Fatal(err)
 		}
 		// The length check keeps an empty model list a test failure, not an
 		// index-out-of-range panic.
 		claude := cfg.Integrations["claude"]
 		if claude == nil || len(claude.Models) == 0 || claude.Models[0] != "llama3.2" {
 			t.Error("migration via load() did not preserve data")
 		}
 	})
 }
 // TestSave checks that save() creates the config file and that a save/load
 // round-trip preserves integrations and their model lists. Both subtests
 // share one home directory.
 func TestSave(t *testing.T) {
 	tmpDir := t.TempDir()
 	setTestHome(t, tmpDir)
 	t.Run("creates config file", func(t *testing.T) {
 		cfg := &config{
 			Integrations: map[string]*integration{
 				"test": {Models: []string{"model-a", "model-b"}},
 			},
 		}
 		if err := save(cfg); err != nil {
 			t.Fatal(err)
 		}
 		path, err := configPath()
 		if err != nil {
 			t.Fatal(err)
 		}
 		if _, err := os.Stat(path); os.IsNotExist(err) {
 			t.Error("config file was not created")
 		}
 	})
 	t.Run("round-trip preserves data", func(t *testing.T) {
 		cfg := &config{
 			Integrations: map[string]*integration{
 				"claude": {Models: []string{"llama3.2", "mistral"}},
 				"codex":  {Models: []string{"qwen2.5"}},
 			},
 		}
 		if err := save(cfg); err != nil {
 			t.Fatal(err)
 		}
 		loaded, err := load()
 		if err != nil {
 			t.Fatal(err)
 		}
 		if len(loaded.Integrations) != 2 {
 			t.Errorf("expected 2 integrations, got %d", len(loaded.Integrations))
 		}
 		// Fatal, not Error: the next check dereferences this entry, and a nil
 		// *integration would panic instead of reporting a failure.
 		if loaded.Integrations["claude"] == nil {
 			t.Fatal("missing claude integration")
 		}
 		if len(loaded.Integrations["claude"].Models) != 2 {
 			t.Errorf("expected 2 models for claude, got %d", len(loaded.Integrations["claude"].Models))
 		}
 	})
 }
--- a/cmd/editor_unix.go
+++ b/cmd/editor_unix.go
@@ -0,0 +1,5 @@
 //go:build !windows
 package cmd
 // defaultEditor is the editor command for non-Windows builds.
 const defaultEditor = "vi"
--- a/cmd/editor_windows.go
+++ b/cmd/editor_windows.go
@@ -0,0 +1,5 @@
 //go:build windows
 package cmd
 // defaultEditor is the editor command for Windows builds.
 const defaultEditor = "edit"
--- a/cmd/interactive.go
+++ b/cmd/interactive.go
@@ -7,6 +7,7 @@ import (
 	"io"
 	"net/http"
 	"os"
 	"os/exec"
 	"path/filepath"
 	"regexp"
 	"slices"
@@ -16,6 +17,7 @@ import (
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/internal/modelref"
 	"github.com/ollama/ollama/readline"
 	"github.com/ollama/ollama/types/errtypes"
 	"github.com/ollama/ollama/types/model"
@@ -45,7 +47,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 		fmt.Fprintln(os.Stderr, "Use \"\"\" to begin a multi-line message.")
 		if opts.MultiModal {
-			fmt.Fprintf(os.Stderr, "Use %s to include .jpg, .png, or .webp images.\n", filepath.FromSlash("/path/to/file"))
+			fmt.Fprintf(os.Stderr, "Use %s to include .jpg, .png, .webp images, or .wav audio files.\n", filepath.FromSlash("/path/to/file"))
 		}
 		fmt.Fprintln(os.Stderr, "")
@@ -79,6 +81,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 		fmt.Fprintln(os.Stderr, "  Ctrl + w            Delete the word before the cursor")
 		fmt.Fprintln(os.Stderr, "")
 		fmt.Fprintln(os.Stderr, "  Ctrl + l            Clear the screen")
 		fmt.Fprintln(os.Stderr, "  Ctrl + g            Open default editor to compose a prompt")
 		fmt.Fprintln(os.Stderr, "  Ctrl + c            Stop the model from responding")
 		fmt.Fprintln(os.Stderr, "  Ctrl + d            Exit ollama (/bye)")
 		fmt.Fprintln(os.Stderr, "")
@@ -147,6 +150,18 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 			scanner.Prompt.UseAlt = false
 			sb.Reset()
 			continue
 		case errors.Is(err, readline.ErrEditPrompt):
 			sb.Reset()
 			content, err := editInExternalEditor(line)
 			if err != nil {
 				fmt.Fprintf(os.Stderr, "error: %v\n", err)
 				continue
 			}
 			if strings.TrimSpace(content) == "" {
 				continue
 			}
 			scanner.Prefill = content
 			continue
 		case err != nil:
 			return err
@@ -159,6 +174,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 			sb.WriteString(before)
 			if !ok {
 				fmt.Fprintln(&sb)
 				scanner.Prompt.UseAlt = true
 				continue
 			}
@@ -198,10 +214,17 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 			}
 			origOpts := opts.Copy()
 			client, err := api.ClientFromEnvironment()
 			if err != nil {
 				fmt.Println("error: couldn't connect to ollama server")
 				return err
 			}
 			opts.Model = args[1]
 			opts.Messages = []api.Message{}
 			opts.LoadedMessages = nil
 			fmt.Printf("Loading model '%s'\n", opts.Model)
-			opts.Think, err = inferThinkingOption(nil, &opts, thinkExplicitlySet)
+			info, err := client.Show(cmd.Context(), &api.ShowRequest{Model: opts.Model})
 			if err != nil {
 				if strings.Contains(err.Error(), "not found") {
 					fmt.Printf("Couldn't find model '%s'\n", opts.Model)
@@ -210,6 +233,11 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 				}
 				return err
 			}
 			applyShowResponseToRunOptions(&opts, info)
 			opts.Think, err = inferThinkingOption(&info.Capabilities, &opts, thinkExplicitlySet)
 			if err != nil {
 				return err
 			}
 			if err := loadOrUnloadModel(cmd, &opts); err != nil {
 				if strings.Contains(err.Error(), "not found") {
 					fmt.Printf("Couldn't find model '%s'\n", opts.Model)
@@ -525,6 +553,13 @@ func NewCreateRequest(name string, opts runOptions) *api.CreateRequest {
 		parentModel = ""
 	}
 	// Preserve explicit cloud intent for sessions started with `:cloud`.
 	// Cloud model metadata can return a source-less parent_model (for example
 	// "qwen3.5"), which would otherwise make `/save` create a local derivative.
 	if modelref.HasExplicitCloudSource(opts.Model) && !modelref.HasExplicitCloudSource(parentModel) {
 		parentModel = ""
 	}
 	req := &api.CreateRequest{
 		Model: name,
 		From:  cmp.Or(parentModel, opts.Model),
@@ -538,8 +573,10 @@ func NewCreateRequest(name string, opts runOptions) *api.CreateRequest {
 		req.Parameters = opts.Options
 	}
-	if len(opts.Messages) > 0 {
+	messages := slices.Clone(opts.LoadedMessages)
-		req.Messages = opts.Messages
+	messages = append(messages, opts.Messages...)
 	if len(messages) > 0 {
 		req.Messages = messages
 	}
 	return req
@@ -569,7 +606,7 @@ func extractFileNames(input string) []string {
 	// Regex to match file paths starting with optional drive letter, / ./ \ or .\ and include escaped or unescaped spaces (\ or %20)
 	// and followed by more characters and a file extension
 	// This will capture non filename strings, but we'll check for file existence to remove mismatches
-	regexPattern := `(?:[a-zA-Z]:)?(?:\./|/|\\)[\S\\ ]+?\.(?i:jpg|jpeg|png|webp)\b`
+	regexPattern := `(?:[a-zA-Z]:)?(?:\./|/|\\)[\S\\ ]+?\.(?i:jpg|jpeg|png|webp|wav)\b`
 	re := regexp.MustCompile(regexPattern)
 	return re.FindAllString(input, -1)
@@ -585,10 +622,16 @@ func extractFileData(input string) (string, []api.ImageData, error) {
 		if errors.Is(err, os.ErrNotExist) {
 			continue
 		} else if err != nil {
-			fmt.Fprintf(os.Stderr, "Couldn't process image: %q\n", err)
+			fmt.Fprintf(os.Stderr, "Couldn't process file: %q\n", err)
 			return "", imgs, err
 		}
-		fmt.Fprintf(os.Stderr, "Added image '%s'\n", nfp)
+		ext := strings.ToLower(filepath.Ext(nfp))
 		switch ext {
 		case ".wav":
 			fmt.Fprintf(os.Stderr, "Added audio '%s'\n", nfp)
 		default:
 			fmt.Fprintf(os.Stderr, "Added image '%s'\n", nfp)
 		}
 		input = strings.ReplaceAll(input, "'"+nfp+"'", "")
 		input = strings.ReplaceAll(input, "'"+fp+"'", "")
 		input = strings.ReplaceAll(input, fp, "")
@@ -597,6 +640,57 @@ func extractFileData(input string) (string, []api.ImageData, error) {
 	return strings.TrimSpace(input), imgs, nil
 }
 // editInExternalEditor writes content to a temporary file, opens it in the
 // user's editor attached to the current terminal, and returns the file's
 // contents afterwards with trailing newlines removed.
 //
 // The editor command is the first non-blank value among envconfig.Editor(),
 // $VISUAL, $EDITOR and the platform defaultEditor. It is split on whitespace,
 // so it may carry arguments, but a binary path containing spaces is not
 // supported. An error is returned when the binary cannot be found, the
 // temporary file cannot be prepared or read back, or the editor exits non-zero.
 func editInExternalEditor(content string) (string, error) {
 	// Treat whitespace-only settings as unset: strings.Fields yields an empty
 	// slice for them, and indexing element 0 of that would panic.
 	var editorArgs []string
 	for _, candidate := range []string{envconfig.Editor(), os.Getenv("VISUAL"), os.Getenv("EDITOR"), defaultEditor} {
 		if editorArgs = strings.Fields(candidate); len(editorArgs) > 0 {
 			break
 		}
 	}
 	if len(editorArgs) == 0 {
 		return "", errors.New("no editor configured, set OLLAMA_EDITOR to the path of your preferred editor")
 	}
 	// Check that the editor binary exists
 	name := editorArgs[0]
 	if _, err := exec.LookPath(name); err != nil {
 		return "", fmt.Errorf("editor %q not found, set OLLAMA_EDITOR to the path of your preferred editor", name)
 	}
 	tmpFile, err := os.CreateTemp("", "ollama-prompt-*.txt")
 	if err != nil {
 		return "", fmt.Errorf("creating temp file: %w", err)
 	}
 	defer os.Remove(tmpFile.Name())
 	if content != "" {
 		if _, err := tmpFile.WriteString(content); err != nil {
 			tmpFile.Close()
 			return "", fmt.Errorf("writing to temp file: %w", err)
 		}
 	}
 	// Close before launching the editor so it opens a fully written file; a
 	// failed close may mean the content never reached disk.
 	if err := tmpFile.Close(); err != nil {
 		return "", fmt.Errorf("closing temp file: %w", err)
 	}
 	args := append(editorArgs, tmpFile.Name())
 	cmd := exec.Command(args[0], args[1:]...)
 	cmd.Stdin = os.Stdin
 	cmd.Stdout = os.Stdout
 	cmd.Stderr = os.Stderr
 	if err := cmd.Run(); err != nil {
 		return "", fmt.Errorf("editor exited with error: %w", err)
 	}
 	data, err := os.ReadFile(tmpFile.Name())
 	if err != nil {
 		return "", fmt.Errorf("reading temp file: %w", err)
 	}
 	return strings.TrimRight(string(data), "\n"), nil
 }
 func getImageData(filePath string) ([]byte, error) {
 	file, err := os.Open(filePath)
 	if err != nil {
@@ -611,9 +705,9 @@ func getImageData(filePath string) ([]byte, error) {
 	}
 	contentType := http.DetectContentType(buf)
-	allowedTypes := []string{"image/jpeg", "image/jpg", "image/png", "image/webp"}
+	allowedTypes := []string{"image/jpeg", "image/jpg", "image/png", "image/webp", "audio/wave"}
 	if !slices.Contains(allowedTypes, contentType) {
-		return nil, fmt.Errorf("invalid image type: %s", contentType)
+		return nil, fmt.Errorf("invalid file type: %s", contentType)
 	}
 	info, err := file.Stat()
@@ -621,8 +715,7 @@ func getImageData(filePath string) ([]byte, error) {
 		return nil, err
 	}
-	// Check if the file size exceeds 100MB
+	var maxSize int64 = 100 * 1024 * 1024 // 100MB
 	var maxSize int64 = 100 * 1024 * 1024 // 100MB in bytes
 	if info.Size() > maxSize {
 		return nil, errors.New("file size exceeds maximum limit (100MB)")
 	}
--- a/cmd/interactive_test.go
+++ b/cmd/interactive_test.go
@@ -84,3 +84,33 @@ func TestExtractFileDataRemovesQuotedFilepath(t *testing.T) {
 	assert.Len(t, imgs, 1)
 	assert.Equal(t, cleaned, "before  after")
 }
 // TestExtractFileDataWAV writes a minimal PCM WAV file to disk and checks that
 // extractFileData picks its path out of the prompt, returns one attachment and
 // strips the path from the remaining text.
 func TestExtractFileDataWAV(t *testing.T) {
 	dir := t.TempDir()
 	fp := filepath.Join(dir, "sample.wav")
 	// 44-byte header + 0x234 (564) bytes of silent sample data = 608 bytes, so
 	// the buffer length agrees with the RIFF and data chunk sizes declared in
 	// the header below (the buffer was previously 600 bytes).
 	data := make([]byte, 44+0x234)
 	copy(data[:44], []byte{
 		'R', 'I', 'F', 'F',
 		0x58, 0x02, 0x00, 0x00, // file size - 8
 		'W', 'A', 'V', 'E',
 		'f', 'm', 't', ' ',
 		0x10, 0x00, 0x00, 0x00, // fmt chunk size
 		0x01, 0x00, // PCM
 		0x01, 0x00, // mono
 		0x80, 0x3e, 0x00, 0x00, // 16000 Hz
 		0x00, 0x7d, 0x00, 0x00, // byte rate
 		0x02, 0x00, // block align
 		0x10, 0x00, // 16-bit
 		'd', 'a', 't', 'a',
 		0x34, 0x02, 0x00, 0x00, // data size
 	})
 	if err := os.WriteFile(fp, data, 0o600); err != nil {
 		t.Fatalf("failed to write test audio: %v", err)
 	}
 	input := "before " + fp + " after"
 	cleaned, imgs, err := extractFileData(input)
 	assert.NoError(t, err)
 	assert.Len(t, imgs, 1)
 	assert.Equal(t, "before  after", cleaned)
 }
--- a/cmd/internal/fileutil/files.go
+++ b/cmd/internal/fileutil/files.go
@@ -0,0 +1,103 @@
 // Package fileutil provides small shared helpers for reading JSON files
 // and writing config files with backup-on-overwrite semantics.
 package fileutil
 import (
 	"bytes"
 	"encoding/json"
 	"fmt"
 	"os"
 	"path/filepath"
 	"time"
 )
 // ReadJSON reads a JSON object file into a generic map.
 func ReadJSON(path string) (map[string]any, error) {
 	data, err := os.ReadFile(path)
 	if err != nil {
 		return nil, err
 	}
 	var result map[string]any
 	if err := json.Unmarshal(data, &result); err != nil {
 		return nil, err
 	}
 	return result, nil
 }
 func copyFile(src, dst string) error {
 	info, err := os.Stat(src)
 	if err != nil {
 		return err
 	}
 	data, err := os.ReadFile(src)
 	if err != nil {
 		return err
 	}
 	return os.WriteFile(dst, data, info.Mode().Perm())
 }
 // BackupDir returns the shared backup directory used before overwriting files.
 func BackupDir() string {
 	return filepath.Join(os.TempDir(), "ollama-backups")
 }
 // backupToTmp copies srcPath into BackupDir() as "<basename>.<unix seconds>"
 // and returns the path of the copy. The backup directory is created if needed.
 // NOTE(review): the name has one-second resolution, so two backups of
 // same-named files within the same second write to the same path.
 func backupToTmp(srcPath string) (string, error) {
 	backupRoot := BackupDir()
 	if err := os.MkdirAll(backupRoot, 0o755); err != nil {
 		return "", err
 	}
 	stamped := fmt.Sprintf("%s.%d", filepath.Base(srcPath), time.Now().Unix())
 	target := filepath.Join(backupRoot, stamped)
 	if err := copyFile(srcPath, target); err != nil {
 		return "", err
 	}
 	return target, nil
 }
 // WriteWithBackup replaces the file at path with data. The data is staged in
 // a temp file in the same directory and then renamed over path. If path
 // already exists with different contents, it is first copied into the backup
 // directory; identical contents skip the backup. If the rename fails and a
 // backup was taken, the backup is copied back to path on a best-effort basis.
 func WriteWithBackup(path string, data []byte) error {
 	backupPath := ""
 	// backup must be created before any writes to the target file
 	current, readErr := os.ReadFile(path)
 	switch {
 	case readErr == nil:
 		if !bytes.Equal(current, data) {
 			saved, err := backupToTmp(path)
 			if err != nil {
 				return fmt.Errorf("backup failed: %w", err)
 			}
 			backupPath = saved
 		}
 	case !os.IsNotExist(readErr):
 		return fmt.Errorf("read existing file: %w", readErr)
 	}
 	staged, err := os.CreateTemp(filepath.Dir(path), ".tmp-*")
 	if err != nil {
 		return fmt.Errorf("create temp failed: %w", err)
 	}
 	stagedPath := staged.Name()
 	// discard removes the staged file, closing it first when it is still open.
 	discard := func(stillOpen bool) {
 		if stillOpen {
 			_ = staged.Close()
 		}
 		_ = os.Remove(stagedPath)
 	}
 	if _, err := staged.Write(data); err != nil {
 		discard(true)
 		return fmt.Errorf("write failed: %w", err)
 	}
 	if err := staged.Sync(); err != nil {
 		discard(true)
 		return fmt.Errorf("sync failed: %w", err)
 	}
 	if err := staged.Close(); err != nil {
 		discard(false)
 		return fmt.Errorf("close failed: %w", err)
 	}
 	if err := os.Rename(stagedPath, path); err != nil {
 		discard(false)
 		if backupPath != "" {
 			_ = copyFile(backupPath, path)
 		}
 		return fmt.Errorf("rename failed: %w", err)
 	}
 	return nil
 }
--- a/cmd/internal/fileutil/files_test.go
+++ b/cmd/internal/fileutil/files_test.go
@@ -0,0 +1,522 @@
 package fileutil
 import (
 	"encoding/json"
 	"fmt"
 	"os"
 	"path/filepath"
 	"runtime"
 	"testing"
 )
 // TestMain redirects the process temp directory to a throwaway root so that
 // BackupDir() (derived from os.TempDir) never touches the real system temp
 // directory, runs the tests, and removes that root afterwards.
 func TestMain(m *testing.M) {
 	tmpRoot, err := os.MkdirTemp("", "fileutil-test-*")
 	if err != nil {
 		panic(err)
 	}
 	// os.TempDir consults $TMPDIR on Unix but %TMP% / %TEMP% on Windows, so all
 	// three are set to keep the redirection effective on every platform.
 	for _, key := range []string{"TMPDIR", "TMP", "TEMP"} {
 		if err := os.Setenv(key, tmpRoot); err != nil {
 			panic(err)
 		}
 	}
 	code := m.Run()
 	_ = os.RemoveAll(tmpRoot)
 	os.Exit(code)
 }
 // mustMarshal encodes v as JSON indented with two spaces and fails the test
 // immediately when encoding is not possible.
 func mustMarshal(t *testing.T, v any) []byte {
 	t.Helper()
 	encoded, marshalErr := json.MarshalIndent(v, "", "  ")
 	if marshalErr == nil {
 		return encoded
 	}
 	t.Fatal(marshalErr)
 	return encoded
 }
 // isolatedTempDir hands back a fresh per-test directory obtained from t.TempDir.
 func isolatedTempDir(t *testing.T) string {
 	t.Helper()
 	dir := t.TempDir()
 	return dir
 }
 // TestWriteWithBackup covers the basic contract of WriteWithBackup: new files
 // are created, changed files are backed up into BackupDir() first, and no
 // backup is produced for new or unchanged files.
 func TestWriteWithBackup(t *testing.T) {
 	tmpDir := isolatedTempDir(t)
 	// isBackupOf reports whether name has the form "<base>.<non-empty suffix>",
 	// which is how backups of base are named.
 	isBackupOf := func(name, base string) bool {
 		prefix := base + "."
 		return len(name) > len(prefix) && name[:len(prefix)] == prefix
 	}
 	// countBackups counts the BackupDir() entries that are backups of base.
 	// A backup directory that does not exist yet counts as zero backups.
 	countBackups := func(base string) int {
 		entries, _ := os.ReadDir(BackupDir())
 		n := 0
 		for _, e := range entries {
 			if isBackupOf(e.Name(), base) {
 				n++
 			}
 		}
 		return n
 	}
 	t.Run("creates file", func(t *testing.T) {
 		path := filepath.Join(tmpDir, "new.json")
 		data := mustMarshal(t, map[string]string{"key": "value"})
 		if err := WriteWithBackup(path, data); err != nil {
 			t.Fatal(err)
 		}
 		content, err := os.ReadFile(path)
 		if err != nil {
 			t.Fatal(err)
 		}
 		var result map[string]string
 		if err := json.Unmarshal(content, &result); err != nil {
 			t.Fatal(err)
 		}
 		if result["key"] != "value" {
 			t.Errorf("expected value, got %s", result["key"])
 		}
 	})
 	t.Run("creates backup in the temp backup directory", func(t *testing.T) {
 		path := filepath.Join(tmpDir, "backup.json")
 		if err := os.WriteFile(path, []byte(`{"original": true}`), 0o644); err != nil {
 			t.Fatal(err)
 		}
 		data := mustMarshal(t, map[string]bool{"updated": true})
 		if err := WriteWithBackup(path, data); err != nil {
 			t.Fatal(err)
 		}
 		entries, err := os.ReadDir(BackupDir())
 		if err != nil {
 			t.Fatal("backup directory not created")
 		}
 		var foundBackup bool
 		for _, entry := range entries {
 			if !isBackupOf(entry.Name(), "backup.json") {
 				continue
 			}
 			backupPath := filepath.Join(BackupDir(), entry.Name())
 			backup, err := os.ReadFile(backupPath)
 			if err != nil {
 				continue
 			}
 			var backupData map[string]bool
 			// A candidate that is not the expected JSON simply fails the check below.
 			_ = json.Unmarshal(backup, &backupData)
 			if backupData["original"] {
 				foundBackup = true
 				os.Remove(backupPath)
 				break
 			}
 		}
 		if !foundBackup {
 			t.Error("backup file not created in backup directory")
 		}
 		current, err := os.ReadFile(path)
 		if err != nil {
 			t.Fatal(err)
 		}
 		var currentData map[string]bool
 		if err := json.Unmarshal(current, &currentData); err != nil {
 			t.Fatal(err)
 		}
 		if !currentData["updated"] {
 			t.Error("file doesn't contain updated data")
 		}
 	})
 	t.Run("no backup for new file", func(t *testing.T) {
 		path := filepath.Join(tmpDir, "nobak.json")
 		data := mustMarshal(t, map[string]string{"new": "file"})
 		if err := WriteWithBackup(path, data); err != nil {
 			t.Fatal(err)
 		}
 		if countBackups("nobak.json") > 0 {
 			t.Error("backup should not exist for new file")
 		}
 	})
 	t.Run("no backup when content unchanged", func(t *testing.T) {
 		path := filepath.Join(tmpDir, "unchanged.json")
 		data := mustMarshal(t, map[string]string{"key": "value"})
 		if err := WriteWithBackup(path, data); err != nil {
 			t.Fatal(err)
 		}
 		countBefore := countBackups("unchanged.json")
 		// Writing identical bytes again must not add a backup.
 		if err := WriteWithBackup(path, data); err != nil {
 			t.Fatal(err)
 		}
 		countAfter := countBackups("unchanged.json")
 		if countAfter != countBefore {
 			t.Errorf("backup was created when content unchanged (before=%d, after=%d)", countBefore, countAfter)
 		}
 	})
 	t.Run("backup filename contains unix timestamp", func(t *testing.T) {
 		path := filepath.Join(tmpDir, "timestamped.json")
 		if err := os.WriteFile(path, []byte(`{"v": 1}`), 0o644); err != nil {
 			t.Fatal(err)
 		}
 		data := mustMarshal(t, map[string]int{"v": 2})
 		if err := WriteWithBackup(path, data); err != nil {
 			t.Fatal(err)
 		}
 		entries, _ := os.ReadDir(BackupDir())
 		var found bool
 		for _, entry := range entries {
 			name := entry.Name()
 			if !isBackupOf(name, "timestamped.json") {
 				continue
 			}
 			// Everything after "timestamped.json." must be decimal digits.
 			timestamp := name[len("timestamped.json."):]
 			for _, c := range timestamp {
 				if c < '0' || c > '9' {
 					t.Errorf("backup filename timestamp contains non-numeric character: %s", name)
 				}
 			}
 			found = true
 			os.Remove(filepath.Join(BackupDir(), name))
 			break
 		}
 		if !found {
 			t.Error("backup file with timestamp not found")
 		}
 	})
 }
 // Edge case tests for files.go
 // TestWriteWithBackup_FailsIfBackupFails documents critical behavior: if the backup
 // cannot be created, WriteWithBackup must return an error and leave the
 // original file untouched. Otherwise the user could lose their config with no
 // way to recover.
 func TestWriteWithBackup_FailsIfBackupFails(t *testing.T) {
 	if runtime.GOOS == "windows" {
 		t.Skip("permission tests unreliable on Windows")
 	}
 	// Root bypasses directory mode bits, so the read-only backup dir below would
 	// not actually block the backup and the test would fail spuriously.
 	if os.Geteuid() == 0 {
 		t.Skip("permission tests are not meaningful when running as root")
 	}
 	tmpDir := isolatedTempDir(t)
 	path := filepath.Join(tmpDir, "config.json")
 	// Create original file
 	originalContent := []byte(`{"original": true}`)
 	if err := os.WriteFile(path, originalContent, 0o644); err != nil {
 		t.Fatal(err)
 	}
 	// Make backup directory read-only to force backup failure
 	backupDir := BackupDir()
 	if err := os.MkdirAll(backupDir, 0o755); err != nil {
 		t.Fatal(err)
 	}
 	if err := os.Chmod(backupDir, 0o444); err != nil {
 		t.Fatal(err)
 	}
 	defer os.Chmod(backupDir, 0o755)
 	newContent := []byte(`{"updated": true}`)
 	err := WriteWithBackup(path, newContent)
 	// Should fail because backup couldn't be created
 	if err == nil {
 		t.Error("expected error when backup fails, got nil")
 	}
 	// Original file should be preserved
 	current, readErr := os.ReadFile(path)
 	if readErr != nil {
 		t.Fatalf("could not read original file: %v", readErr)
 	}
 	if string(current) != string(originalContent) {
 		t.Errorf("original file was modified despite backup failure: got %s", string(current))
 	}
 }
 // TestWriteWithBackup_PermissionDenied verifies that writing into a directory
 // without write permission is reported as an error.
 // Common issue when config owned by root or wrong perms.
 func TestWriteWithBackup_PermissionDenied(t *testing.T) {
 	if runtime.GOOS == "windows" {
 		t.Skip("permission tests unreliable on Windows")
 	}
 	// Root bypasses directory mode bits, so the write below would succeed.
 	if os.Geteuid() == 0 {
 		t.Skip("permission tests are not meaningful when running as root")
 	}
 	tmpDir := isolatedTempDir(t)
 	// Create a read-only directory
 	readOnlyDir := filepath.Join(tmpDir, "readonly")
 	if err := os.MkdirAll(readOnlyDir, 0o755); err != nil {
 		t.Fatal(err)
 	}
 	if err := os.Chmod(readOnlyDir, 0o444); err != nil {
 		t.Fatal(err)
 	}
 	// Restore permissions so the test temp dir can be cleaned up.
 	defer os.Chmod(readOnlyDir, 0o755)
 	path := filepath.Join(readOnlyDir, "config.json")
 	err := WriteWithBackup(path, []byte(`{"test": true}`))
 	if err == nil {
 		t.Error("expected permission error, got nil")
 	}
 }
 // TestWriteWithBackup_DirectoryDoesNotExist checks that writing below a missing
 // directory is reported as an error. WriteWithBackup does not create the
 // target's parent directories; that is left to the caller.
 func TestWriteWithBackup_DirectoryDoesNotExist(t *testing.T) {
 	missingParent := filepath.Join(isolatedTempDir(t), "nonexistent", "subdir")
 	target := filepath.Join(missingParent, "config.json")
 	// The parent chain was never created, so the write has to fail.
 	if err := WriteWithBackup(target, []byte(`{"test": true}`)); err == nil {
 		t.Error("expected error for nonexistent directory, got nil")
 	}
 }
 // TestWriteWithBackup_SymlinkTarget documents behavior when the target path is a
 // symlink (e.g. the user symlinked their config file): the write succeeds and
 // reading the path afterwards yields the new content.
 func TestWriteWithBackup_SymlinkTarget(t *testing.T) {
 	if runtime.GOOS == "windows" {
 		t.Skip("symlink tests may require admin on Windows")
 	}
 	tmpDir := isolatedTempDir(t)
 	realFile := filepath.Join(tmpDir, "real.json")
 	symlink := filepath.Join(tmpDir, "link.json")
 	// Create real file and symlink
 	if err := os.WriteFile(realFile, []byte(`{"v": 1}`), 0o644); err != nil {
 		t.Fatal(err)
 	}
 	if err := os.Symlink(realFile, symlink); err != nil {
 		t.Fatal(err)
 	}
 	// Write through symlink
 	err := WriteWithBackup(symlink, []byte(`{"v": 2}`))
 	if err != nil {
 		t.Fatalf("writeWithBackup through symlink failed: %v", err)
 	}
 	// Read back through the same path. This only shows that the path now resolves
 	// to the new content; it does not show that realFile itself was modified.
 	// NOTE(review): WriteWithBackup finishes with os.Rename onto the given path,
 	// which presumably replaces the link rather than following it - confirm
 	// whether that is the intended behavior for symlinked configs.
 	content, readErr := os.ReadFile(symlink)
 	if readErr != nil {
 		t.Fatalf("could not read back through symlink path: %v", readErr)
 	}
 	if string(content) != `{"v": 2}` {
 		t.Errorf("symlink target not updated correctly: got %s", string(content))
 	}
 }
 // TestBackupToTmp_SpecialCharsInFilename checks that backupToTmp copes with a
 // file name containing spaces and parentheses and that the backup holds the
 // same bytes as the source.
 func TestBackupToTmp_SpecialCharsInFilename(t *testing.T) {
 	const payload = `{"test": true}`
 	// File with spaces and special chars
 	source := filepath.Join(isolatedTempDir(t), "my config (backup).json")
 	os.WriteFile(source, []byte(payload), 0o644)
 	backupPath, err := backupToTmp(source)
 	if err != nil {
 		t.Fatalf("backupToTmp with special chars failed: %v", err)
 	}
 	// The backup must exist and match the source content.
 	got, readErr := os.ReadFile(backupPath)
 	if readErr != nil {
 		t.Fatalf("could not read backup: %v", readErr)
 	}
 	if string(got) != payload {
 		t.Errorf("backup content mismatch: got %s", string(got))
 	}
 	os.Remove(backupPath)
 }
 // TestCopyFile_PreservesPermissions verifies that copyFile gives the destination
 // the same permission bits as the source.
 func TestCopyFile_PreservesPermissions(t *testing.T) {
 	if runtime.GOOS == "windows" {
 		t.Skip("permission preservation tests unreliable on Windows")
 	}
 	tmpDir := isolatedTempDir(t)
 	src := filepath.Join(tmpDir, "src.json")
 	dst := filepath.Join(tmpDir, "dst.json")
 	// Create source with specific permissions
 	if err := os.WriteFile(src, []byte(`{"test": true}`), 0o600); err != nil {
 		t.Fatal(err)
 	}
 	err := copyFile(src, dst)
 	if err != nil {
 		t.Fatalf("copyFile failed: %v", err)
 	}
 	// Check the Stat errors: a nil FileInfo would otherwise panic below instead
 	// of producing a readable test failure.
 	srcInfo, err := os.Stat(src)
 	if err != nil {
 		t.Fatalf("stat source: %v", err)
 	}
 	dstInfo, err := os.Stat(dst)
 	if err != nil {
 		t.Fatalf("stat destination: %v", err)
 	}
 	if srcInfo.Mode().Perm() != dstInfo.Mode().Perm() {
 		t.Errorf("permissions not preserved: src=%v, dst=%v", srcInfo.Mode().Perm(), dstInfo.Mode().Perm())
 	}
 }
 // TestCopyFile_SourceNotFound checks that copyFile reports an error when the
 // source file does not exist.
 func TestCopyFile_SourceNotFound(t *testing.T) {
 	workDir := isolatedTempDir(t)
 	missing := filepath.Join(workDir, "nonexistent.json")
 	target := filepath.Join(workDir, "dst.json")
 	if err := copyFile(missing, target); err == nil {
 		t.Error("expected error for nonexistent source, got nil")
 	}
 }
 // TestWriteWithBackup_TargetIsDirectory checks that WriteWithBackup returns an
 // error when the given path is an existing directory.
 func TestWriteWithBackup_TargetIsDirectory(t *testing.T) {
 	dirPath := filepath.Join(isolatedTempDir(t), "actualdir")
 	os.MkdirAll(dirPath, 0o755)
 	if err := WriteWithBackup(dirPath, []byte(`{"test": true}`)); err == nil {
 		t.Error("expected error when target is a directory, got nil")
 	}
 }
 // TestWriteWithBackup_EmptyData checks that writing a zero-length payload
 // succeeds and leaves an empty file behind.
 func TestWriteWithBackup_EmptyData(t *testing.T) {
 	target := filepath.Join(isolatedTempDir(t), "empty.json")
 	if err := WriteWithBackup(target, []byte{}); err != nil {
 		t.Fatalf("writeWithBackup with empty data failed: %v", err)
 	}
 	written, readErr := os.ReadFile(target)
 	if readErr != nil {
 		t.Fatalf("could not read file: %v", readErr)
 	}
 	if size := len(written); size != 0 {
 		t.Errorf("expected empty file, got %d bytes", size)
 	}
 }
 // TestWriteWithBackup_FileUnreadableButDirWritable verifies behavior when the existing
 // file cannot be read (for backup comparison) but its directory is writable:
 // WriteWithBackup must return an error rather than overwrite the file.
 func TestWriteWithBackup_FileUnreadableButDirWritable(t *testing.T) {
 	if runtime.GOOS == "windows" {
 		t.Skip("permission tests unreliable on Windows")
 	}
 	// Root can read a mode-000 file, so the expected read failure never happens.
 	if os.Geteuid() == 0 {
 		t.Skip("permission tests are not meaningful when running as root")
 	}
 	tmpDir := isolatedTempDir(t)
 	path := filepath.Join(tmpDir, "unreadable.json")
 	// Create file and make it unreadable
 	if err := os.WriteFile(path, []byte(`{"original": true}`), 0o644); err != nil {
 		t.Fatal(err)
 	}
 	if err := os.Chmod(path, 0o000); err != nil {
 		t.Fatal(err)
 	}
 	defer os.Chmod(path, 0o644)
 	// Should fail because we can't read the file to compare/backup
 	err := WriteWithBackup(path, []byte(`{"updated": true}`))
 	if err == nil {
 		t.Error("expected error when file is unreadable, got nil")
 	}
 }
 // TestWriteWithBackup_RapidSuccessiveWrites performs several writes back to back
 // (likely within the same second, i.e. the backup-name collision scenario) and
 // checks that the last write wins and that at least one backup exists.
 func TestWriteWithBackup_RapidSuccessiveWrites(t *testing.T) {
 	target := filepath.Join(isolatedTempDir(t), "rapid.json")
 	// Seed the file so that every following write has something to back up.
 	os.WriteFile(target, []byte(`{"v": 0}`), 0o644)
 	for _, version := range []int{1, 2, 3} {
 		payload := []byte(fmt.Sprintf(`{"v": %d}`, version))
 		if err := WriteWithBackup(target, payload); err != nil {
 			t.Fatalf("write %d failed: %v", version, err)
 		}
 	}
 	// The file must hold the content of the last write.
 	final, _ := os.ReadFile(target)
 	if string(final) != `{"v": 3}` {
 		t.Errorf("expected final content {\"v\": 3}, got %s", string(final))
 	}
 	// Count backup entries named "rapid.json.<suffix>".
 	listing, _ := os.ReadDir(BackupDir())
 	backupCount := 0
 	for _, item := range listing {
 		name := item.Name()
 		if len(name) <= len("rapid.json.") {
 			continue
 		}
 		if name[:len("rapid.json.")] == "rapid.json." {
 			backupCount++
 		}
 	}
 	if backupCount == 0 {
 		t.Error("expected at least one backup file from rapid writes")
 	}
 }
 // TestWriteWithBackup_BackupDirIsFile checks that WriteWithBackup fails when the
 // BackupDir() path is occupied by a regular file instead of a directory.
 func TestWriteWithBackup_BackupDirIsFile(t *testing.T) {
 	if runtime.GOOS == "windows" {
 		t.Skip("test modifies system temp directory")
 	}
 	workDir := isolatedTempDir(t)
 	blocker := BackupDir()
 	// Swap any existing backup directory for a plain file at the same path.
 	os.RemoveAll(blocker)
 	os.WriteFile(blocker, []byte("not a directory"), 0o644)
 	// Afterwards remove the file and recreate the directory.
 	defer func() {
 		os.Remove(blocker)
 		os.MkdirAll(blocker, 0o755)
 	}()
 	target := filepath.Join(workDir, "test.json")
 	os.WriteFile(target, []byte(`{"original": true}`), 0o644)
 	if err := WriteWithBackup(target, []byte(`{"updated": true}`)); err == nil {
 		t.Error("expected error when backup dir is a file, got nil")
 	}
 }
 // TestWriteWithBackup_NoOrphanTempFiles checks that a failed WriteWithBackup call
 // does not leave ".tmp*" files behind in the test directory.
 // NOTE(review): the failure is forced by pointing WriteWithBackup at a
 // directory; that presumably fails while reading the existing target, before
 // any temp file is created, so the cleanup paths may not be exercised - verify.
 func TestWriteWithBackup_NoOrphanTempFiles(t *testing.T) {
 	if runtime.GOOS == "windows" {
 		t.Skip("permission tests unreliable on Windows")
 	}
 	tmpDir := isolatedTempDir(t)
 	// tempFileCount tallies tmpDir entries whose names begin with ".tmp" and are
 	// longer than those four characters.
 	tempFileCount := func() int {
 		listing, _ := os.ReadDir(tmpDir)
 		total := 0
 		for _, item := range listing {
 			name := item.Name()
 			if len(name) <= 4 || name[:4] != ".tmp" {
 				continue
 			}
 			total++
 		}
 		return total
 	}
 	before := tempFileCount()
 	// Populate the directory with a regular file and a subdirectory holding a
 	// second file. A rename-only failure is hard to provoke because the temp
 	// file is created in the same directory as the target, so these fixtures
 	// are not used to trigger the failure below.
 	os.WriteFile(filepath.Join(tmpDir, "orphan.json"), []byte(`{"v": 1}`), 0o644)
 	subDir := filepath.Join(tmpDir, "subdir")
 	os.MkdirAll(subDir, 0o755)
 	os.WriteFile(filepath.Join(subDir, "config.json"), []byte(`{"v": 1}`), 0o644)
 	// Force a failure by making the target a directory.
 	badPath := filepath.Join(tmpDir, "isdir")
 	os.MkdirAll(badPath, 0o755)
 	_ = WriteWithBackup(badPath, []byte(`{"test": true}`))
 	if after := tempFileCount(); after > before {
 		t.Errorf("orphan temp files left behind: before=%d, after=%d", before, after)
 	}
 }
--- a/cmd/launch/claude.go
+++ b/cmd/launch/claude.go
@@ -0,0 +1,87 @@
 package launch
 import (
 	"fmt"
 	"os"
 	"os/exec"
 	"path/filepath"
 	"runtime"
 	"strconv"
 	"github.com/ollama/ollama/envconfig"
 )
 // Claude is the Runner implementation for the Claude Code integration.
 type Claude struct{}
 // String returns the display name of the integration.
 func (c *Claude) String() string {
 	return "Claude Code"
 }
 // args builds the claude command line: "--model <model>" when a model is given,
 // followed by the caller's extra arguments unchanged.
 func (c *Claude) args(model string, extra []string) []string {
 	var cmdline []string
 	if model != "" {
 		cmdline = []string{"--model", model}
 	}
 	return append(cmdline, extra...)
 }
 // findPath locates the claude executable. A "claude" found on PATH wins;
 // otherwise ~/.claude/local/claude (claude.exe on Windows) is used if it
 // exists. An error is returned when the home directory cannot be determined
 // or the fallback file cannot be stat'ed.
 func (c *Claude) findPath() (string, error) {
 	onPath, lookErr := exec.LookPath("claude")
 	if lookErr == nil {
 		return onPath, nil
 	}
 	home, err := os.UserHomeDir()
 	if err != nil {
 		return "", err
 	}
 	binary := "claude"
 	if runtime.GOOS == "windows" {
 		binary = "claude.exe"
 	}
 	candidate := filepath.Join(home, ".claude", "local", binary)
 	if _, statErr := os.Stat(candidate); statErr != nil {
 		return "", statErr
 	}
 	return candidate, nil
 }
 // Run starts Claude Code attached to this process's stdin/stdout/stderr. The
 // child inherits the current environment plus variables that point it at the
 // Ollama host (ANTHROPIC_BASE_URL), blank ANTHROPIC_API_KEY, set a fixed auth
 // token, and map the Claude model tiers to model. It returns an install hint
 // when the claude binary cannot be located, otherwise the result of running
 // the command.
 func (c *Claude) Run(model string, args []string) error {
 	claudePath, err := c.findPath()
 	if err != nil {
 		return fmt.Errorf("claude is not installed, install from https://code.claude.com/docs/en/quickstart")
 	}
 	// Appended entries come after the inherited environment.
 	env := os.Environ()
 	env = append(env,
 		"ANTHROPIC_BASE_URL="+envconfig.Host().String(),
 		"ANTHROPIC_API_KEY=",
 		"ANTHROPIC_AUTH_TOKEN=ollama",
 		"CLAUDE_CODE_ATTRIBUTION_HEADER=0",
 	)
 	env = append(env, c.modelEnvVars(model)...)
 	cmd := exec.Command(claudePath, c.args(model, args)...)
 	cmd.Stdin, cmd.Stdout, cmd.Stderr = os.Stdin, os.Stdout, os.Stderr
 	cmd.Env = env
 	return cmd.Run()
 }
 // modelEnvVars returns the Claude Code environment entries that route every
 // model tier (opus, sonnet, haiku, subagent) to model. For cloud model names
 // with a known limit it additionally sets CLAUDE_CODE_AUTO_COMPACT_WINDOW to
 // that limit's Context value.
 func (c *Claude) modelEnvVars(model string) []string {
 	vars := []string{
 		"ANTHROPIC_DEFAULT_OPUS_MODEL=" + model,
 		"ANTHROPIC_DEFAULT_SONNET_MODEL=" + model,
 		"ANTHROPIC_DEFAULT_HAIKU_MODEL=" + model,
 		"CLAUDE_CODE_SUBAGENT_MODEL=" + model,
 	}
 	if !isCloudModelName(model) {
 		return vars
 	}
 	limit, known := lookupCloudModelLimit(model)
 	if !known {
 		return vars
 	}
 	return append(vars, "CLAUDE_CODE_AUTO_COMPACT_WINDOW="+strconv.Itoa(limit.Context))
 }
--- a/cmd/launch/claude_test.go
+++ b/cmd/launch/claude_test.go
@@ -0,0 +1,171 @@
 package launch
 import (
 	"os"
 	"path/filepath"
 	"runtime"
 	"slices"
 	"strings"
 	"testing"
 )
 // TestClaudeIntegration checks Claude's display name and that *Claude satisfies
 // the Runner interface.
 func TestClaudeIntegration(t *testing.T) {
 	claude := &Claude{}
 	t.Run("String", func(t *testing.T) {
 		got := claude.String()
 		if got != "Claude Code" {
 			t.Errorf("String() = %q, want %q", got, "Claude Code")
 		}
 	})
 	t.Run("implements Runner", func(t *testing.T) {
 		// Compile-time assertion only; nothing to check at run time.
 		var _ Runner = claude
 	})
 }
 // TestClaudeFindPath exercises the lookup order of Claude.findPath: PATH first,
 // then ~/.claude/local, and an error when neither location has the binary.
 func TestClaudeFindPath(t *testing.T) {
 	c := &Claude{}
 	// binaryName is the platform-specific file name of the claude executable.
 	binaryName := func() string {
 		if runtime.GOOS == "windows" {
 			return "claude.exe"
 		}
 		return "claude"
 	}
 	t.Run("finds claude in PATH", func(t *testing.T) {
 		binDir := t.TempDir()
 		fakeBin := filepath.Join(binDir, binaryName())
 		os.WriteFile(fakeBin, []byte("#!/bin/sh\n"), 0o755)
 		t.Setenv("PATH", binDir)
 		got, err := c.findPath()
 		if err != nil {
 			t.Fatalf("unexpected error: %v", err)
 		}
 		if got != fakeBin {
 			t.Errorf("findPath() = %q, want %q", got, fakeBin)
 		}
 	})
 	t.Run("falls back to ~/.claude/local/claude", func(t *testing.T) {
 		homeDir := t.TempDir()
 		setTestHome(t, homeDir)
 		t.Setenv("PATH", t.TempDir()) // empty dir, no claude binary
 		fallback := filepath.Join(homeDir, ".claude", "local", binaryName())
 		os.MkdirAll(filepath.Dir(fallback), 0o755)
 		os.WriteFile(fallback, []byte("#!/bin/sh\n"), 0o755)
 		got, err := c.findPath()
 		if err != nil {
 			t.Fatalf("unexpected error: %v", err)
 		}
 		if got != fallback {
 			t.Errorf("findPath() = %q, want %q", got, fallback)
 		}
 	})
 	t.Run("returns error when neither PATH nor fallback exists", func(t *testing.T) {
 		setTestHome(t, t.TempDir())
 		t.Setenv("PATH", t.TempDir()) // empty dir, no claude binary
 		if _, err := c.findPath(); err == nil {
 			t.Fatal("expected error, got nil")
 		}
 	})
 }
 // TestClaudeArgs is a table test for Claude.args covering empty and non-empty
 // models combined with various extra arguments.
 func TestClaudeArgs(t *testing.T) {
 	c := &Claude{}
 	type argsCase struct {
 		name  string
 		model string
 		args  []string
 		want  []string
 	}
 	cases := []argsCase{
 		{name: "with model", model: "llama3.2", want: []string{"--model", "llama3.2"}},
 		{name: "empty model"},
 		{name: "with model and verbose", model: "llama3.2", args: []string{"--verbose"}, want: []string{"--model", "llama3.2", "--verbose"}},
 		{name: "empty model with help", args: []string{"--help"}, want: []string{"--help"}},
 		{name: "with allowed tools", model: "llama3.2", args: []string{"--allowedTools", "Read,Write,Bash"}, want: []string{"--model", "llama3.2", "--allowedTools", "Read,Write,Bash"}},
 	}
 	for _, tc := range cases {
 		t.Run(tc.name, func(t *testing.T) {
 			if got := c.args(tc.model, tc.args); !slices.Equal(got, tc.want) {
 				t.Errorf("args(%q, %v) = %v, want %v", tc.model, tc.args, got, tc.want)
 			}
 		})
 	}
 }
 // TestClaudeModelEnvVars checks the environment entries produced by
 // Claude.modelEnvVars for local, empty, known-cloud and unknown-cloud models.
 func TestClaudeModelEnvVars(t *testing.T) {
 	c := &Claude{}
 	// envMap indexes "KEY=VALUE" entries by key; a later duplicate replaces an
 	// earlier one, and a key that is absent reads back as "".
 	envMap := func(envs []string) map[string]string {
 		byKey := make(map[string]string)
 		for _, entry := range envs {
 			key, value, _ := strings.Cut(entry, "=")
 			byKey[key] = value
 		}
 		return byKey
 	}
 	t.Run("maps all Claude model env vars to the provided model", func(t *testing.T) {
 		got := envMap(c.modelEnvVars("llama3.2"))
 		if v := got["ANTHROPIC_DEFAULT_OPUS_MODEL"]; v != "llama3.2" {
 			t.Errorf("OPUS = %q, want llama3.2", v)
 		}
 		if v := got["ANTHROPIC_DEFAULT_SONNET_MODEL"]; v != "llama3.2" {
 			t.Errorf("SONNET = %q, want llama3.2", v)
 		}
 		if v := got["ANTHROPIC_DEFAULT_HAIKU_MODEL"]; v != "llama3.2" {
 			t.Errorf("HAIKU = %q, want llama3.2", v)
 		}
 		if v := got["CLAUDE_CODE_SUBAGENT_MODEL"]; v != "llama3.2" {
 			t.Errorf("SUBAGENT = %q, want llama3.2", v)
 		}
 		if v := got["CLAUDE_CODE_AUTO_COMPACT_WINDOW"]; v != "" {
 			t.Errorf("AUTO_COMPACT_WINDOW = %q, want empty for local models", v)
 		}
 	})
 	t.Run("supports empty model", func(t *testing.T) {
 		got := envMap(c.modelEnvVars(""))
 		if v := got["ANTHROPIC_DEFAULT_OPUS_MODEL"]; v != "" {
 			t.Errorf("OPUS = %q, want empty", v)
 		}
 		if v := got["ANTHROPIC_DEFAULT_SONNET_MODEL"]; v != "" {
 			t.Errorf("SONNET = %q, want empty", v)
 		}
 		if v := got["ANTHROPIC_DEFAULT_HAIKU_MODEL"]; v != "" {
 			t.Errorf("HAIKU = %q, want empty", v)
 		}
 		if v := got["CLAUDE_CODE_SUBAGENT_MODEL"]; v != "" {
 			t.Errorf("SUBAGENT = %q, want empty", v)
 		}
 		if v := got["CLAUDE_CODE_AUTO_COMPACT_WINDOW"]; v != "" {
 			t.Errorf("AUTO_COMPACT_WINDOW = %q, want empty", v)
 		}
 	})
 	t.Run("sets auto compact window for known cloud models", func(t *testing.T) {
 		got := envMap(c.modelEnvVars("glm-5:cloud"))
 		if v := got["CLAUDE_CODE_AUTO_COMPACT_WINDOW"]; v != "202752" {
 			t.Errorf("AUTO_COMPACT_WINDOW = %q, want 202752", v)
 		}
 	})
 	t.Run("does not set auto compact window for unknown cloud models", func(t *testing.T) {
 		got := envMap(c.modelEnvVars("unknown-model:cloud"))
 		if v := got["CLAUDE_CODE_AUTO_COMPACT_WINDOW"]; v != "" {
 			t.Errorf("AUTO_COMPACT_WINDOW = %q, want empty", v)
 		}
 	})
 }
--- a/cmd/launch/cline.go
+++ b/cmd/launch/cline.go
@@ -0,0 +1,104 @@
 package launch
 import (
 	"encoding/json"
 	"fmt"
 	"os"
 	"os/exec"
 	"path/filepath"
 	"github.com/ollama/ollama/cmd/internal/fileutil"
 	"github.com/ollama/ollama/envconfig"
 )
 // Cline is the Runner and Editor implementation for the Cline CLI integration.
 type Cline struct{}
 // String returns the display name of the integration.
 func (c *Cline) String() string {
 	return "Cline"
 }
 // Run launches the cline CLI with args, attached to this process's
 // stdin/stdout/stderr. The model argument is not passed on the command line
 // here. An install hint is returned when cline is not found on PATH.
 func (c *Cline) Run(model string, args []string) error {
 	_, lookErr := exec.LookPath("cline")
 	if lookErr != nil {
 		return fmt.Errorf("cline is not installed, install with: npm install -g cline")
 	}
 	cmd := exec.Command("cline", args...)
 	cmd.Stdin, cmd.Stdout, cmd.Stderr = os.Stdin, os.Stdout, os.Stderr
 	return cmd.Run()
 }
 // Paths returns the location of Cline's globalState.json under the user's home
 // directory when that file exists, and nil otherwise (including when the home
 // directory cannot be determined).
 func (c *Cline) Paths() []string {
 	home, err := os.UserHomeDir()
 	if err != nil {
 		return nil
 	}
 	statePath := filepath.Join(home, ".cline", "data", "globalState.json")
 	if _, statErr := os.Stat(statePath); statErr != nil {
 		return nil
 	}
 	return []string{statePath}
 }
 // Edit points Cline at Ollama by updating ~/.cline/data/globalState.json.
 //
 // The first entry of models becomes the model for both act and plan modes;
 // an empty models slice is a no-op. Existing keys in the config file are
 // preserved, the config directory is created when missing, and the file is
 // written through fileutil.WriteWithBackup. An error is returned when the
 // home directory cannot be determined, the directory cannot be created, the
 // existing file is not valid JSON for an object, or the write fails.
 func (c *Cline) Edit(models []string) error {
 	if len(models) == 0 {
 		return nil
 	}
 	home, err := os.UserHomeDir()
 	if err != nil {
 		return err
 	}
 	configPath := filepath.Join(home, ".cline", "data", "globalState.json")
 	if err := os.MkdirAll(filepath.Dir(configPath), 0o755); err != nil {
 		return err
 	}
 	config := make(map[string]any)
 	if data, err := os.ReadFile(configPath); err == nil {
 		if err := json.Unmarshal(data, &config); err != nil {
 			return fmt.Errorf("failed to parse config: %w, at: %s", err, configPath)
 		}
 		// A file containing the JSON literal null decodes successfully but leaves
 		// the map nil; re-create it so the assignments below cannot panic.
 		if config == nil {
 			config = make(map[string]any)
 		}
 	}
 	// Set Ollama as the provider for both act and plan modes
 	baseURL := envconfig.Host().String()
 	config["ollamaBaseUrl"] = baseURL
 	config["actModeApiProvider"] = "ollama"
 	config["actModeOllamaModelId"] = models[0]
 	config["actModeOllamaBaseUrl"] = baseURL
 	config["planModeApiProvider"] = "ollama"
 	config["planModeOllamaModelId"] = models[0]
 	config["planModeOllamaBaseUrl"] = baseURL
 	config["welcomeViewCompleted"] = true
 	data, err := json.MarshalIndent(config, "", "  ")
 	if err != nil {
 		return err
 	}
 	return fileutil.WriteWithBackup(configPath, data)
 }
 // Models reports the model Cline is currently configured to use through Ollama.
 // It returns a one-element slice holding actModeOllamaModelId when
 // actModeApiProvider is "ollama" and the id is a non-empty string; in every
 // other case (no home directory, unreadable config, different provider,
 // missing id) it returns nil.
 func (c *Cline) Models() []string {
 	home, err := os.UserHomeDir()
 	if err != nil {
 		return nil
 	}
 	statePath := filepath.Join(home, ".cline", "data", "globalState.json")
 	config, err := fileutil.ReadJSON(statePath)
 	if err != nil {
 		return nil
 	}
 	if config["actModeApiProvider"] != "ollama" {
 		return nil
 	}
 	if modelID, isString := config["actModeOllamaModelId"].(string); isString && modelID != "" {
 		return []string{modelID}
 	}
 	return nil
 }
--- a/cmd/launch/cline_test.go
+++ b/cmd/launch/cline_test.go
@@ -0,0 +1,204 @@
 package launch
 import (
 	"encoding/json"
 	"os"
 	"path/filepath"
 	"testing"
 )
 // TestClineIntegration checks Cline's display name and that *Cline satisfies
 // both the Runner and the Editor interface.
 func TestClineIntegration(t *testing.T) {
 	cline := &Cline{}
 	t.Run("String", func(t *testing.T) {
 		got := cline.String()
 		if got != "Cline" {
 			t.Errorf("String() = %q, want %q", got, "Cline")
 		}
 	})
 	t.Run("implements Runner", func(t *testing.T) {
 		// Compile-time assertion only.
 		var _ Runner = cline
 	})
 	t.Run("implements Editor", func(t *testing.T) {
 		// Compile-time assertion only.
 		var _ Editor = cline
 	})
 }
 // TestClineEdit exercises Cline.Edit against a temporary home directory:
 // creating the config, preserving unrelated keys, re-editing, the empty-models
 // no-op, and the choice of the first model as primary.
 func TestClineEdit(t *testing.T) {
 	c := &Cline{}
 	tmpDir := t.TempDir()
 	setTestHome(t, tmpDir)
 	clineRoot := filepath.Join(tmpDir, ".cline")
 	configDir := filepath.Join(clineRoot, "data")
 	configPath := filepath.Join(configDir, "globalState.json")
 	// readConfig decodes whatever is currently at configPath. Read and parse
 	// errors are ignored, in which case the returned map may be nil.
 	readConfig := func() map[string]any {
 		var parsed map[string]any
 		raw, _ := os.ReadFile(configPath)
 		json.Unmarshal(raw, &parsed)
 		return parsed
 	}
 	t.Run("creates config from scratch", func(t *testing.T) {
 		os.RemoveAll(clineRoot)
 		if err := c.Edit([]string{"kimi-k2.5:cloud"}); err != nil {
 			t.Fatal(err)
 		}
 		config := readConfig()
 		if got := config["actModeApiProvider"]; got != "ollama" {
 			t.Errorf("actModeApiProvider = %v, want ollama", got)
 		}
 		if got := config["actModeOllamaModelId"]; got != "kimi-k2.5:cloud" {
 			t.Errorf("actModeOllamaModelId = %v, want kimi-k2.5:cloud", got)
 		}
 		if got := config["planModeApiProvider"]; got != "ollama" {
 			t.Errorf("planModeApiProvider = %v, want ollama", got)
 		}
 		if got := config["planModeOllamaModelId"]; got != "kimi-k2.5:cloud" {
 			t.Errorf("planModeOllamaModelId = %v, want kimi-k2.5:cloud", got)
 		}
 		if got := config["welcomeViewCompleted"]; got != true {
 			t.Errorf("welcomeViewCompleted = %v, want true", got)
 		}
 	})
 	t.Run("preserves existing fields", func(t *testing.T) {
 		os.RemoveAll(clineRoot)
 		os.MkdirAll(configDir, 0o755)
 		// Seed a config holding keys that Edit does not manage.
 		seed, _ := json.Marshal(map[string]any{
 			"remoteRulesToggles":    map[string]any{},
 			"remoteWorkflowToggles": map[string]any{},
 			"customSetting":         "keep-me",
 		})
 		os.WriteFile(configPath, seed, 0o644)
 		if err := c.Edit([]string{"glm-5:cloud"}); err != nil {
 			t.Fatal(err)
 		}
 		config := readConfig()
 		if config["customSetting"] != "keep-me" {
 			t.Errorf("customSetting was not preserved")
 		}
 		if got := config["actModeOllamaModelId"]; got != "glm-5:cloud" {
 			t.Errorf("actModeOllamaModelId = %v, want glm-5:cloud", got)
 		}
 	})
 	t.Run("updates model on re-edit", func(t *testing.T) {
 		os.RemoveAll(clineRoot)
 		for _, model := range []string{"kimi-k2.5:cloud", "glm-5:cloud"} {
 			if err := c.Edit([]string{model}); err != nil {
 				t.Fatal(err)
 			}
 		}
 		config := readConfig()
 		if got := config["actModeOllamaModelId"]; got != "glm-5:cloud" {
 			t.Errorf("actModeOllamaModelId = %v, want glm-5:cloud", got)
 		}
 		if got := config["planModeOllamaModelId"]; got != "glm-5:cloud" {
 			t.Errorf("planModeOllamaModelId = %v, want glm-5:cloud", got)
 		}
 	})
 	t.Run("empty models is no-op", func(t *testing.T) {
 		os.RemoveAll(clineRoot)
 		if err := c.Edit(nil); err != nil {
 			t.Fatal(err)
 		}
 		_, statErr := os.Stat(configPath)
 		if !os.IsNotExist(statErr) {
 			t.Error("expected no config file to be created for empty models")
 		}
 	})
 	t.Run("uses first model as primary", func(t *testing.T) {
 		os.RemoveAll(clineRoot)
 		if err := c.Edit([]string{"kimi-k2.5:cloud", "glm-5:cloud"}); err != nil {
 			t.Fatal(err)
 		}
 		if got := readConfig()["actModeOllamaModelId"]; got != "kimi-k2.5:cloud" {
 			t.Errorf("actModeOllamaModelId = %v, want kimi-k2.5:cloud (first model)", got)
 		}
 	})
 }
 // TestClineModels checks Cline.Models against a temporary home directory: no
 // config, a config for another provider, and a config that selects Ollama.
 func TestClineModels(t *testing.T) {
 	c := &Cline{}
 	tmpDir := t.TempDir()
 	setTestHome(t, tmpDir)
 	configDir := filepath.Join(tmpDir, ".cline", "data")
 	configPath := filepath.Join(configDir, "globalState.json")
 	// writeConfig stores the given provider and model id as the Cline config.
 	writeConfig := func(provider, modelID string) {
 		os.MkdirAll(configDir, 0o755)
 		encoded, _ := json.Marshal(map[string]any{
 			"actModeApiProvider":   provider,
 			"actModeOllamaModelId": modelID,
 		})
 		os.WriteFile(configPath, encoded, 0o644)
 	}
 	t.Run("returns nil when no config", func(t *testing.T) {
 		if models := c.Models(); models != nil {
 			t.Errorf("Models() = %v, want nil", models)
 		}
 	})
 	t.Run("returns nil when provider is not ollama", func(t *testing.T) {
 		writeConfig("anthropic", "some-model")
 		if models := c.Models(); models != nil {
 			t.Errorf("Models() = %v, want nil", models)
 		}
 	})
 	t.Run("returns model when ollama is configured", func(t *testing.T) {
 		writeConfig("ollama", "kimi-k2.5:cloud")
 		models := c.Models()
 		if len(models) != 1 || models[0] != "kimi-k2.5:cloud" {
 			t.Errorf("Models() = %v, want [kimi-k2.5:cloud]", models)
 		}
 	})
 }
 // TestClinePaths checks that Cline.Paths returns nil while no config file exists
 // and the config file's path once it has been created.
 func TestClinePaths(t *testing.T) {
 	c := &Cline{}
 	homeDir := t.TempDir()
 	setTestHome(t, homeDir)
 	t.Run("returns nil when no config exists", func(t *testing.T) {
 		if paths := c.Paths(); paths != nil {
 			t.Errorf("Paths() = %v, want nil", paths)
 		}
 	})
 	t.Run("returns path when config exists", func(t *testing.T) {
 		configPath := filepath.Join(homeDir, ".cline", "data", "globalState.json")
 		os.MkdirAll(filepath.Dir(configPath), 0o755)
 		os.WriteFile(configPath, []byte("{}"), 0o644)
 		paths := c.Paths()
 		if len(paths) != 1 || paths[0] != configPath {
 			t.Errorf("Paths() = %v, want [%s]", paths, configPath)
 		}
 	})
 }
--- a/cmd/launch/codex.go
+++ b/cmd/launch/codex.go
@@ -0,0 +1,148 @@
 package launch
 import (
 	"fmt"
 	"os"
 	"os/exec"
 	"path/filepath"
 	"strings"
 	"github.com/ollama/ollama/envconfig"
 	"golang.org/x/mod/semver"
 )
 // Codex is the Runner implementation for the Codex integration.
 type Codex struct{}
 // String returns the display name of the integration.
 func (c *Codex) String() string {
 	return "Codex"
 }
 // codexProfileName is the name used for both the Codex profile passed via
 // --profile and the model provider section written to the Codex config.
 const codexProfileName = "ollama-launch"
 // args builds the codex argument list: "--profile ollama-launch" first, then
 // "-m <model>" when model is non-empty, followed by extra unchanged.
 func (c *Codex) args(model string, extra []string) []string {
 	cmdArgs := make([]string, 0, len(extra)+4)
 	cmdArgs = append(cmdArgs, "--profile", codexProfileName)
 	if model != "" {
 		cmdArgs = append(cmdArgs, "-m", model)
 	}
 	return append(cmdArgs, extra...)
 }
 // Run checks the installed codex version, makes sure the ollama-launch
 // profile is present in the Codex config, and then executes codex wired to the
 // current process's stdin, stdout and stderr.
 func (c *Codex) Run(model string, args []string) error {
 	if err := checkCodexVersion(); err != nil {
 		return err
 	}
 	if err := ensureCodexConfig(); err != nil {
 		return fmt.Errorf("failed to configure codex: %w", err)
 	}
 	codex := exec.Command("codex", c.args(model, args)...)
 	codex.Stdin, codex.Stdout, codex.Stderr = os.Stdin, os.Stdout, os.Stderr
 	// Inherit the caller's environment and append OPENAI_API_KEY=ollama.
 	codex.Env = append(os.Environ(), "OPENAI_API_KEY=ollama")
 	return codex.Run()
 }
 // ensureCodexConfig creates ~/.codex when it is missing and then delegates to
 // writeCodexProfile to write the ollama-launch sections into
 // ~/.codex/config.toml.
 func ensureCodexConfig() error {
 	home, err := os.UserHomeDir()
 	if err != nil {
 		return err
 	}
 	codexDir := filepath.Join(home, ".codex")
 	if mkErr := os.MkdirAll(codexDir, 0o755); mkErr != nil {
 		return mkErr
 	}
 	return writeCodexProfile(filepath.Join(codexDir, "config.toml"))
 }
 // writeCodexProfile ensures the TOML file at configPath contains the
 // [profiles.ollama-launch] and [model_providers.ollama-launch] sections, both
 // pointing at envconfig.Host() with a "/v1/" suffix.
 //
 // A section that already exists is replaced from its header up to the next
 // line that starts with "["; a missing section is appended. All other content
 // is kept. A missing file is treated as empty. Any other read error is
 // returned without writing, so an unreadable config is never overwritten with
 // only the generated sections.
 func writeCodexProfile(configPath string) error {
 	baseURL := envconfig.Host().String() + "/v1/"
 	sections := []struct {
 		header string
 		lines  []string
 	}{
 		{
 			header: fmt.Sprintf("[profiles.%s]", codexProfileName),
 			lines: []string{
 				fmt.Sprintf("openai_base_url = %q", baseURL),
 				`forced_login_method = "api"`,
 				fmt.Sprintf("model_provider = %q", codexProfileName),
 			},
 		},
 		{
 			header: fmt.Sprintf("[model_providers.%s]", codexProfileName),
 			lines: []string{
 				`name = "Ollama"`,
 				fmt.Sprintf("base_url = %q", baseURL),
 			},
 		},
 	}
 	text := ""
 	content, readErr := os.ReadFile(configPath)
 	switch {
 	case readErr == nil:
 		text = string(content)
 	case !os.IsNotExist(readErr):
 		// Permission or I/O failure: bail out rather than clobber a config
 		// whose current contents could not be read.
 		return readErr
 	}
 	for _, s := range sections {
 		block := strings.Join(append([]string{s.header}, s.lines...), "\n") + "\n"
 		if idx := strings.Index(text, s.header); idx >= 0 {
 			// Replace the existing section up to the next section header.
 			rest := text[idx+len(s.header):]
 			if endIdx := strings.Index(rest, "\n["); endIdx >= 0 {
 				// endIdx+1 skips the newline so the following header is kept.
 				text = text[:idx] + block + rest[endIdx+1:]
 			} else {
 				text = text[:idx] + block
 			}
 		} else {
 			// Append the section, separated from earlier content by one
 			// blank line.
 			if text != "" && !strings.HasSuffix(text, "\n") {
 				text += "\n"
 			}
 			if text != "" {
 				text += "\n"
 			}
 			text += block
 		}
 	}
 	return os.WriteFile(configPath, []byte(text), 0o644)
 }
 // checkCodexVersion verifies that the codex CLI is on PATH and that the
 // version printed by "codex --version" is at least the minimum supported one.
 //
 // It returns an error when codex is not installed, when the version command
 // fails, when the output cannot be parsed as a semantic version, or when the
 // version is older than the minimum.
 func checkCodexVersion() error {
 	const minVersion = "v0.81.0"
 	if _, err := exec.LookPath("codex"); err != nil {
 		return fmt.Errorf("codex is not installed, install with: npm install -g @openai/codex")
 	}
 	out, err := exec.Command("codex", "--version").Output()
 	if err != nil {
 		return fmt.Errorf("failed to get codex version: %w", err)
 	}
 	// Parse output like "codex-cli 0.87.0"; the version is the last field.
 	fields := strings.Fields(string(out))
 	if len(fields) < 2 {
 		return fmt.Errorf("unexpected codex version output: %s", string(out))
 	}
 	found := fields[len(fields)-1]
 	// semver requires a leading "v"; tolerate output that already has one.
 	version := "v" + strings.TrimPrefix(found, "v")
 	// semver.Compare treats an invalid version as older than any valid one,
 	// so reject unparseable output explicitly instead of calling it too old.
 	if !semver.IsValid(version) {
 		return fmt.Errorf("unexpected codex version output: %s", string(out))
 	}
 	if semver.Compare(version, minVersion) < 0 {
 		return fmt.Errorf("codex version %s is too old, minimum required is %s, update with: npm update -g @openai/codex", found, strings.TrimPrefix(minVersion, "v"))
 	}
 	return nil
 }
--- a/cmd/launch/codex_test.go
+++ b/cmd/launch/codex_test.go
@@ -0,0 +1,229 @@
 package launch
 import (
 	"os"
 	"path/filepath"
 	"slices"
 	"strings"
 	"testing"
 )
 // TestCodexArgs checks the argument list produced by Codex.args for various
 // combinations of model and extra arguments.
 func TestCodexArgs(t *testing.T) {
 	runner := &Codex{}
 	cases := []struct {
 		name  string
 		model string
 		args  []string
 		want  []string
 	}{
 		{
 			name:  "with model",
 			model: "llama3.2",
 			want:  []string{"--profile", "ollama-launch", "-m", "llama3.2"},
 		},
 		{
 			name: "empty model",
 			want: []string{"--profile", "ollama-launch"},
 		},
 		{
 			name:  "with model and extra args",
 			model: "qwen3.5",
 			args:  []string{"-p", "myprofile"},
 			want:  []string{"--profile", "ollama-launch", "-m", "qwen3.5", "-p", "myprofile"},
 		},
 		{
 			name:  "with sandbox flag",
 			model: "llama3.2",
 			args:  []string{"--sandbox", "workspace-write"},
 			want:  []string{"--profile", "ollama-launch", "-m", "llama3.2", "--sandbox", "workspace-write"},
 		},
 	}
 	for _, tc := range cases {
 		t.Run(tc.name, func(t *testing.T) {
 			if got := runner.args(tc.model, tc.args); !slices.Equal(got, tc.want) {
 				t.Errorf("args(%q, %v) = %v, want %v", tc.model, tc.args, got, tc.want)
 			}
 		})
 	}
 }
 // TestWriteCodexProfile exercises writeCodexProfile against a missing file,
 // files with and without existing ollama-launch sections, a file without a
 // trailing newline, and a custom OLLAMA_HOST.
 //
 // Every fixture write and read-back is checked, so an I/O failure aborts the
 // subtest instead of leaving empty content that would let the negative
 // assertions (for example "old URL was not replaced") pass vacuously.
 func TestWriteCodexProfile(t *testing.T) {
 	// seedConfig writes existing into a fresh temp config.toml and returns
 	// its path.
 	seedConfig := func(t *testing.T, existing string) string {
 		t.Helper()
 		configPath := filepath.Join(t.TempDir(), "config.toml")
 		if err := os.WriteFile(configPath, []byte(existing), 0o644); err != nil {
 			t.Fatalf("failed to seed config: %v", err)
 		}
 		return configPath
 	}
 	// writeAndRead runs writeCodexProfile on configPath and returns the
 	// resulting file content.
 	writeAndRead := func(t *testing.T, configPath string) string {
 		t.Helper()
 		if err := writeCodexProfile(configPath); err != nil {
 			t.Fatal(err)
 		}
 		data, err := os.ReadFile(configPath)
 		if err != nil {
 			t.Fatal(err)
 		}
 		return string(data)
 	}
 	t.Run("creates new file when none exists", func(t *testing.T) {
 		content := writeAndRead(t, filepath.Join(t.TempDir(), "config.toml"))
 		if !strings.Contains(content, "[profiles.ollama-launch]") {
 			t.Error("missing [profiles.ollama-launch] header")
 		}
 		if !strings.Contains(content, "openai_base_url") {
 			t.Error("missing openai_base_url key")
 		}
 		if !strings.Contains(content, "/v1/") {
 			t.Error("missing /v1/ suffix in base URL")
 		}
 		if !strings.Contains(content, `forced_login_method = "api"`) {
 			t.Error("missing forced_login_method key")
 		}
 		if !strings.Contains(content, `model_provider = "ollama-launch"`) {
 			t.Error("missing model_provider key")
 		}
 		if !strings.Contains(content, "[model_providers.ollama-launch]") {
 			t.Error("missing [model_providers.ollama-launch] section")
 		}
 		if !strings.Contains(content, `name = "Ollama"`) {
 			t.Error("missing model provider name")
 		}
 	})
 	t.Run("appends profile to existing file without profile", func(t *testing.T) {
 		content := writeAndRead(t, seedConfig(t, "[some_other_section]\nkey = \"value\"\n"))
 		if !strings.Contains(content, "[some_other_section]") {
 			t.Error("existing section was removed")
 		}
 		if !strings.Contains(content, "[profiles.ollama-launch]") {
 			t.Error("missing [profiles.ollama-launch] header")
 		}
 	})
 	t.Run("replaces existing profile section", func(t *testing.T) {
 		existing := "[profiles.ollama-launch]\nopenai_base_url = \"http://old:1234/v1/\"\n\n[model_providers.ollama-launch]\nname = \"Ollama\"\nbase_url = \"http://old:1234/v1/\"\n"
 		content := writeAndRead(t, seedConfig(t, existing))
 		if strings.Contains(content, "old:1234") {
 			t.Error("old URL was not replaced")
 		}
 		if n := strings.Count(content, "[profiles.ollama-launch]"); n != 1 {
 			t.Errorf("expected exactly one [profiles.ollama-launch] section, got %d", n)
 		}
 		if n := strings.Count(content, "[model_providers.ollama-launch]"); n != 1 {
 			t.Errorf("expected exactly one [model_providers.ollama-launch] section, got %d", n)
 		}
 	})
 	t.Run("replaces profile while preserving following sections", func(t *testing.T) {
 		existing := "[profiles.ollama-launch]\nopenai_base_url = \"http://old:1234/v1/\"\n[another_section]\nfoo = \"bar\"\n"
 		content := writeAndRead(t, seedConfig(t, existing))
 		if strings.Contains(content, "old:1234") {
 			t.Error("old URL was not replaced")
 		}
 		if !strings.Contains(content, "[another_section]") {
 			t.Error("following section was removed")
 		}
 		if !strings.Contains(content, "foo = \"bar\"") {
 			t.Error("following section content was removed")
 		}
 	})
 	t.Run("appends newline to file not ending with newline", func(t *testing.T) {
 		content := writeAndRead(t, seedConfig(t, "[other]\nkey = \"val\""))
 		if !strings.Contains(content, "[profiles.ollama-launch]") {
 			t.Error("missing [profiles.ollama-launch] header")
 		}
 		// The missing trailing newline must not turn into two blank lines.
 		if strings.Contains(content, "\n\n\n") {
 			t.Error("unexpected triple newline in output")
 		}
 	})
 	t.Run("uses custom OLLAMA_HOST", func(t *testing.T) {
 		t.Setenv("OLLAMA_HOST", "http://myhost:9999")
 		content := writeAndRead(t, filepath.Join(t.TempDir(), "config.toml"))
 		if !strings.Contains(content, "myhost:9999/v1/") {
 			t.Errorf("expected custom host in URL, got:\n%s", content)
 		}
 	})
 }
 // TestEnsureCodexConfig checks that ensureCodexConfig creates
 // <home>/.codex/config.toml with the ollama-launch profile and that calling it
 // twice does not duplicate the generated sections.
 func TestEnsureCodexConfig(t *testing.T) {
 	t.Run("creates .codex dir and config.toml", func(t *testing.T) {
 		tmpDir := t.TempDir()
 		setTestHome(t, tmpDir)
 		if err := ensureCodexConfig(); err != nil {
 			t.Fatal(err)
 		}
 		configPath := filepath.Join(tmpDir, ".codex", "config.toml")
 		data, err := os.ReadFile(configPath)
 		if err != nil {
 			t.Fatalf("config.toml not created: %v", err)
 		}
 		content := string(data)
 		if !strings.Contains(content, "[profiles.ollama-launch]") {
 			t.Error("missing [profiles.ollama-launch] header")
 		}
 		if !strings.Contains(content, "openai_base_url") {
 			t.Error("missing openai_base_url key")
 		}
 	})
 	t.Run("is idempotent", func(t *testing.T) {
 		tmpDir := t.TempDir()
 		setTestHome(t, tmpDir)
 		if err := ensureCodexConfig(); err != nil {
 			t.Fatal(err)
 		}
 		if err := ensureCodexConfig(); err != nil {
 			t.Fatal(err)
 		}
 		configPath := filepath.Join(tmpDir, ".codex", "config.toml")
 		// Report a failed read directly instead of as "got 0" sections.
 		data, err := os.ReadFile(configPath)
 		if err != nil {
 			t.Fatalf("config.toml not readable: %v", err)
 		}
 		content := string(data)
 		if n := strings.Count(content, "[profiles.ollama-launch]"); n != 1 {
 			t.Errorf("expected exactly one [profiles.ollama-launch] section after two calls, got %d", n)
 		}
 		if n := strings.Count(content, "[model_providers.ollama-launch]"); n != 1 {
 			t.Errorf("expected exactly one [model_providers.ollama-launch] section after two calls, got %d", n)
 		}
 	})
 }
--- a/cmd/launch/command_test.go
+++ b/cmd/launch/command_test.go
@@ -0,0 +1,598 @@
 package launch
 import (
 	"bytes"
 	"fmt"
 	"io"
 	"net/http"
 	"net/http/httptest"
 	"os"
 	"strings"
 	"testing"
 	"github.com/google/go-cmp/cmp"
 	"github.com/ollama/ollama/cmd/config"
 	"github.com/spf13/cobra"
 )
 func captureStderr(t *testing.T, fn func()) string {
 	t.Helper()
 	oldStderr := os.Stderr
 	r, w, err := os.Pipe()
 	if err != nil {
 		t.Fatalf("failed to create stderr pipe: %v", err)
 	}
 	os.Stderr = w
 	defer func() {
 		os.Stderr = oldStderr
 	}()
 	done := make(chan string, 1)
 	go func() {
 		var buf bytes.Buffer
 		_, _ = io.Copy(&buf, r)
 		done <- buf.String()
 	}()
 	fn()
 	_ = w.Close()
 	return <-done
 }
 // TestLaunchCmd checks the static shape of the command returned by LaunchCmd:
 // its usage string, non-empty descriptions, expected flags, and PreRunE hook.
 func TestLaunchCmd(t *testing.T) {
 	noopCheck := func(*cobra.Command, []string) error { return nil }
 	noopTUI := func(*cobra.Command) {}
 	cmd := LaunchCmd(noopCheck, noopTUI)
 	t.Run("command structure", func(t *testing.T) {
 		const wantUse = "launch [INTEGRATION] [-- [EXTRA_ARGS...]]"
 		if cmd.Use != wantUse {
 			t.Errorf("Use = %q, want %q", cmd.Use, wantUse)
 		}
 		if cmd.Short == "" {
 			t.Error("Short description should not be empty")
 		}
 		if cmd.Long == "" {
 			t.Error("Long description should not be empty")
 		}
 	})
 	t.Run("flags exist", func(t *testing.T) {
 		for _, name := range []string{"model", "config", "yes"} {
 			if cmd.Flags().Lookup(name) == nil {
 				t.Error("--" + name + " flag should exist")
 			}
 		}
 	})
 	t.Run("PreRunE is set", func(t *testing.T) {
 		if cmd.PreRunE == nil {
 			t.Error("PreRunE should be set to checkServerHeartbeat")
 		}
 	})
 }
 // TestLaunchCmdTUICallback checks when LaunchCmd invokes the TUI callback: it
 // must run when no arguments are given, must not run when an integration is
 // named, and flags or extra args without an integration must fail with
 // integration-name guidance instead of opening the TUI.
 func TestLaunchCmdTUICallback(t *testing.T) {
 	mockCheck := func(*cobra.Command, []string) error { return nil }
 	// newCmd returns a launch command whose TUI callback records its
 	// invocation in *called.
 	newCmd := func(called *bool) *cobra.Command {
 		return LaunchCmd(mockCheck, func(*cobra.Command) { *called = true })
 	}
 	t.Run("no args calls TUI", func(t *testing.T) {
 		var tuiCalled bool
 		cmd := newCmd(&tuiCalled)
 		cmd.SetArgs([]string{})
 		_ = cmd.Execute()
 		if !tuiCalled {
 			t.Error("TUI callback should be called when no args provided")
 		}
 	})
 	t.Run("integration arg bypasses TUI", func(t *testing.T) {
 		srv := httptest.NewServer(http.NotFoundHandler())
 		defer srv.Close()
 		t.Setenv("OLLAMA_HOST", srv.URL)
 		var tuiCalled bool
 		cmd := newCmd(&tuiCalled)
 		cmd.SetArgs([]string{"claude"})
 		_ = cmd.Execute()
 		if tuiCalled {
 			t.Error("TUI callback should NOT be called when integration arg provided")
 		}
 	})
 	// Invocations that pass flags or extra args without naming an integration.
 	rejected := []struct {
 		name    string
 		args    []string
 		failMsg string
 		tuiMsg  string
 	}{
 		{
 			name:    "--model flag without integration returns error",
 			args:    []string{"--model", "test-model"},
 			failMsg: "expected --model without an integration to fail",
 			tuiMsg:  "TUI callback should NOT be called when --model is provided without an integration",
 		},
 		{
 			name:    "--config flag without integration returns error",
 			args:    []string{"--config"},
 			failMsg: "expected --config without an integration to fail",
 			tuiMsg:  "TUI callback should NOT be called when --config is provided without an integration",
 		},
 		{
 			name:    "--yes flag without integration returns error",
 			args:    []string{"--yes"},
 			failMsg: "expected --yes without an integration to fail",
 			tuiMsg:  "TUI callback should NOT be called when --yes is provided without an integration",
 		},
 		{
 			name:    "extra args without integration return error",
 			args:    []string{"--model", "test-model", "--", "--sandbox", "workspace-write"},
 			failMsg: "expected flags and extra args without an integration to fail",
 			tuiMsg:  "TUI callback should NOT be called when flags or extra args are provided without an integration",
 		},
 	}
 	for _, tc := range rejected {
 		t.Run(tc.name, func(t *testing.T) {
 			var tuiCalled bool
 			cmd := newCmd(&tuiCalled)
 			cmd.SetArgs(tc.args)
 			err := cmd.Execute()
 			if err == nil {
 				t.Fatal(tc.failMsg)
 			}
 			if !strings.Contains(err.Error(), "require an integration name") {
 				t.Fatalf("expected integration-name guidance, got %v", err)
 			}
 			if tuiCalled {
 				t.Error(tc.tuiMsg)
 			}
 		})
 	}
 }
 // TestLaunchCmdNilHeartbeat checks that LaunchCmd still returns a command
 // when both callbacks are nil. A non-nil PreRunE is only logged, not treated
 // as a failure.
 func TestLaunchCmdNilHeartbeat(t *testing.T) {
 	switch cmd := LaunchCmd(nil, nil); {
 	case cmd == nil:
 		t.Fatal("LaunchCmd returned nil")
 	case cmd.PreRunE != nil:
 		t.Log("Note: PreRunE is set even when nil is passed (acceptable)")
 	}
 }
 // TestLaunchCmdModelFlagFiltersDisabledCloudFromSavedConfig seeds a saved
 // "stubeditor" config containing only glm-5:cloud, serves an /api/status
 // response that reports cloud as disabled, and launches with --model llama3.2.
 // It expects the saved models, the models passed to the editor stub, and the
 // model that is run to all be just llama3.2.
 func TestLaunchCmdModelFlagFiltersDisabledCloudFromSavedConfig(t *testing.T) {
 	tmpDir := t.TempDir()
 	setLaunchTestHome(t, tmpDir)
 	if err := config.SaveIntegration("stubeditor", []string{"glm-5:cloud"}); err != nil {
 		t.Fatalf("failed to seed saved config: %v", err)
 	}
 	// Stub server: cloud disabled, llama3.2 resolvable, everything else 404.
 	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		switch r.URL.Path {
 		case "/api/status":
 			fmt.Fprintf(w, `{"cloud":{"disabled":true,"source":"config"}}`)
 		case "/api/show":
 			fmt.Fprintf(w, `{"model":"llama3.2"}`)
 		default:
 			w.WriteHeader(http.StatusNotFound)
 		}
 	}))
 	defer srv.Close()
 	t.Setenv("OLLAMA_HOST", srv.URL)
 	stub := &launcherEditorRunner{}
 	restore := OverrideIntegration("stubeditor", stub)
 	defer restore()
 	cmd := LaunchCmd(func(cmd *cobra.Command, args []string) error { return nil }, func(cmd *cobra.Command) {})
 	cmd.SetArgs([]string{"stubeditor", "--model", "llama3.2"})
 	if err := cmd.Execute(); err != nil {
 		t.Fatalf("launch command failed: %v", err)
 	}
 	saved, err := config.LoadIntegration("stubeditor")
 	if err != nil {
 		t.Fatalf("failed to reload integration config: %v", err)
 	}
 	// The seeded cloud model must no longer be in the saved list.
 	if diff := cmp.Diff([]string{"llama3.2"}, saved.Models); diff != "" {
 		t.Fatalf("saved models mismatch (-want +got):\n%s", diff)
 	}
 	if diff := cmp.Diff([][]string{{"llama3.2"}}, stub.edited); diff != "" {
 		t.Fatalf("editor models mismatch (-want +got):\n%s", diff)
 	}
 	if stub.ranModel != "llama3.2" {
 		t.Fatalf("expected launch to run with llama3.2, got %q", stub.ranModel)
 	}
 }
 // TestLaunchCmdModelFlagClearsDisabledCloudOverride launches "stubapp" with
 // --model glm-5:cloud while the stub server reports cloud as disabled. It
 // expects a warning on stderr, exactly one selector call with an empty current
 // model, and the selector's choice (llama3.2) to be both run and saved.
 func TestLaunchCmdModelFlagClearsDisabledCloudOverride(t *testing.T) {
 	tmpDir := t.TempDir()
 	setLaunchTestHome(t, tmpDir)
 	// Stub server: cloud disabled, one local model (llama3.2) available.
 	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		switch r.URL.Path {
 		case "/api/status":
 			fmt.Fprintf(w, `{"cloud":{"disabled":true,"source":"config"}}`)
 		case "/api/tags":
 			fmt.Fprint(w, `{"models":[{"name":"llama3.2"}]}`)
 		case "/api/show":
 			fmt.Fprint(w, `{"model":"llama3.2"}`)
 		default:
 			w.WriteHeader(http.StatusNotFound)
 		}
 	}))
 	defer srv.Close()
 	t.Setenv("OLLAMA_HOST", srv.URL)
 	stub := &launcherSingleRunner{}
 	restore := OverrideIntegration("stubapp", stub)
 	defer restore()
 	// Swap in a recording selector; the original is restored on return.
 	oldSelector := DefaultSingleSelector
 	defer func() { DefaultSingleSelector = oldSelector }()
 	var selectorCalls int
 	var gotCurrent string
 	DefaultSingleSelector = func(title string, items []ModelItem, current string) (string, error) {
 		selectorCalls++
 		gotCurrent = current
 		return "llama3.2", nil
 	}
 	cmd := LaunchCmd(func(cmd *cobra.Command, args []string) error { return nil }, func(cmd *cobra.Command) {})
 	cmd.SetArgs([]string{"stubapp", "--model", "glm-5:cloud"})
 	// Capture stderr so the disabled-cloud warning can be asserted below.
 	stderr := captureStderr(t, func() {
 		if err := cmd.Execute(); err != nil {
 			t.Fatalf("launch command failed: %v", err)
 		}
 	})
 	if selectorCalls != 1 {
 		t.Fatalf("expected disabled cloud override to fall back to selector, got %d calls", selectorCalls)
 	}
 	if gotCurrent != "" {
 		t.Fatalf("expected disabled override to be cleared before selection, got current %q", gotCurrent)
 	}
 	if stub.ranModel != "llama3.2" {
 		t.Fatalf("expected launch to run with replacement local model, got %q", stub.ranModel)
 	}
 	if !strings.Contains(stderr, "Warning: ignoring --model glm-5:cloud because cloud is disabled") {
 		t.Fatalf("expected disabled-cloud warning, got stderr: %q", stderr)
 	}
 	saved, err := config.LoadIntegration("stubapp")
 	if err != nil {
 		t.Fatalf("failed to reload integration config: %v", err)
 	}
 	if diff := cmp.Diff([]string{"llama3.2"}, saved.Models); diff != "" {
 		t.Fatalf("saved models mismatch (-want +got):\n%s", diff)
 	}
 }
 // TestLaunchCmdYes_AutoConfirmsLaunchPromptPath launches the "stubeditor"
 // integration with --model llama3.2 --yes and fails if the confirm prompt is
 // ever invoked. It expects the editor stub to be edited with and run on
 // llama3.2.
 func TestLaunchCmdYes_AutoConfirmsLaunchPromptPath(t *testing.T) {
 	tmpDir := t.TempDir()
 	setLaunchTestHome(t, tmpDir)
 	// NOTE(review): withLauncherHooks is defined outside this view; presumably
 	// it saves and restores the Default* hooks overridden below - confirm.
 	withLauncherHooks(t)
 	withInteractiveSession(t, false)
 	// Stub server: llama3.2 resolvable via /api/show, /api/status returns 404.
 	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		switch r.URL.Path {
 		case "/api/show":
 			fmt.Fprint(w, `{"model":"llama3.2"}`)
 		case "/api/status":
 			w.WriteHeader(http.StatusNotFound)
 			fmt.Fprint(w, `{"error":"not found"}`)
 		default:
 			w.WriteHeader(http.StatusNotFound)
 		}
 	}))
 	defer srv.Close()
 	t.Setenv("OLLAMA_HOST", srv.URL)
 	stub := &launcherEditorRunner{paths: []string{"/tmp/stubeditor.json"}}
 	restore := OverrideIntegration("stubeditor", stub)
 	defer restore()
 	// Any prompt is a failure: --yes must confirm automatically.
 	DefaultConfirmPrompt = func(prompt string, options ConfirmOptions) (bool, error) {
 		t.Fatalf("unexpected prompt with --yes: %q", prompt)
 		return false, nil
 	}
 	cmd := LaunchCmd(func(cmd *cobra.Command, args []string) error { return nil }, func(cmd *cobra.Command) {})
 	cmd.SetArgs([]string{"stubeditor", "--model", "llama3.2", "--yes"})
 	if err := cmd.Execute(); err != nil {
 		t.Fatalf("launch command with --yes failed: %v", err)
 	}
 	if diff := cmp.Diff([][]string{{"llama3.2"}}, stub.edited); diff != "" {
 		t.Fatalf("editor models mismatch (-want +got):\n%s", diff)
 	}
 	if stub.ranModel != "llama3.2" {
 		t.Fatalf("expected launch to run with llama3.2, got %q", stub.ranModel)
 	}
 }
 // TestLaunchCmdHeadlessWithYes_AutoPullsMissingLocalModel launches "stubapp"
 // with --model missing-model --yes against a server whose /api/show returns
 // 404. It expects /api/pull to be requested, no confirm prompt to appear, and
 // the launch to run with missing-model.
 func TestLaunchCmdHeadlessWithYes_AutoPullsMissingLocalModel(t *testing.T) {
 	tmpDir := t.TempDir()
 	setLaunchTestHome(t, tmpDir)
 	withLauncherHooks(t)
 	withInteractiveSession(t, false)
 	// pullCalled is set by the HTTP handler and read after Execute returns.
 	var pullCalled bool
 	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		switch r.URL.Path {
 		case "/api/show":
 			w.WriteHeader(http.StatusNotFound)
 			fmt.Fprint(w, `{"error":"model not found"}`)
 		case "/api/pull":
 			pullCalled = true
 			w.WriteHeader(http.StatusOK)
 			fmt.Fprint(w, `{"status":"success"}`)
 		default:
 			w.WriteHeader(http.StatusNotFound)
 		}
 	}))
 	defer srv.Close()
 	t.Setenv("OLLAMA_HOST", srv.URL)
 	stub := &launcherSingleRunner{}
 	restore := OverrideIntegration("stubapp", stub)
 	defer restore()
 	// Any prompt is a failure: --yes must approve the pull automatically.
 	DefaultConfirmPrompt = func(prompt string, options ConfirmOptions) (bool, error) {
 		t.Fatalf("unexpected prompt with --yes in headless autopull path: %q", prompt)
 		return false, nil
 	}
 	cmd := LaunchCmd(func(cmd *cobra.Command, args []string) error { return nil }, func(cmd *cobra.Command) {})
 	cmd.SetArgs([]string{"stubapp", "--model", "missing-model", "--yes"})
 	if err := cmd.Execute(); err != nil {
 		t.Fatalf("launch command with --yes failed: %v", err)
 	}
 	if !pullCalled {
 		t.Fatal("expected missing local model to be auto-pulled with --yes in headless mode")
 	}
 	if stub.ranModel != "missing-model" {
 		t.Fatalf("expected launch to run with pulled model, got %q", stub.ranModel)
 	}
 }
 // TestLaunchCmdHeadlessWithoutYes_ReturnsActionableConfirmError launches
 // "stubeditor" with --model llama3.2 but without --yes, after
 // withInteractiveSession(t, false). It expects an error mentioning
 // "re-run with --yes", no confirm prompt, no editor writes, and no run.
 func TestLaunchCmdHeadlessWithoutYes_ReturnsActionableConfirmError(t *testing.T) {
 	tmpDir := t.TempDir()
 	setLaunchTestHome(t, tmpDir)
 	withLauncherHooks(t)
 	withInteractiveSession(t, false)
 	// Stub server: llama3.2 resolvable via /api/show, /api/status returns 404.
 	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		switch r.URL.Path {
 		case "/api/show":
 			fmt.Fprint(w, `{"model":"llama3.2"}`)
 		case "/api/status":
 			w.WriteHeader(http.StatusNotFound)
 			fmt.Fprint(w, `{"error":"not found"}`)
 		default:
 			w.WriteHeader(http.StatusNotFound)
 		}
 	}))
 	defer srv.Close()
 	t.Setenv("OLLAMA_HOST", srv.URL)
 	stub := &launcherEditorRunner{paths: []string{"/tmp/stubeditor.json"}}
 	restore := OverrideIntegration("stubeditor", stub)
 	defer restore()
 	// The command must fail on its own rather than invoke the prompt.
 	DefaultConfirmPrompt = func(prompt string, options ConfirmOptions) (bool, error) {
 		t.Fatalf("unexpected prompt in headless non-yes mode: %q", prompt)
 		return false, nil
 	}
 	cmd := LaunchCmd(func(cmd *cobra.Command, args []string) error { return nil }, func(cmd *cobra.Command) {})
 	cmd.SetArgs([]string{"stubeditor", "--model", "llama3.2"})
 	err := cmd.Execute()
 	if err == nil {
 		t.Fatal("expected launch command to fail without --yes in headless mode")
 	}
 	if !strings.Contains(err.Error(), "re-run with --yes") {
 		t.Fatalf("expected actionable --yes guidance, got %v", err)
 	}
 	if len(stub.edited) != 0 {
 		t.Fatalf("expected no editor writes when confirmation is blocked, got %v", stub.edited)
 	}
 	if stub.ranModel != "" {
 		t.Fatalf("expected launch to abort before run, got %q", stub.ranModel)
 	}
 }
 // TestLaunchCmdIntegrationArgPromptsForModelWithSavedSelection seeds a saved
 // "stubapp" config with llama3.2 and launches "stubapp" without --model. It
 // expects the selector to receive llama3.2 as the current model, and the
 // selector's answer (qwen3:8b) to be both run and saved.
 func TestLaunchCmdIntegrationArgPromptsForModelWithSavedSelection(t *testing.T) {
 	tmpDir := t.TempDir()
 	setLaunchTestHome(t, tmpDir)
 	if err := config.SaveIntegration("stubapp", []string{"llama3.2"}); err != nil {
 		t.Fatalf("failed to seed saved config: %v", err)
 	}
 	// Stub server: two local models listed, qwen3:8b resolvable via /api/show.
 	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		switch r.URL.Path {
 		case "/api/tags":
 			fmt.Fprint(w, `{"models":[{"name":"llama3.2"},{"name":"qwen3:8b"}]}`)
 		case "/api/show":
 			fmt.Fprint(w, `{"model":"qwen3:8b"}`)
 		default:
 			w.WriteHeader(http.StatusNotFound)
 		}
 	}))
 	defer srv.Close()
 	t.Setenv("OLLAMA_HOST", srv.URL)
 	stub := &launcherSingleRunner{}
 	restore := OverrideIntegration("stubapp", stub)
 	defer restore()
 	// Swap in a recording selector; the original is restored on return.
 	oldSelector := DefaultSingleSelector
 	defer func() { DefaultSingleSelector = oldSelector }()
 	var gotCurrent string
 	DefaultSingleSelector = func(title string, items []ModelItem, current string) (string, error) {
 		gotCurrent = current
 		return "qwen3:8b", nil
 	}
 	cmd := LaunchCmd(func(cmd *cobra.Command, args []string) error { return nil }, func(cmd *cobra.Command) {})
 	cmd.SetArgs([]string{"stubapp"})
 	if err := cmd.Execute(); err != nil {
 		t.Fatalf("launch command failed: %v", err)
 	}
 	if gotCurrent != "llama3.2" {
 		t.Fatalf("expected selector current model to be saved model llama3.2, got %q", gotCurrent)
 	}
 	if stub.ranModel != "qwen3:8b" {
 		t.Fatalf("expected launch to run selected model qwen3:8b, got %q", stub.ranModel)
 	}
 	saved, err := config.LoadIntegration("stubapp")
 	if err != nil {
 		t.Fatalf("failed to reload integration config: %v", err)
 	}
 	if diff := cmp.Diff([]string{"qwen3:8b"}, saved.Models); diff != "" {
 		t.Fatalf("saved models mismatch (-want +got):\n%s", diff)
 	}
 }
 // TestLaunchCmdHeadlessYes_IntegrationRequiresModelEvenWhenSaved seeds a saved
 // "stubapp" config with llama3.2 and launches "stubapp --yes" without --model,
 // after withInteractiveSession(t, false). It expects an error mentioning
 // "requires --model <model>", no selector call, and no run.
 func TestLaunchCmdHeadlessYes_IntegrationRequiresModelEvenWhenSaved(t *testing.T) {
 	tmpDir := t.TempDir()
 	setLaunchTestHome(t, tmpDir)
 	withLauncherHooks(t)
 	withInteractiveSession(t, false)
 	if err := config.SaveIntegration("stubapp", []string{"llama3.2"}); err != nil {
 		t.Fatalf("failed to seed saved config: %v", err)
 	}
 	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		switch r.URL.Path {
 		case "/api/show":
 			fmt.Fprint(w, `{"model":"llama3.2"}`)
 		default:
 			w.WriteHeader(http.StatusNotFound)
 		}
 	}))
 	defer srv.Close()
 	t.Setenv("OLLAMA_HOST", srv.URL)
 	stub := &launcherSingleRunner{}
 	restore := OverrideIntegration("stubapp", stub)
 	defer restore()
 	// The selector must never be reached; calling it fails the test.
 	oldSelector := DefaultSingleSelector
 	defer func() { DefaultSingleSelector = oldSelector }()
 	DefaultSingleSelector = func(title string, items []ModelItem, current string) (string, error) {
 		t.Fatal("selector should not be called for headless --yes saved-model launch")
 		return "", nil
 	}
 	cmd := LaunchCmd(func(cmd *cobra.Command, args []string) error { return nil }, func(cmd *cobra.Command) {})
 	cmd.SetArgs([]string{"stubapp", "--yes"})
 	err := cmd.Execute()
 	if err == nil {
 		t.Fatal("expected launch command to fail when --yes is used headlessly without --model")
 	}
 	if !strings.Contains(err.Error(), "requires --model <model>") {
 		t.Fatalf("expected actionable --model guidance, got %v", err)
 	}
 	if stub.ranModel != "" {
 		t.Fatalf("expected launch to abort before run, got %q", stub.ranModel)
 	}
 }
 // TestLaunchCmdHeadlessYes_IntegrationWithoutSavedModelReturnsError launches
 // "stubapp --yes" with no saved config and no --model, after
 // withInteractiveSession(t, false), against a server that answers 404 to
 // everything. It expects an error mentioning "requires --model <model>", no
 // selector call, and no run.
 func TestLaunchCmdHeadlessYes_IntegrationWithoutSavedModelReturnsError(t *testing.T) {
 	tmpDir := t.TempDir()
 	setLaunchTestHome(t, tmpDir)
 	withLauncherHooks(t)
 	withInteractiveSession(t, false)
 	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		w.WriteHeader(http.StatusNotFound)
 	}))
 	defer srv.Close()
 	t.Setenv("OLLAMA_HOST", srv.URL)
 	stub := &launcherSingleRunner{}
 	restore := OverrideIntegration("stubapp", stub)
 	defer restore()
 	// The selector must never be reached; calling it fails the test.
 	oldSelector := DefaultSingleSelector
 	defer func() { DefaultSingleSelector = oldSelector }()
 	DefaultSingleSelector = func(title string, items []ModelItem, current string) (string, error) {
 		t.Fatal("selector should not be called for headless --yes without saved model")
 		return "", nil
 	}
 	cmd := LaunchCmd(func(cmd *cobra.Command, args []string) error { return nil }, func(cmd *cobra.Command) {})
 	cmd.SetArgs([]string{"stubapp", "--yes"})
 	err := cmd.Execute()
 	if err == nil {
 		t.Fatal("expected launch command to fail when --yes is used headlessly without --model")
 	}
 	if !strings.Contains(err.Error(), "requires --model <model>") {
 		t.Fatalf("expected actionable --model guidance, got %v", err)
 	}
 	if stub.ranModel != "" {
 		t.Fatalf("expected launch to abort before run, got %q", stub.ranModel)
 	}
 }
--- a/Show More
+++ b/Show More
`@@ -1 +1 @@`
	`v0.4.1`	`38ad257088fb2193ad47e527cf6534a689f30943`
		`@@ -0,0 +1 @@`
							<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 320 320"><path fill="#fff" d="m297.06 130.97c7.26-21.79 4.76-45.66-6.85-65.48-17.46-30.4-52.56-46.04-86.84-38.68-15.25-17.18-37.16-26.95-60.13-26.81-35.04-.08-66.13 22.48-76.91 55.82-22.51 4.61-41.94 18.7-53.31 38.67-17.59 30.32-13.58 68.54 9.92 94.54-7.26 21.79-4.76 45.66 6.85 65.48 17.46 30.4 52.56 46.04 86.84 38.68 15.24 17.18 37.16 26.95 60.13 26.8 35.06.09 66.16-22.49 76.94-55.86 22.51-4.61 41.94-18.7 53.31-38.67 17.57-30.32 13.55-68.51-9.94-94.51zm-120.28 168.11c-14.03.02-27.62-4.89-38.39-13.88.49-.26 1.34-.73 1.89-1.07l63.72-36.8c3.26-1.85 5.26-5.32 5.24-9.07v-89.83l26.93 15.55c.29.14.48.42.52.74v74.39c-.04 33.08-26.83 59.9-59.91 59.97zm-128.84-55.03c-7.03-12.14-9.56-26.37-7.15-40.18.47.28 1.3.79 1.89 1.13l63.72 36.8c3.23 1.89 7.23 1.89 10.47 0l77.79-44.92v31.1c.02.32-.13.63-.38.83l-64.41 37.19c-28.69 16.52-65.33 6.7-81.92-21.95zm-16.77-139.09c7-12.16 18.05-21.46 31.21-26.29 0 .55-.03 1.52-.03 2.2v73.61c-.02 3.74 1.98 7.21 5.23 9.06l77.79 44.91-26.93 15.55c-.27.18-.61.21-.91.08l-64.42-37.22c-28.63-16.58-38.45-53.21-21.95-81.89zm221.26 51.49-77.79-44.92 26.93-15.54c.27-.18.61-.21.91-.08l64.42 37.19c28.68 16.57 38.51 53.26 21.94 81.94-7.01 12.14-18.05 21.44-31.2 26.28v-75.81c.03-3.74-1.96-7.2-5.2-9.06zm26.8-40.34c-.47-.29-1.3-.79-1.89-1.13l-63.72-36.8c-3.23-1.89-7.23-1.89-10.47 0l-77.79 44.92v-31.1c-.02-.32.13-.63.38-.83l64.41-37.16c28.69-16.55 65.37-6.7 81.91 22 6.99 12.12 9.52 26.31 7.15 40.1zm-168.51 55.43-26.94-15.55c-.29-.14-.48-.42-.52-.74v-74.39c.02-33.12 26.89-59.96 60.01-59.94 14.01 0 27.57 4.92 38.34 13.88-.49.26-1.33.73-1.89 1.07l-63.72 36.8c-3.26 1.85-5.26 5.31-5.24 9.06l-.04 89.79zm14.63-31.54 34.65-20.01 34.65 20v40.01l-34.65 20-34.65-20z"/></svg>