Files
ollama/scripts/build_darwin.sh
Daniel Hiltgen 56c735d871 runner: Remove CGO engines, use llama-server exclusively for GGML models
Remove the vendored GGML and llama.cpp backend, CGO runner, Go model
implementations, and sample.  llama-server (built from upstream llama.cpp via
FetchContent) is now the sole inference engine for GGUF-based models.
(Safetensor based models continue to run on the new MLX engine.)  This allows
us to more rapidly pick up new capabilities and fixes from llama.cpp as they
come out.

On windows this now requires recent AMD driver versions to support ROCm v7 as
llama.cpp currently does not support building against v6.
2026-04-20 08:44:02 -07:00

379 lines
18 KiB
Bash
Executable File

#!/bin/sh
# Builds the Ollama macOS release artifacts: per-arch binaries, a
# universal tarball, and (optionally) the signed/notarized Ollama.app.
#
# Note:
# While testing, if you double-click on the Ollama.app
# some state is left on MacOS and subsequent attempts
# to build again will fail with:
#
# hdiutil: create failed - Operation not permitted
#
# To work around, specify another volume name with:
#
# VOL_NAME="$(date)" ./scripts/build_darwin.sh
#
# Volume name used for the generated dmg; override to dodge the hdiutil
# issue described above.
VOL_NAME=${VOL_NAME:-"Ollama"}
# Version from the nearest git tag (leading "v" stripped) unless the
# caller supplies VERSION explicitly.
export VERSION=${VERSION:-$(git describe --tags --first-parent --abbrev=7 --long --dirty --always | sed -e "s/^v//g")}
# Embed version + release mode into every `go build` in this script.
export GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${VERSION#v}\" \"-X=github.com/ollama/ollama/server.mode=release\"'"
# CGO targets macOS 14 as the minimum supported OS for all arches.
export CGO_CFLAGS="-O3 -mmacosx-version-min=14.0"
export CGO_CXXFLAGS="-O3 -mmacosx-version-min=14.0"
export CGO_LDFLAGS="-mmacosx-version-min=14.0"
# Abort on the first failing command.
set -e
status() { echo >&2 ">>> $@"; }
# Print usage and exit non-zero. "$0" is quoted so a script path
# containing spaces does not word-split inside basename.
usage() {
  echo "usage: $(basename "$0") [build app [sign]]"
  exit 1
}
# All artifacts land under dist/; make sure it exists up front.
mkdir -p dist

# Architectures to build. Override with -a, e.g. `-a arm64`.
ARCHS="arm64 amd64"
while getopts "a:h" opt; do
  case "$opt" in
    a) ARCHS=$OPTARG ;;
    h) usage ;;
  esac
done
shift $((OPTIND - 1))
# Build the per-architecture payloads under dist/darwin-$ARCH/: MLX
# engine dylibs (via CMake), a static llama-server + llama-quantize
# from upstream llama.cpp, and the Go `ollama` binary CGO-linked
# against MLX. Honors $ARCHS ("arm64 amd64" by default).
_build_darwin() {
# Shared upstream-source cache so each build dir doesn't fetch its own
# copy of llama.cpp. First build to need it populates the cache; the rest
# reference it via FETCHCONTENT_SOURCE_DIR_LLAMA_CPP. Mirrors what we do
# for MLX (FETCHCONTENT_SOURCE_DIR_MLX/MLX-C/JSON/FMT/METAL_CPP further
# down). Stays under build/ so `git clean -dfx` clears it.
LLAMA_CPP_SHARED_SRC="$(pwd)/build/_shared_deps/llama_cpp-src"
if [ ! -f "$LLAMA_CPP_SHARED_SRC/CMakeLists.txt" ]; then
# LLAMA_CPP_VERSION (a file at the repo root) pins the upstream tag.
LLAMA_CPP_TAG=$(cat LLAMA_CPP_VERSION)
status "Cloning shared llama.cpp source ($LLAMA_CPP_TAG) → $LLAMA_CPP_SHARED_SRC"
rm -rf "$LLAMA_CPP_SHARED_SRC"
mkdir -p "$(dirname "$LLAMA_CPP_SHARED_SRC")"
git clone --depth 1 --branch "$LLAMA_CPP_TAG" \
https://github.com/ggml-org/llama.cpp.git "$LLAMA_CPP_SHARED_SRC"
fi
for ARCH in $ARCHS; do
status "Building darwin $ARCH"
INSTALL_PREFIX=dist/darwin-$ARCH/
if [ "$ARCH" = "amd64" ]; then
# x86_64 path: MLX as dynamic backends (MLX_ENABLE_X64_MAC), no Metal.
status "Building darwin $ARCH dynamic backends"
BUILD_DIR=build/darwin-$ARCH
cmake -B $BUILD_DIR \
-DCMAKE_OSX_ARCHITECTURES=x86_64 \
-DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
-DCMAKE_INSTALL_PREFIX=$INSTALL_PREFIX \
-DMLX_ENGINE=ON \
-DMLX_ENABLE_X64_MAC=ON \
-DOLLAMA_RUNNER_DIR=./
cmake --build $BUILD_DIR --target mlx mlxc -j
cmake --install $BUILD_DIR --component MLX
# Build llama-server statically (no Metal on x86, CPU-only)
status "Building darwin $ARCH llama-server (static)"
# GGML_CPU_ALL_VARIANTS builds every CPU feature level so the runtime
# can pick the best one (hence the amd64 libggml-cpu-*.so names).
cmake -S llama/server --preset cpu \
-DCMAKE_OSX_ARCHITECTURES=x86_64 \
-DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
-DCMAKE_INSTALL_PREFIX=$INSTALL_PREFIX \
-DFETCHCONTENT_SOURCE_DIR_LLAMA_CPP=$LLAMA_CPP_SHARED_SRC \
-DGGML_METAL=OFF \
-DGGML_CPU_ALL_VARIANTS=ON
cmake --build build/llama-server-cpu --target llama-server --target llama-quantize --target ggml -j
cmake --install build/llama-server-cpu --component llama-server
# CGO flags for the x86_64 `go build` below (CPU-only: no Metal).
MLX_CGO_CFLAGS="-O3 -mmacosx-version-min=14.0"
MLX_CGO_LDFLAGS="-ldl -lc++ -framework Accelerate -mmacosx-version-min=14.0"
else
# Build MLX twice for arm64
# Metal 3.x build (backward compatible, macOS 14+)
BUILD_DIR=build/metal-v3
status "Building MLX Metal v3 (macOS 14+)"
cmake -S . -B $BUILD_DIR \
-DCMAKE_BUILD_TYPE=Release \
-DMLX_ENGINE=ON \
-DOLLAMA_RUNNER_DIR=mlx_metal_v3 \
-DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
-DCMAKE_INSTALL_PREFIX=$INSTALL_PREFIX
cmake --build $BUILD_DIR --target mlx mlxc --parallel
cmake --install $BUILD_DIR --component MLX
# Metal 4.x build (NAX-enabled, macOS 26+)
# Only possible with Xcode 26+ SDK; skip on older toolchains.
SDK_MAJOR=$(xcrun --show-sdk-version 2>/dev/null | cut -d. -f1)
if [ "${SDK_MAJOR:-0}" -ge 26 ]; then
# Reuse the sources the v3 build already fetched so the v4 build
# doesn't download its own copies.
V3_DEPS=$BUILD_DIR/_deps
BUILD_DIR_V4=build/metal-v4
status "Building MLX Metal v4 (macOS 26+, NAX)"
cmake -S . -B $BUILD_DIR_V4 \
-DCMAKE_BUILD_TYPE=Release \
-DMLX_ENGINE=ON \
-DOLLAMA_RUNNER_DIR=mlx_metal_v4 \
-DCMAKE_OSX_DEPLOYMENT_TARGET=26.0 \
-DCMAKE_INSTALL_PREFIX=$INSTALL_PREFIX \
-DFETCHCONTENT_SOURCE_DIR_MLX=$V3_DEPS/mlx-src \
-DFETCHCONTENT_SOURCE_DIR_MLX-C=$V3_DEPS/mlx-c-src \
-DFETCHCONTENT_SOURCE_DIR_JSON=$V3_DEPS/json-src \
-DFETCHCONTENT_SOURCE_DIR_FMT=$V3_DEPS/fmt-src \
-DFETCHCONTENT_SOURCE_DIR_METAL_CPP=$V3_DEPS/metal_cpp-src
cmake --build $BUILD_DIR_V4 --target mlx mlxc --parallel
cmake --install $BUILD_DIR_V4 --component MLX
else
status "Skipping MLX Metal v4 (SDK $SDK_MAJOR < 26, need Xcode 26+)"
fi
# Build llama-server from upstream source (Metal + CPU linked in)
status "Building darwin $ARCH llama-server (static)"
cmake -S llama/server --preset darwin \
-DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
-DCMAKE_INSTALL_PREFIX=$INSTALL_PREFIX \
-DFETCHCONTENT_SOURCE_DIR_LLAMA_CPP=$LLAMA_CPP_SHARED_SRC
cmake --build build/llama-server-darwin --target llama-server --target llama-quantize --parallel
cmake --install build/llama-server-darwin --component llama-server
# Use the v3 build for CGO linking (compatible with both)
MLX_CGO_CFLAGS="-O3 -mmacosx-version-min=14.0"
MLX_CGO_LDFLAGS="-lc++ -framework Metal -framework Foundation -framework Accelerate -mmacosx-version-min=14.0"
fi
GOOS=darwin GOARCH=$ARCH CGO_ENABLED=1 CGO_CFLAGS="$MLX_CGO_CFLAGS" CGO_LDFLAGS="$MLX_CGO_LDFLAGS" go build -o $INSTALL_PREFIX .
# MLX libraries stay in lib/ollama/ (flat or variant subdirs).
# The runtime discovery in dynamic.go searches lib/ollama/ relative
# to the executable, including mlx_* subdirectories.
done
}
# Create universal (x86_64 + arm64) binaries with lipo; when
# APPLE_IDENTITY is set, codesign everything and notarize the CLI; then
# produce the release tarball dist/ollama-darwin.tgz.
# Requires _build_darwin to have populated dist/darwin-amd64/ and
# dist/darwin-arm64/.
_sign_darwin() {
  status "Creating universal binary..."
  mkdir -p dist/darwin
  lipo -create -output dist/darwin/ollama dist/darwin-amd64/ollama dist/darwin-arm64/ollama
  chmod +x dist/darwin/ollama
  lipo dist/darwin/ollama -verify_arch x86_64 arm64
  lipo -create -output dist/darwin/llama-server dist/darwin-amd64/lib/ollama/llama-server dist/darwin-arm64/lib/ollama/llama-server
  chmod +x dist/darwin/llama-server
  lipo dist/darwin/llama-server -verify_arch x86_64 arm64
  lipo -create -output dist/darwin/llama-quantize dist/darwin-amd64/lib/ollama/llama-quantize dist/darwin-arm64/lib/ollama/llama-quantize
  chmod +x dist/darwin/llama-quantize
  lipo dist/darwin/llama-quantize -verify_arch x86_64 arm64
  if [ -n "$APPLE_IDENTITY" ]; then
    # Sign every regular file (skip symlinks) in the universal output
    # and in both per-arch lib trees, including the MLX variant subdirs.
    for F in dist/darwin/ollama dist/darwin/llama-server dist/darwin/llama-quantize dist/darwin-*/lib/ollama/* dist/darwin-*/lib/ollama/mlx_metal_v*/*; do
      [ -f "$F" ] && [ ! -L "$F" ] || continue
      codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime "$F"
    done
    # create a temporary zip for notarization
    TEMP=$(mktemp -u).zip
    ditto -c -k --keepParent dist/darwin/ollama "$TEMP"
    # Credentials are quoted so values containing spaces don't
    # word-split (matches the quoting in _build_macapp).
    xcrun notarytool submit "$TEMP" --wait --timeout 20m --apple-id "$APPLE_ID" --password "$APPLE_PASSWORD" --team-id "$APPLE_TEAM_ID"
    rm -f "$TEMP"
  fi
  status "Creating universal tarball..."
  tar -cf dist/ollama-darwin.tar --strip-components 2 dist/darwin/ollama dist/darwin/llama-server dist/darwin/llama-quantize
  tar -rf dist/ollama-darwin.tar --strip-components 4 dist/darwin-amd64/lib/
  tar -rf dist/ollama-darwin.tar --strip-components 4 dist/darwin-arm64/lib/
  gzip -9vc <dist/ollama-darwin.tar >dist/ollama-darwin.tgz
}
# Assemble the Ollama.app bundle: build the UI with npm, build a
# universal menubar app binary, copy the ollama CLI + llama-server +
# MLX libraries into Contents/Resources/, then (when APPLE_IDENTITY is
# set) sign, notarize, staple, and package as zip + dmg.
# Requires _build_darwin/_sign_darwin output under dist/.
_build_macapp() {
  # '&>' is a bashism; under the #!/bin/sh shebang use the portable
  # '>/dev/null 2>&1' form so these presence checks work in POSIX sh
  # (where '&>' backgrounds the command and the check never fires).
  if ! command -v npm >/dev/null 2>&1; then
    echo "npm is not installed. Please install Node.js and npm first:"
    echo " Visit: https://nodejs.org/"
    exit 1
  fi
  if ! command -v tsc >/dev/null 2>&1; then
    echo "Installing TypeScript compiler..."
    npm install -g typescript
  fi
  echo "Installing required Go tools..."
  cd app/ui/app
  npm install
  npm run build
  cd ../../..
  # Build the Ollama.app bundle
  rm -rf dist/Ollama.app
  cp -a ./app/darwin/Ollama.app dist/Ollama.app
  # update the modified date of the app bundle to now
  touch dist/Ollama.app
  # Build the app binary for both arches and lipo into a universal
  # Contents/MacOS/Ollama.
  go clean -cache
  GOARCH=amd64 CGO_ENABLED=1 GOOS=darwin go build -o dist/darwin-app-amd64 -ldflags="-s -w -X=github.com/ollama/ollama/app/version.Version=${VERSION}" ./app/cmd/app
  GOARCH=arm64 CGO_ENABLED=1 GOOS=darwin go build -o dist/darwin-app-arm64 -ldflags="-s -w -X=github.com/ollama/ollama/app/version.Version=${VERSION}" ./app/cmd/app
  mkdir -p dist/Ollama.app/Contents/MacOS
  lipo -create -output dist/Ollama.app/Contents/MacOS/Ollama dist/darwin-app-amd64 dist/darwin-app-arm64
  rm -f dist/darwin-app-amd64 dist/darwin-app-arm64
  # Create a mock Squirrel.framework bundle
  mkdir -p dist/Ollama.app/Contents/Frameworks/Squirrel.framework/Versions/A/Resources/
  cp -a dist/Ollama.app/Contents/MacOS/Ollama dist/Ollama.app/Contents/Frameworks/Squirrel.framework/Versions/A/Squirrel
  ln -s ../Squirrel dist/Ollama.app/Contents/Frameworks/Squirrel.framework/Versions/A/Resources/ShipIt
  cp -a ./app/cmd/squirrel/Info.plist dist/Ollama.app/Contents/Frameworks/Squirrel.framework/Versions/A/Resources/Info.plist
  ln -s A dist/Ollama.app/Contents/Frameworks/Squirrel.framework/Versions/Current
  ln -s Versions/Current/Resources dist/Ollama.app/Contents/Frameworks/Squirrel.framework/Resources
  ln -s Versions/Current/Squirrel dist/Ollama.app/Contents/Frameworks/Squirrel.framework/Squirrel
  # Update the version in the Info.plist
  plutil -replace CFBundleShortVersionString -string "$VERSION" dist/Ollama.app/Contents/Info.plist
  plutil -replace CFBundleVersion -string "$VERSION" dist/Ollama.app/Contents/Info.plist
  # Setup the ollama binaries
  mkdir -p dist/Ollama.app/Contents/Resources
  if [ -d dist/darwin-amd64 ]; then
    lipo -create -output dist/Ollama.app/Contents/Resources/ollama dist/darwin-amd64/ollama dist/darwin-arm64/ollama
    # llama-server + llama-quantize universal binaries
    cp dist/darwin/llama-server dist/Ollama.app/Contents/Resources/
    cp dist/darwin/llama-quantize dist/Ollama.app/Contents/Resources/
    # Copy .so files from both architectures (names don't collide: arm64=libggml-cpu.so, amd64=libggml-cpu-*.so)
    cp dist/darwin-arm64/lib/ollama/*.so dist/Ollama.app/Contents/Resources/ 2>/dev/null || true
    cp dist/darwin-amd64/lib/ollama/*.so dist/Ollama.app/Contents/Resources/ 2>/dev/null || true
    # Lipo common dylibs into universal binaries, copy amd64-only ones as-is.
    # Skip MLX dylibs (libmlx*.dylib) — on arm64 these live in variant
    # subdirs (mlx_metal_v3/) and are lipo'd there below. Copying the
    # amd64 flat copy here would produce an x86_64-only dylib in
    # Resources/ that shadows the variant subdirs.
    for F in dist/darwin-amd64/lib/ollama/*.dylib; do
      [ -f "$F" ] && [ ! -L "$F" ] || continue
      BASE=$(basename "$F")
      case "$BASE" in libmlx*) continue ;; esac
      if [ -f "dist/darwin-arm64/lib/ollama/$BASE" ]; then
        lipo -create -output "dist/Ollama.app/Contents/Resources/$BASE" "$F" "dist/darwin-arm64/lib/ollama/$BASE"
      else
        cp "$F" dist/Ollama.app/Contents/Resources/
      fi
    done
    # Preserve dylib soname symlinks required by the x86_64 llama-server slice
    # (for example @rpath/libmtmd.0.dylib).
    for F in dist/darwin-amd64/lib/ollama/*.dylib; do
      [ -L "$F" ] || continue
      BASE=$(basename "$F")
      case "$BASE" in libmlx*) continue ;; esac
      cp -P "$F" dist/Ollama.app/Contents/Resources/
    done
    # MLX Metal variant subdirs from arm64
    for VARIANT in dist/darwin-arm64/lib/ollama/mlx_metal_v*/; do
      [ -d "$VARIANT" ] || continue
      VNAME=$(basename "$VARIANT")
      DEST=dist/Ollama.app/Contents/Resources/$VNAME
      mkdir -p "$DEST"
      if [ "$VNAME" = "mlx_metal_v3" ]; then
        # v3: lipo amd64 flat + arm64 v3 into universal dylibs
        for LIB in libmlx.dylib libmlxc.dylib; do
          if [ -f "dist/darwin-amd64/lib/ollama/$LIB" ] && [ -f "$VARIANT$LIB" ]; then
            lipo -create -output "$DEST/$LIB" "dist/darwin-amd64/lib/ollama/$LIB" "$VARIANT$LIB"
          elif [ -f "$VARIANT$LIB" ]; then
            cp "$VARIANT$LIB" "$DEST/"
          fi
        done
        # Copy remaining files (metallib) from arm64 v3
        for F in "$VARIANT"*; do
          case "$(basename "$F")" in *.dylib) continue ;; esac
          [ -f "$F" ] && [ ! -L "$F" ] || continue
          cp "$F" "$DEST/"
        done
      else
        # v4+: arm64-only, copy all non-symlink files
        for F in "$VARIANT"*; do
          [ -f "$F" ] && [ ! -L "$F" ] || continue
          cp "$F" "$DEST/"
        done
      fi
    done
  else
    # arm64-only build (no dist/darwin-amd64): binaries are already
    # single-arch, so copy them straight in.
    cp -a dist/darwin/ollama dist/Ollama.app/Contents/Resources/ollama
    cp dist/darwin/llama-server dist/Ollama.app/Contents/Resources/
    cp dist/darwin/llama-quantize dist/Ollama.app/Contents/Resources/
    # arm64-only build: copy variant subdirs directly
    for VARIANT in dist/darwin-arm64/lib/ollama/mlx_metal_v*/; do
      [ -d "$VARIANT" ] || continue
      VNAME=$(basename "$VARIANT")
      mkdir -p dist/Ollama.app/Contents/Resources/$VNAME
      cp "$VARIANT"* dist/Ollama.app/Contents/Resources/$VNAME/ 2>/dev/null || true
    done
    # CPU backend libs (ggml-base, ggml-cpu) are flat in lib/ollama/
    cp dist/darwin-arm64/lib/ollama/*.so dist/Ollama.app/Contents/Resources/ 2>/dev/null || true
    for F in dist/darwin-arm64/lib/ollama/*.dylib; do
      [ -f "$F" ] && [ ! -L "$F" ] || continue
      cp "$F" dist/Ollama.app/Contents/Resources/
    done
    for F in dist/darwin-arm64/lib/ollama/*.dylib; do
      [ -L "$F" ] || continue
      cp -P "$F" dist/Ollama.app/Contents/Resources/
    done
  fi
  chmod a+x dist/Ollama.app/Contents/Resources/ollama
  # Sign
  if [ -n "$APPLE_IDENTITY" ]; then
    codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime dist/Ollama.app/Contents/Resources/ollama
    codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime dist/Ollama.app/Contents/Resources/llama-server
    codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime dist/Ollama.app/Contents/Resources/llama-quantize
    for lib in dist/Ollama.app/Contents/Resources/*.so dist/Ollama.app/Contents/Resources/*.dylib dist/Ollama.app/Contents/Resources/*.metallib dist/Ollama.app/Contents/Resources/mlx_metal_v*/*.dylib dist/Ollama.app/Contents/Resources/mlx_metal_v*/*.metallib dist/Ollama.app/Contents/Resources/mlx_metal_v*/*.so; do
      [ -f "$lib" ] || continue
      codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime "$lib"
    done
    codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier com.electron.ollama --deep --options=runtime dist/Ollama.app
  fi
  rm -f dist/Ollama-darwin.zip
  ditto -c -k --norsrc --keepParent dist/Ollama.app dist/Ollama-darwin.zip
  (cd dist/Ollama.app/Contents/Resources/; tar -cf - ollama llama-server llama-quantize *.so *.dylib *.metallib mlx_metal_v*/ 2>/dev/null) | gzip -9vc > dist/ollama-darwin.tgz
  # Notarize and Staple
  if [ -n "$APPLE_IDENTITY" ]; then
    $(xcrun -f notarytool) submit dist/Ollama-darwin.zip --wait --timeout 20m --apple-id "$APPLE_ID" --password "$APPLE_PASSWORD" --team-id "$APPLE_TEAM_ID"
    rm -f dist/Ollama-darwin.zip
    $(xcrun -f stapler) staple dist/Ollama.app
    # Re-zip after stapling so the distributed zip contains the ticket.
    ditto -c -k --norsrc --keepParent dist/Ollama.app dist/Ollama-darwin.zip
    rm -f dist/Ollama.dmg
    (cd dist && ../scripts/create-dmg.sh \
      --volname "${VOL_NAME}" \
      --volicon ../app/darwin/Ollama.app/Contents/Resources/icon.icns \
      --background ../app/assets/background.png \
      --window-pos 200 120 \
      --window-size 800 400 \
      --icon-size 128 \
      --icon "Ollama.app" 200 190 \
      --hide-extension "Ollama.app" \
      --app-drop-link 600 190 \
      --text-size 12 \
      "Ollama.dmg" \
      "Ollama.app" \
      ; )
    rm -f dist/rw*.dmg
    codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime dist/Ollama.dmg
    $(xcrun -f notarytool) submit dist/Ollama.dmg --wait --timeout 20m --apple-id "$APPLE_ID" --password "$APPLE_PASSWORD" --team-id "$APPLE_TEAM_ID"
    $(xcrun -f stapler) staple dist/Ollama.dmg
  else
    echo "WARNING: Code signing disabled, this bundle will not work for upgrade testing"
  fi
}
# No arguments: run the whole pipeline (build, universal-sign, app).
if [ $# -eq 0 ]; then
  _build_darwin
  _sign_darwin
  _build_macapp
  exit 0
fi

# Otherwise run only the requested steps, in the order given.
for step in "$@"; do
  case "$step" in
    build) _build_darwin ;;
    sign) _sign_darwin ;;
    app) _build_macapp ;;
    *) usage ;;
  esac
done