ci: include mlx jit headers on linux (#15083)

* ci: include mlx jit headers on linux

* handle CUDA JIT headers
This commit is contained in:
Daniel Hiltgen
2026-03-26 23:10:07 -07:00
committed by GitHub
parent f567abc63f
commit 516ebd8548
3 changed files with 83 additions and 4 deletions

View File

@@ -246,13 +246,21 @@ if(MLX_ENGINE)
COMPONENT MLX)
endif()
# Install CCCL headers for NVRTC JIT compilation at runtime.
# Install headers for NVRTC JIT compilation at runtime.
# MLX's own install rules use the default component so they get skipped by
# --component MLX. Headers are installed alongside libmlx in OLLAMA_INSTALL_DIR.
#
# Layout:
# ${OLLAMA_INSTALL_DIR}/include/cccl/{cuda,nv}/ — CCCL headers
# ${OLLAMA_INSTALL_DIR}/include/*.h — CUDA toolkit headers
#
# MLX's jit_module.cpp resolves CCCL via
# current_binary_dir()[.parent_path()] / "include" / "cccl"
# On Linux, MLX's jit_module.cpp resolves CCCL via
# current_binary_dir().parent_path() / "include" / "cccl", so we create a
# symlink from lib/ollama/include -> ${OLLAMA_RUNNER_DIR}/include
# This will need refinement if we add multiple CUDA versions for MLX in the future.
# CUDA runtime headers are found via CUDA_PATH env var (set by mlxrunner).
if(EXISTS ${CMAKE_BINARY_DIR}/_deps/cccl-src/include/cuda)
install(DIRECTORY ${CMAKE_BINARY_DIR}/_deps/cccl-src/include/cuda
DESTINATION ${OLLAMA_INSTALL_DIR}/include/cccl
@@ -271,6 +279,48 @@ if(MLX_ENGINE)
endif()
endif()
# Install the minimal set of CUDA toolkit headers needed by MLX JIT kernels.
# These are the transitive closure of includes from mlx/backend/cuda/device/*.cuh.
# The Go mlxrunner sets CUDA_PATH to OLLAMA_INSTALL_DIR so MLX finds them at
# $CUDA_PATH/include/*.h via NVRTC --include-path.
if(CUDAToolkit_FOUND)
    # CUDAToolkit_INCLUDE_DIRS is a list and may contain more than one
    # directory. Appending "/<header>" to the whole list would only suffix the
    # last entry and pass the other directories to install(FILES) as if they
    # were files, so pick the one directory that holds the runtime headers.
    set(_cuda_inc "")
    foreach(_cuda_inc_candidate IN LISTS CUDAToolkit_INCLUDE_DIRS)
        if(EXISTS "${_cuda_inc_candidate}/cuda_runtime_api.h")
            set(_cuda_inc "${_cuda_inc_candidate}")
            break()
        endif()
    endforeach()
    if(NOT _cuda_inc AND CUDAToolkit_INCLUDE_DIRS)
        # No entry contains the probe header at configure time: fall back to
        # the first entry, which matches the previous single-directory behavior.
        list(GET CUDAToolkit_INCLUDE_DIRS 0 _cuda_inc)
    endif()

    # All headers land in one flat include/ directory next to libmlx.
    set(_dst "${OLLAMA_INSTALL_DIR}/include")

    # Top-level toolkit headers, listed explicitly so additions show up in diffs.
    set(_MLX_JIT_CUDA_HEADERS
        builtin_types.h
        cooperative_groups.h
        cuda_bf16.h
        cuda_bf16.hpp
        cuda_device_runtime_api.h
        cuda_fp16.h
        cuda_fp16.hpp
        cuda_fp8.h
        cuda_fp8.hpp
        cuda_runtime_api.h
        device_types.h
        driver_types.h
        math_constants.h
        surface_types.h
        texture_types.h
        vector_functions.h
        vector_functions.hpp
        vector_types.h
    )
    foreach(_hdr IN LISTS _MLX_JIT_CUDA_HEADERS)
        install(FILES "${_cuda_inc}/${_hdr}"
            DESTINATION "${_dst}"
            COMPONENT MLX)
    endforeach()

    # Subdirectory headers: the cooperative_groups/ tree (only *.h files) and
    # the single crt/host_defines.h header, keeping their relative paths.
    install(DIRECTORY "${_cuda_inc}/cooperative_groups"
        DESTINATION "${_dst}"
        COMPONENT MLX
        FILES_MATCHING PATTERN "*.h")
    install(FILES "${_cuda_inc}/crt/host_defines.h"
        DESTINATION "${_dst}/crt"
        COMPONENT MLX)
endif()
# On Windows, explicitly install dl.dll (dlfcn-win32 POSIX dlopen emulation)
# RUNTIME_DEPENDENCIES auto-excludes it via POST_EXCLUDE_FILES_STRICT because
# dlfcn-win32 is a known CMake target with its own install rules (which install