mirror of
https://github.com/ollama/ollama.git
synced 2026-04-26 18:55:53 +02:00
mlx: mixed-precision quant and capability detection improvements (#15409)
Improve the MLX model creation pipeline with several model-agnostic changes:
- Rewrite supportsVision to use vision_config instead of architecture name
- Add supportsAudio for audio encoder detection
- Add alignment checking (isAligned) for quantization group sizes
- Support per-projection mixed quantization in MoE expert packing
- Record per-tensor quant metadata in safetensors blobs
- Parse per-tensor quant metadata at model load time
- Validate quantize output is non-empty before storing
- Fix pin/unpin cleanup in expert group quantization
- Promote v_proj/k_proj/down_proj to INT8 for INT4 base quant
- Add MetalIsAvailable() utility
- Skip audio encoder tensors from quantization
This commit is contained in:
@@ -191,6 +191,10 @@ func inferSafetensorsCapabilities(modelDir string) []string {
|
||||
capabilities = append(capabilities, "vision")
|
||||
}
|
||||
|
||||
if supportsAudio(modelDir) {
|
||||
capabilities = append(capabilities, "audio")
|
||||
}
|
||||
|
||||
if supportsThinking(modelDir) {
|
||||
capabilities = append(capabilities, "thinking")
|
||||
}
|
||||
@@ -496,32 +500,38 @@ func supportsThinking(modelDir string) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// supportsVision reports whether the model has a vision encoder by looking for
// a vision_config entry in config.json.
//
// It returns false when config.json cannot be read or parsed (including when
// vision_config is present but is not a JSON object), or when vision_config
// is absent or null. The architecture name and model_type are not consulted.
func supportsVision(modelDir string) bool {
	data, err := os.ReadFile(filepath.Join(modelDir, "config.json"))
	if err != nil {
		return false
	}

	// A pointer field distinguishes a missing or null vision_config (nil)
	// from one that is present, even if it is an empty object.
	var cfg struct {
		VisionConfig *map[string]any `json:"vision_config"`
	}
	if err := json.Unmarshal(data, &cfg); err != nil {
		return false
	}

	return cfg.VisionConfig != nil
}
|
||||
|
||||
// supportsAudio reports whether the model has an audio encoder by looking for
// an audio_config entry in config.json.
//
// It returns false when config.json cannot be read or parsed (including when
// audio_config is present but is not a JSON object), or when audio_config is
// absent or null.
func supportsAudio(modelDir string) bool {
	data, err := os.ReadFile(filepath.Join(modelDir, "config.json"))
	if err != nil {
		return false
	}

	// A pointer field distinguishes a missing or null audio_config (nil)
	// from one that is present, even if it is an empty object.
	var cfg struct {
		AudioConfig *map[string]any `json:"audio_config"`
	}
	if err := json.Unmarshal(data, &cfg); err != nil {
		return false
	}

	return cfg.AudioConfig != nil
}
|
||||
|
||||
// getParserName returns the parser name for a model based on its architecture.
|
||||
|
||||
Reference in New Issue
Block a user