mlx: mixed-precision quant and capability detection improvements (#15409)

Improve the MLX model creation pipeline with several model-agnostic changes:

- Rewrite supportsVision to use vision_config instead of architecture name
- Add supportsAudio for audio encoder detection
- Add alignment checking (isAligned) for quantization group sizes
- Support per-projection mixed quantization in MoE expert packing
- Record per-tensor quant metadata in safetensors blobs
- Parse per-tensor quant metadata at model load time
- Validate quantize output is non-empty before storing
- Fix pin/unpin cleanup in expert group quantization
- Promote v_proj/k_proj/down_proj to INT8 for INT4 base quant
- Add MetalIsAvailable() utility
- Exclude audio encoder tensors from quantization
This commit is contained in:
Daniel Hiltgen
2026-04-13 11:43:07 -07:00
committed by GitHub
parent 1b70bb8a10
commit d3da29cbfc
7 changed files with 368 additions and 87 deletions

View File

@@ -191,6 +191,10 @@ func inferSafetensorsCapabilities(modelDir string) []string {
capabilities = append(capabilities, "vision")
}
if supportsAudio(modelDir) {
capabilities = append(capabilities, "audio")
}
if supportsThinking(modelDir) {
capabilities = append(capabilities, "thinking")
}
@@ -496,32 +500,38 @@ func supportsThinking(modelDir string) bool {
return false
}
// supportsVision checks if the model supports image input based on its architecture.
// Qwen3.5 multimodal checkpoints are published as ConditionalGeneration architectures.
// supportsVision checks if the model has a vision encoder by looking for
// vision_config in config.json.
func supportsVision(modelDir string) bool {
// NOTE(review): this hunk is shown as a rendered diff without +/- markers, so
// removed and added lines sit next to each other. The configPath/ReadFile pair
// and the single-line ReadFile below look like the old and new spellings of the
// same read — confirm against the commit before treating this as one body.
configPath := filepath.Join(modelDir, "config.json")
data, err := os.ReadFile(configPath)
data, err := os.ReadFile(filepath.Join(modelDir, "config.json"))
if err != nil {
// config.json could not be read: report no vision support.
return false
}
var cfg struct {
// Architectures is consumed by the name-matching loop below; ModelType is
// not read before the closing brace of this span.
Architectures []string `json:"architectures"`
ModelType string `json:"model_type"`
// Pointer type so the field stays nil unless config.json carries a
// non-null vision_config value.
VisionConfig *map[string]any `json:"vision_config"`
}
if err := json.Unmarshal(data, &cfg); err != nil {
// Malformed JSON is also reported as no vision support.
return false
}
// Architecture-name match: an entry containing both "qwen3" and
// "conditionalgeneration" (compared in lower case) returns true.
// NOTE(review): presumably the removed side of the diff — confirm.
for _, arch := range cfg.Architectures {
archLower := strings.ToLower(arch)
if strings.Contains(archLower, "qwen3") && strings.Contains(archLower, "conditionalgeneration") {
return true
}
// Config-based check: true exactly when vision_config was decoded to a
// non-nil value.
return cfg.VisionConfig != nil
}
// supportsAudio reports whether config.json in modelDir carries a non-null
// audio_config entry. A read failure or a JSON decode failure yields false.
func supportsAudio(modelDir string) bool {
data, err := os.ReadFile(filepath.Join(modelDir, "config.json"))
if err != nil {
return false
}
// NOTE(review): the next two lines reference cfg.ModelType before cfg is
// declared and test for "qwen3"/"conditionalgeneration"; they look like removed
// lines from the architecture-name check left in by the diff rendering — confirm.
typeLower := strings.ToLower(cfg.ModelType)
return strings.Contains(typeLower, "qwen3") && strings.Contains(typeLower, "conditionalgeneration")
var cfg struct {
// Pointer type so the field stays nil unless audio_config is present and
// non-null in the decoded JSON.
AudioConfig *map[string]any `json:"audio_config"`
}
if err := json.Unmarshal(data, &cfg); err != nil {
return false
}
return cfg.AudioConfig != nil
}
// getParserName returns the parser name for a model based on its architecture.