// cli.go provides CLI commands for image generation models. // // TODO (jmorganca): Integrate these commands into cmd/cmd.go when stable. // Currently these are separate to keep experimental code isolated. package imagegen import ( "encoding/base64" "encoding/json" "errors" "fmt" "io" "os" "strconv" "strings" "time" "github.com/spf13/cobra" "github.com/ollama/ollama/api" "github.com/ollama/ollama/envconfig" "github.com/ollama/ollama/progress" "github.com/ollama/ollama/readline" ) // ImageGenOptions holds options for image generation. // These can be set via environment variables or interactive commands. type ImageGenOptions struct { Width int Height int Steps int Seed int NegativePrompt string } // DefaultOptions returns the default image generation options. func DefaultOptions() ImageGenOptions { return ImageGenOptions{ Width: 1024, Height: 1024, Steps: 9, Seed: 0, // 0 means random } } // Show displays information about an image generation model. func Show(modelName string, w io.Writer) error { manifest, err := LoadManifest(modelName) if err != nil { return fmt.Errorf("failed to load manifest: %w", err) } // Count total size var totalSize int64 for _, layer := range manifest.Manifest.Layers { if layer.MediaType == "application/vnd.ollama.image.tensor" { totalSize += layer.Size } } // Read model_index.json for architecture var architecture string if data, err := manifest.ReadConfig("model_index.json"); err == nil { var index struct { Architecture string `json:"architecture"` } if json.Unmarshal(data, &index) == nil { architecture = index.Architecture } } // Estimate parameter count from total size (assuming BF16 = 2 bytes per param) paramCount := totalSize / 2 paramStr := formatParamCount(paramCount) // Print Model info fmt.Fprintln(w, " Model") if architecture != "" { fmt.Fprintf(w, " %-20s %s\n", "architecture", architecture) } fmt.Fprintf(w, " %-20s %s\n", "parameters", paramStr) fmt.Fprintf(w, " %-20s %s\n", "quantization", "BF16") fmt.Fprintln(w) // Print Capabilities fmt.Fprintln(w, " Capabilities") fmt.Fprintf(w, " %s\n", "image") fmt.Fprintln(w) return nil } // formatParamCount formats parameter count as human-readable string. func formatParamCount(count int64) string { if count >= 1_000_000_000 { return fmt.Sprintf("%.1fB", float64(count)/1_000_000_000) } if count >= 1_000_000 { return fmt.Sprintf("%.1fM", float64(count)/1_000_000) } return fmt.Sprintf("%d", count) } // RegisterFlags adds image generation flags to the given command. // Flags are hidden since they only apply to image generation models. func RegisterFlags(cmd *cobra.Command) { cmd.Flags().Int("width", 1024, "Image width") cmd.Flags().Int("height", 1024, "Image height") cmd.Flags().Int("steps", 9, "Denoising steps") cmd.Flags().Int("seed", 0, "Random seed (0 for random)") cmd.Flags().String("negative", "", "Negative prompt") cmd.Flags().MarkHidden("width") cmd.Flags().MarkHidden("height") cmd.Flags().MarkHidden("steps") cmd.Flags().MarkHidden("seed") cmd.Flags().MarkHidden("negative") } // RunCLI handles the CLI for image generation models. // Returns true if it handled the request, false if the caller should continue with normal flow. // Supports flags: --width, --height, --steps, --seed, --negative func RunCLI(cmd *cobra.Command, name string, prompt string, interactive bool, keepAlive *api.Duration) error { // Verify it's a valid image gen model if ResolveModelName(name) == "" { return fmt.Errorf("unknown image generation model: %s", name) } // Get options from flags (with env var defaults) opts := DefaultOptions() if cmd != nil && cmd.Flags() != nil { if v, err := cmd.Flags().GetInt("width"); err == nil && v > 0 { opts.Width = v } if v, err := cmd.Flags().GetInt("height"); err == nil && v > 0 { opts.Height = v } if v, err := cmd.Flags().GetInt("steps"); err == nil && v > 0 { opts.Steps = v } if v, err := cmd.Flags().GetInt("seed"); err == nil && v != 0 { opts.Seed = v } if v, err := cmd.Flags().GetString("negative"); err == nil && v != "" { opts.NegativePrompt = v } } if interactive { return runInteractive(cmd, name, keepAlive, opts) } // One-shot generation return generateImageWithOptions(cmd, name, prompt, keepAlive, opts) } // generateImageWithOptions generates an image with the given options. func generateImageWithOptions(cmd *cobra.Command, modelName, prompt string, keepAlive *api.Duration, opts ImageGenOptions) error { client, err := api.ClientFromEnvironment() if err != nil { return err } // Build request with image gen options encoded in Options fields // NumCtx=width, NumGPU=height, NumPredict=steps, Seed=seed req := &api.GenerateRequest{ Model: modelName, Prompt: prompt, Options: map[string]any{ "num_ctx": opts.Width, "num_gpu": opts.Height, "num_predict": opts.Steps, "seed": opts.Seed, }, } if keepAlive != nil { req.KeepAlive = keepAlive } // Show loading spinner until generation starts p := progress.NewProgress(os.Stderr) spinner := progress.NewSpinner("") p.Add("", spinner) var stepBar *progress.StepBar var imagePath string err = client.Generate(cmd.Context(), req, func(resp api.GenerateResponse) error { content := resp.Response // Handle progress updates - parse step info and switch to step bar if strings.HasPrefix(content, "\rGenerating:") { var step, total int fmt.Sscanf(content, "\rGenerating: step %d/%d", &step, &total) if stepBar == nil && total > 0 { spinner.Stop() stepBar = progress.NewStepBar("Generating", total) p.Add("", stepBar) } if stepBar != nil { stepBar.Set(step) } return nil } // Handle final response with image path if resp.Done && strings.Contains(content, "Image saved to:") { if idx := strings.Index(content, "Image saved to: "); idx >= 0 { imagePath = strings.TrimSpace(content[idx+16:]) } } return nil }) p.Stop() if err != nil { return err } if imagePath != "" { displayImageInTerminal(imagePath) fmt.Printf("Image saved to: %s\n", imagePath) } return nil } // runInteractive runs an interactive REPL for image generation. func runInteractive(cmd *cobra.Command, modelName string, keepAlive *api.Duration, opts ImageGenOptions) error { client, err := api.ClientFromEnvironment() if err != nil { return err } scanner, err := readline.New(readline.Prompt{ Prompt: ">>> ", Placeholder: "Describe an image to generate (/help for commands)", }) if err != nil { return err } if envconfig.NoHistory() { scanner.HistoryDisable() } for { line, err := scanner.Readline() switch { case errors.Is(err, io.EOF): fmt.Println() return nil case errors.Is(err, readline.ErrInterrupt): if line == "" { fmt.Println("\nUse Ctrl + d or /bye to exit.") } continue case err != nil: return err } line = strings.TrimSpace(line) if line == "" { continue } // Handle commands switch { case strings.HasPrefix(line, "/bye"): return nil case strings.HasPrefix(line, "/?"), strings.HasPrefix(line, "/help"): printInteractiveHelp(opts) continue case strings.HasPrefix(line, "/set "): if err := handleSetCommand(line[5:], &opts); err != nil { fmt.Fprintf(os.Stderr, "Error: %v\n", err) } continue case strings.HasPrefix(line, "/show"): printCurrentSettings(opts) continue case strings.HasPrefix(line, "/"): fmt.Fprintf(os.Stderr, "Unknown command: %s (try /help)\n", line) continue } // Generate image with current options req := &api.GenerateRequest{ Model: modelName, Prompt: line, Options: map[string]any{ "num_ctx": opts.Width, "num_gpu": opts.Height, "num_predict": opts.Steps, "seed": opts.Seed, }, } if keepAlive != nil { req.KeepAlive = keepAlive } // Show loading spinner until generation starts p := progress.NewProgress(os.Stderr) spinner := progress.NewSpinner("") p.Add("", spinner) var stepBar *progress.StepBar var imagePath string err = client.Generate(cmd.Context(), req, func(resp api.GenerateResponse) error { content := resp.Response // Handle progress updates - parse step info and switch to step bar if strings.HasPrefix(content, "\rGenerating:") { var step, total int fmt.Sscanf(content, "\rGenerating: step %d/%d", &step, &total) if stepBar == nil && total > 0 { spinner.Stop() stepBar = progress.NewStepBar("Generating", total) p.Add("", stepBar) } if stepBar != nil { stepBar.Set(step) } return nil } // Handle final response with image path if resp.Done && strings.Contains(content, "Image saved to:") { if idx := strings.Index(content, "Image saved to: "); idx >= 0 { imagePath = strings.TrimSpace(content[idx+16:]) } } return nil }) p.Stop() if err != nil { fmt.Fprintf(os.Stderr, "Error: %v\n", err) continue } // Copy image to current directory with descriptive name if imagePath != "" { // Create filename from prompt (sanitized) safeName := sanitizeFilename(line) if len(safeName) > 50 { safeName = safeName[:50] } timestamp := time.Now().Format("20060102-150405") newName := fmt.Sprintf("%s-%s.png", safeName, timestamp) // Copy file to CWD if err := copyFile(imagePath, newName); err != nil { fmt.Fprintf(os.Stderr, "Error saving to current directory: %v\n", err) displayImageInTerminal(imagePath) fmt.Printf("Image saved to: %s\n", imagePath) } else { displayImageInTerminal(newName) fmt.Printf("Image saved to: %s\n", newName) } } fmt.Println() } } // sanitizeFilename removes characters that aren't safe for filenames. func sanitizeFilename(s string) string { s = strings.ToLower(s) s = strings.ReplaceAll(s, " ", "-") // Remove any character that's not alphanumeric or hyphen var result strings.Builder for _, r := range s { if (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '-' { result.WriteRune(r) } } return result.String() } // copyFile copies a file from src to dst. func copyFile(src, dst string) error { sourceFile, err := os.Open(src) if err != nil { return err } defer sourceFile.Close() destFile, err := os.Create(dst) if err != nil { return err } defer destFile.Close() _, err = io.Copy(destFile, sourceFile) return err } // printInteractiveHelp prints help for interactive mode commands. func printInteractiveHelp(opts ImageGenOptions) { fmt.Fprintln(os.Stderr, "Commands:") fmt.Fprintln(os.Stderr, " /set width Set image width (current:", opts.Width, ")") fmt.Fprintln(os.Stderr, " /set height Set image height (current:", opts.Height, ")") fmt.Fprintln(os.Stderr, " /set steps Set denoising steps (current:", opts.Steps, ")") fmt.Fprintln(os.Stderr, " /set seed Set random seed (current:", opts.Seed, ", 0=random)") fmt.Fprintln(os.Stderr, " /set negative Set negative prompt") fmt.Fprintln(os.Stderr, " /show Show current settings") fmt.Fprintln(os.Stderr, " /bye Exit") fmt.Fprintln(os.Stderr) fmt.Fprintln(os.Stderr, "Or type a prompt to generate an image.") fmt.Fprintln(os.Stderr) } // printCurrentSettings prints the current image generation settings. func printCurrentSettings(opts ImageGenOptions) { fmt.Fprintf(os.Stderr, "Current settings:\n") fmt.Fprintf(os.Stderr, " width: %d\n", opts.Width) fmt.Fprintf(os.Stderr, " height: %d\n", opts.Height) fmt.Fprintf(os.Stderr, " steps: %d\n", opts.Steps) fmt.Fprintf(os.Stderr, " seed: %d (0=random)\n", opts.Seed) if opts.NegativePrompt != "" { fmt.Fprintf(os.Stderr, " negative: %s\n", opts.NegativePrompt) } fmt.Fprintln(os.Stderr) } // handleSetCommand handles /set commands to change options. func handleSetCommand(args string, opts *ImageGenOptions) error { parts := strings.SplitN(args, " ", 2) if len(parts) < 2 { return fmt.Errorf("usage: /set