ollama/app/tools/filesystem.go

package tools

import (
	"bufio"
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
	"strings"

	"github.com/ledongthuc/pdf"
)

// FileInfo represents information about a single file or directory
type FileInfo struct {
	// BasePath string `json:"base_path"`
	RelPath string `json:"rel_path"`
	IsDir   bool   `json:"is_dir"`
}

// FileListResult represents the result of a directory listing operation
type FileListResult struct {
	BasePath string     `json:"base_path"`
	Files    []FileInfo `json:"files"`
	Count    int        `json:"count"`
}

// FileReadResult represents the result of a file read operation
type FileReadResult struct {
	Path       string `json:"path"`
	TotalLines int    `json:"total_lines"`
	LinesRead  int    `json:"lines_read"`
	Content    string `json:"content"`
}

// FileWriteResult represents the result of a file write operation
type FileWriteResult struct {
	Path     string `json:"path"`
	Size     int64  `json:"size,omitempty"`
	Written  int    `json:"written"`
	Mode     string `json:"mode,omitempty"`
	Modified int64  `json:"modified,omitempty"`
}

// FileReader implements the file reading functionality
type FileReader struct {
	workingDir string
}

func (f *FileReader) SetWorkingDir(dir string) {
	f.workingDir = dir
}

func (f *FileReader) Name() string {
	return "file_read"
}

func (f *FileReader) Description() string {
	return "Read the contents of a file from the file system"
}

func (f *FileReader) Prompt() string {
	// TODO: read iteratively in agent mode, full in single shot - control with prompt?
	return `Use the file_read tool to read the contents of a file using the path parameter. read_full is false by default and will return the first 100 lines of the file, if the user requires more information about the file, set read_full to true`
}

func (f *FileReader) Schema() map[string]any {
	schemaBytes := []byte(`{
		"type": "object",
		"properties": {
			"path": {
				"type": "string",
				"description": "The path to the file to read"
			},
			"read_full": {
				"type": "boolean",
				"description": "returns the first 100 lines of the file when set to false (default: false)",
				"default": false
			}
		},
		"required": ["path"]
	}`)
	var schema map[string]any
	if err := json.Unmarshal(schemaBytes, &schema); err != nil {
		return nil
	}
	return schema
}

func (f *FileReader) Execute(ctx context.Context, args map[string]any) (any, error) {
	fmt.Println("file_read tool called", args)
	path, ok := args["path"].(string)
	if !ok {
		return nil, fmt.Errorf("path parameter is required and must be a string")
	}

	// If path is not absolute and working directory is set, make it relative to working directory
	if !filepath.IsAbs(path) && f.workingDir != "" {
		path = filepath.Join(f.workingDir, path)
	}

	// Security: Clean and validate the path
	cleanPath := filepath.Clean(path)
	if strings.Contains(cleanPath, "..") {
		return nil, fmt.Errorf("path traversal not allowed")
	}

	// Get max size limit
	maxSize := int64(1024 * 1024) // 1MB default
	if ms, ok := args["max_size"]; ok {
		switch v := ms.(type) {
		case float64:
			maxSize = int64(v)
		case int:
			maxSize = int64(v)
		case int64:
			maxSize = v
		}
	}

	// Check if file exists and get info
	info, err := os.Stat(cleanPath)
	if err != nil {
		if os.IsNotExist(err) {
			return nil, fmt.Errorf("file does not exist: %s", cleanPath)
		}
		return nil, fmt.Errorf("error accessing file: %w", err)
	}

	// Check if it's a directory
	if info.IsDir() {
		return nil, fmt.Errorf("path is a directory, not a file: %s", cleanPath)
	}

	// Check file size
	if info.Size() > maxSize {
		return nil, fmt.Errorf("file too large (%d bytes), maximum allowed: %d bytes", info.Size(), maxSize)
	}

	if strings.HasSuffix(strings.ToLower(cleanPath), ".pdf") {
		return f.readPDFFile(cleanPath, args)
	}

	// Check read_full parameter
	readFull := false // default to false
	if rf, ok := args["read_full"]; ok {
		readFull, _ = rf.(bool)
	}

	// Open and read the file
	file, err := os.Open(cleanPath)
	if err != nil {
		return nil, fmt.Errorf("error opening file: %w", err)
	}
	defer file.Close()

	// Read file content
	scanner := bufio.NewScanner(file)
	var lines []string
	totalLines := 0

	// Read content, keeping track of total lines but only storing up to 100 if !readFull
	for scanner.Scan() {
		totalLines++
		if readFull || totalLines <= 100 {
			lines = append(lines, scanner.Text())
		}
	}

	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("error reading file: %w", err)
	}

	content := strings.Join(lines, "\n")

	return &FileReadResult{
		Path:       cleanPath,
		LinesRead:  len(lines),
		TotalLines: totalLines,
		Content:    content,
	}, nil
}

// readPDFFile extracts text from a PDF file
func (f *FileReader) readPDFFile(cleanPath string, args map[string]any) (any, error) {
	// Open the PDF file
	pdfFile, r, err := pdf.Open(cleanPath)
	if err != nil {
		return nil, fmt.Errorf("error opening PDF: %w", err)
	}
	defer pdfFile.Close()

	// Get total number of pages
	totalPages := r.NumPage()

	// Check read_full parameter - for PDFs, this controls whether to read all pages
	readFull := false
	if rf, ok := args["read_full"]; ok {
		readFull, _ = rf.(bool)
	}

	// Extract text from pages
	var allText strings.Builder
	maxPages := 10 // Default to first 10 pages if not read_full
	if readFull {
		maxPages = totalPages
	}

	linesExtracted := 0
	for pageNum := 1; pageNum <= totalPages && pageNum <= maxPages; pageNum++ {
		// Get page
		page := r.Page(pageNum)
		if page.V.IsNull() {
			continue
		}

		// Use the built-in GetPlainText method which handles text extraction better
		pageText, err := page.GetPlainText(nil)
		if err != nil {
			// If GetPlainText fails, fall back to manual extraction
			pageText = f.extractTextFromPage(page)
		}

		pageText = strings.TrimSpace(pageText)
		if pageText != "" {
			if allText.Len() > 0 {
				allText.WriteString("\n\n")
			}
			allText.WriteString(fmt.Sprintf("--- Page %d ---\n", pageNum))
			allText.WriteString(pageText)

			// Count lines for reporting
			linesExtracted += strings.Count(pageText, "\n") + 1
		}
	}

	content := strings.TrimSpace(allText.String())

	// If no text was extracted, return a helpful message
	if content == "" {
		content = "[PDF file contains no extractable text - it may contain only images or use complex encoding]"
		linesExtracted = 1
	}

	return &FileReadResult{
		Path:       cleanPath,
		LinesRead:  linesExtracted,
		TotalLines: totalPages, // For PDFs, we report pages as "lines"
		Content:    content,
	}, nil
}

// extractTextFromPage extracts text from a single PDF page
func (f *FileReader) extractTextFromPage(page pdf.Page) string {
	var buf bytes.Buffer

	// Get page contents
	contents := page.Content()

	// Group text elements that appear to be part of the same word/line
	var currentLine strings.Builder
	lastX := -1.0

	for i, t := range contents.Text {
		// Skip empty text
		if t.S == "" {
			continue
		}

		// Check if this text element is on a new line or far from the previous one
		// If X position is significantly different or we've reset to the beginning, it's likely a new word
		if lastX >= 0 && (t.X < lastX-10 || t.X > lastX+50) {
			// Add the accumulated line to buffer with a space
			if currentLine.Len() > 0 {
				buf.WriteString(currentLine.String())
				buf.WriteString(" ")
				currentLine.Reset()
			}
		}

		// Add the text without extra spaces
		currentLine.WriteString(t.S)
		lastX = t.X

		// Check if next element exists and has significantly different Y position (new line)
		if i+1 < len(contents.Text) && contents.Text[i+1].Y > t.Y+5 {
			if currentLine.Len() > 0 {
				buf.WriteString(currentLine.String())
				buf.WriteString("\n")
				currentLine.Reset()
				lastX = -1.0
			}
		}
	}

	// Add any remaining text
	if currentLine.Len() > 0 {
		buf.WriteString(currentLine.String())
	}

	return strings.TrimSpace(buf.String())
}

// FileList implements the directory listing functionality
type FileList struct {
	workingDir string
}

func (f *FileList) SetWorkingDir(dir string) {
	f.workingDir = dir
}

func (f *FileList) Name() string {
	return "file_list"
}

func (f *FileList) Description() string {
	return "List the contents of a directory"
}

func (f *FileList) Prompt() string {
	return `Use the file_list tool to list the contents of a directory using the path parameter`
}

func (f *FileList) Schema() map[string]any {
	schemaBytes := []byte(`{
		"type": "object",
		"properties": {
			"path": {
				"type":        "string",
				"description": "The path to the directory to list (default: current directory)",
				"default":     "."
			},
			"show_hidden": {
				"type":        "boolean",
				"description": "Whether to show hidden files (starting with .)",
				"default":     false
			},
			"depth": {
				"type":        "integer",
				"description": "How many directory levels deep to list (default: 1)",
				"default":     1
			}
		},
		"required": []
	}`)
	var schema map[string]any
	if err := json.Unmarshal(schemaBytes, &schema); err != nil {
		return nil
	}
	return schema
}

func (f *FileList) Execute(ctx context.Context, args map[string]any) (any, error) {
	path := "."
	if p, ok := args["path"].(string); ok {
		path = p
	}

	// If path is not absolute and working directory is set, make it relative to working directory
	if !filepath.IsAbs(path) && f.workingDir != "" {
		path = filepath.Join(f.workingDir, path)
	}

	// Security: Clean and validate the path
	cleanPath := filepath.Clean(path)
	if strings.Contains(cleanPath, "..") {
		return nil, fmt.Errorf("path traversal not allowed")
	}

	// Get optional parameters
	showHidden := false
	if sh, ok := args["show_hidden"].(bool); ok {
		showHidden = sh
	}

	maxDepth := 1
	if md, ok := args["depth"].(float64); ok {
		maxDepth = int(md)
	}

	// Check if directory exists
	info, err := os.Stat(cleanPath)
	if err != nil {
		if os.IsNotExist(err) {
			return nil, fmt.Errorf("directory does not exist: %s", cleanPath)
		}
		return nil, fmt.Errorf("error accessing directory: %w", err)
	}

	if !info.IsDir() {
		return nil, fmt.Errorf("path is not a directory: %s", cleanPath)
	}

	var files []FileInfo

	files, err = f.listRecursive(cleanPath, showHidden, maxDepth, 0)

	if err != nil {
		return nil, err
	}

	return &FileListResult{
		BasePath: cleanPath,
		Files:    files,
		Count:    len(files),
	}, nil
}

func (f *FileList) listDirectory(path string, showHidden bool) ([]FileInfo, error) {
	entries, err := os.ReadDir(path)
	if err != nil {
		return nil, fmt.Errorf("error reading directory: %w", err)
	}

	var files []FileInfo
	for _, entry := range entries {
		name := entry.Name()

		// Skip hidden files if not requested
		if !showHidden && strings.HasPrefix(name, ".") {
			continue
		}

		fileInfo := FileInfo{
			RelPath: name,
			IsDir:   entry.IsDir(),
		}

		files = append(files, fileInfo)
	}

	return files, nil
}

func (f *FileList) listRecursive(path string, showHidden bool, maxDepth, currentDepth int) ([]FileInfo, error) {
	if currentDepth >= maxDepth {
		return nil, nil
	}

	files, err := f.listDirectory(path, showHidden)
	if err != nil {
		return nil, err
	}

	var allFiles []FileInfo
	for _, file := range files {
		// For the first level, use the file name as is
		// For deeper levels, join with parent directory
		if currentDepth != 0 {
			// Get the relative part of the path by removing the base path
			rel, err := filepath.Rel(filepath.Dir(path), path)
			if err == nil {
				file.RelPath = filepath.Join(rel, file.RelPath)
			}
		}
		allFiles = append(allFiles, file)

		if file.IsDir {
			subFiles, err := f.listRecursive(filepath.Join(path, file.RelPath), showHidden, maxDepth, currentDepth+1)
			if err != nil {
				continue // Skip directories we can't read
			}
			allFiles = append(allFiles, subFiles...)
		}
	}

	return allFiles, nil
}

// FileWriter implements the file writing functionality
// TODO(parthsareen): max file size limit
type FileWriter struct {
	workingDir string
}

func (f *FileWriter) SetWorkingDir(dir string) {
	f.workingDir = dir
}

func (f *FileWriter) Name() string {
	return "file_write"
}

func (f *FileWriter) Description() string {
	return "Write content to a file on the file system"
}

func (f *FileWriter) Prompt() string {
	return `Use the file_write tool to write content to a file using the path parameter`
}

func (f *FileWriter) Schema() map[string]any {
	schemaBytes := []byte(`{
		"type": "object",
		"properties": {
			"path": {
				"type":        "string",
				"description": "The path to the file to write"
			},
			"content": {
				"type":        "string",
				"description": "The content to write to the file"
			},
			"append": {
				"type":        "boolean",
				"description": "Whether to append to the file instead of overwriting (default: false)",
				"default":     false
			},
			"create_dirs": {
				"type":        "boolean",
				"description": "Whether to create parent directories if they don't exist (default: false)",
				"default":     false
			},
			"max_size": {
				"type":        "integer",
				"description": "Maximum content size to write in bytes (default: 1MB)",
				"default":     1024 * 1024
			}
		},
		"required": ["path", "content"]
	}`)
	var schema map[string]any
	if err := json.Unmarshal(schemaBytes, &schema); err != nil {
		return nil
	}
	return schema
}

func (f *FileWriter) Execute(ctx context.Context, args map[string]any) (any, error) {
	path, ok := args["path"].(string)
	if !ok {
		return nil, fmt.Errorf("path parameter is required and must be a string")
	}

	// If path is not absolute and working directory is set, make it relative to working directory
	if !filepath.IsAbs(path) && f.workingDir != "" {
		path = filepath.Join(f.workingDir, path)
	}

	// Extract required parameters
	content, ok := args["content"].(string)
	if !ok {
		return nil, fmt.Errorf("content parameter is required and must be a string")
	}

	// Get optional parameters with defaults
	append := true // Always append by default
	if a, ok := args["append"].(bool); ok && !a {
		return nil, fmt.Errorf("overwriting existing files is not allowed - must use append mode")
	}

	createDirs := false
	if cd, ok := args["create_dirs"].(bool); ok {
		createDirs = cd
	}

	maxSize := int64(1024 * 1024) // 1MB default
	if ms, ok := args["max_size"].(float64); ok {
		maxSize = int64(ms)
	}

	// Security: Clean and validate the path
	cleanPath := filepath.Clean(path)
	if strings.Contains(cleanPath, "..") {
		return nil, fmt.Errorf("path traversal not allowed")
	}

	// Check content size
	if int64(len(content)) > maxSize {
		return nil, fmt.Errorf("content too large (%d bytes), maximum allowed: %d bytes", len(content), maxSize)
	}

	// Create parent directories if requested
	if createDirs {
		dir := filepath.Dir(cleanPath)
		if err := os.MkdirAll(dir, 0755); err != nil {
			return nil, fmt.Errorf("failed to create parent directories: %w", err)
		}
	}

	// Check if file exists - if it does, we must append
	fileInfo, err := os.Stat(cleanPath)
	if err == nil && fileInfo.Size() > 0 {
		// File exists and has content
		if !append {
			return nil, fmt.Errorf("file %s already exists - cannot overwrite, must use append mode", cleanPath)
		}
	}

	// Open file in append mode
	flag := os.O_WRONLY | os.O_CREATE | os.O_APPEND
	file, err := os.OpenFile(cleanPath, flag, 0644)
	if err != nil {
		return nil, fmt.Errorf("error opening file for writing: %w", err)
	}
	defer file.Close()

	// Write content
	n, err := file.WriteString(content)
	if err != nil {
		return nil, fmt.Errorf("error writing to file: %w", err)
	}

	// Get file info for response
	info, err := file.Stat()
	if err != nil {
		// Return basic success info if we can't get file stats
		return &FileWriteResult{
			Path:    cleanPath,
			Written: n,
		}, nil
	}

	return &FileWriteResult{
		Path:     cleanPath,
		Size:     info.Size(),
		Written:  n,
		Mode:     info.Mode().String(),
		Modified: info.ModTime().Unix(),
	}, nil
}