Files
ollama/app/tools/filesystem.go
2025-10-23 11:27:41 -07:00

625 lines
16 KiB
Go

package tools
import (
"bufio"
"bytes"
"context"
"encoding/json"
"fmt"
"os"
"path/filepath"
"strings"
"github.com/ledongthuc/pdf"
)
// FileInfo represents information about a single file or directory
type FileInfo struct {
// BasePath string `json:"base_path"`
RelPath string `json:"rel_path"`
IsDir bool `json:"is_dir"`
}
// FileListResult represents the result of a directory listing operation
type FileListResult struct {
BasePath string `json:"base_path"`
Files []FileInfo `json:"files"`
Count int `json:"count"`
}
// FileReadResult represents the result of a file read operation
type FileReadResult struct {
Path string `json:"path"`
TotalLines int `json:"total_lines"`
LinesRead int `json:"lines_read"`
Content string `json:"content"`
}
// FileWriteResult represents the result of a file write operation
type FileWriteResult struct {
Path string `json:"path"`
Size int64 `json:"size,omitempty"`
Written int `json:"written"`
Mode string `json:"mode,omitempty"`
Modified int64 `json:"modified,omitempty"`
}
// FileReader implements the file reading functionality
type FileReader struct {
workingDir string
}
func (f *FileReader) SetWorkingDir(dir string) {
f.workingDir = dir
}
func (f *FileReader) Name() string {
return "file_read"
}
func (f *FileReader) Description() string {
return "Read the contents of a file from the file system"
}
func (f *FileReader) Prompt() string {
// TODO: read iteratively in agent mode, full in single shot - control with prompt?
return `Use the file_read tool to read the contents of a file using the path parameter. read_full is false by default and will return the first 100 lines of the file, if the user requires more information about the file, set read_full to true`
}
func (f *FileReader) Schema() map[string]any {
schemaBytes := []byte(`{
"type": "object",
"properties": {
"path": {
"type": "string",
"description": "The path to the file to read"
},
"read_full": {
"type": "boolean",
"description": "returns the first 100 lines of the file when set to false (default: false)",
"default": false
}
},
"required": ["path"]
}`)
var schema map[string]any
if err := json.Unmarshal(schemaBytes, &schema); err != nil {
return nil
}
return schema
}
func (f *FileReader) Execute(ctx context.Context, args map[string]any) (any, error) {
fmt.Println("file_read tool called", args)
path, ok := args["path"].(string)
if !ok {
return nil, fmt.Errorf("path parameter is required and must be a string")
}
// If path is not absolute and working directory is set, make it relative to working directory
if !filepath.IsAbs(path) && f.workingDir != "" {
path = filepath.Join(f.workingDir, path)
}
// Security: Clean and validate the path
cleanPath := filepath.Clean(path)
if strings.Contains(cleanPath, "..") {
return nil, fmt.Errorf("path traversal not allowed")
}
// Get max size limit
maxSize := int64(1024 * 1024) // 1MB default
if ms, ok := args["max_size"]; ok {
switch v := ms.(type) {
case float64:
maxSize = int64(v)
case int:
maxSize = int64(v)
case int64:
maxSize = v
}
}
// Check if file exists and get info
info, err := os.Stat(cleanPath)
if err != nil {
if os.IsNotExist(err) {
return nil, fmt.Errorf("file does not exist: %s", cleanPath)
}
return nil, fmt.Errorf("error accessing file: %w", err)
}
// Check if it's a directory
if info.IsDir() {
return nil, fmt.Errorf("path is a directory, not a file: %s", cleanPath)
}
// Check file size
if info.Size() > maxSize {
return nil, fmt.Errorf("file too large (%d bytes), maximum allowed: %d bytes", info.Size(), maxSize)
}
if strings.HasSuffix(strings.ToLower(cleanPath), ".pdf") {
return f.readPDFFile(cleanPath, args)
}
// Check read_full parameter
readFull := false // default to false
if rf, ok := args["read_full"]; ok {
readFull, _ = rf.(bool)
}
// Open and read the file
file, err := os.Open(cleanPath)
if err != nil {
return nil, fmt.Errorf("error opening file: %w", err)
}
defer file.Close()
// Read file content
scanner := bufio.NewScanner(file)
var lines []string
totalLines := 0
// Read content, keeping track of total lines but only storing up to 100 if !readFull
for scanner.Scan() {
totalLines++
if readFull || totalLines <= 100 {
lines = append(lines, scanner.Text())
}
}
if err := scanner.Err(); err != nil {
return nil, fmt.Errorf("error reading file: %w", err)
}
content := strings.Join(lines, "\n")
return &FileReadResult{
Path: cleanPath,
LinesRead: len(lines),
TotalLines: totalLines,
Content: content,
}, nil
}
// readPDFFile extracts text from a PDF file
func (f *FileReader) readPDFFile(cleanPath string, args map[string]any) (any, error) {
// Open the PDF file
pdfFile, r, err := pdf.Open(cleanPath)
if err != nil {
return nil, fmt.Errorf("error opening PDF: %w", err)
}
defer pdfFile.Close()
// Get total number of pages
totalPages := r.NumPage()
// Check read_full parameter - for PDFs, this controls whether to read all pages
readFull := false
if rf, ok := args["read_full"]; ok {
readFull, _ = rf.(bool)
}
// Extract text from pages
var allText strings.Builder
maxPages := 10 // Default to first 10 pages if not read_full
if readFull {
maxPages = totalPages
}
linesExtracted := 0
for pageNum := 1; pageNum <= totalPages && pageNum <= maxPages; pageNum++ {
// Get page
page := r.Page(pageNum)
if page.V.IsNull() {
continue
}
// Use the built-in GetPlainText method which handles text extraction better
pageText, err := page.GetPlainText(nil)
if err != nil {
// If GetPlainText fails, fall back to manual extraction
pageText = f.extractTextFromPage(page)
}
pageText = strings.TrimSpace(pageText)
if pageText != "" {
if allText.Len() > 0 {
allText.WriteString("\n\n")
}
allText.WriteString(fmt.Sprintf("--- Page %d ---\n", pageNum))
allText.WriteString(pageText)
// Count lines for reporting
linesExtracted += strings.Count(pageText, "\n") + 1
}
}
content := strings.TrimSpace(allText.String())
// If no text was extracted, return a helpful message
if content == "" {
content = "[PDF file contains no extractable text - it may contain only images or use complex encoding]"
linesExtracted = 1
}
return &FileReadResult{
Path: cleanPath,
LinesRead: linesExtracted,
TotalLines: totalPages, // For PDFs, we report pages as "lines"
Content: content,
}, nil
}
// extractTextFromPage extracts text from a single PDF page
func (f *FileReader) extractTextFromPage(page pdf.Page) string {
var buf bytes.Buffer
// Get page contents
contents := page.Content()
// Group text elements that appear to be part of the same word/line
var currentLine strings.Builder
lastX := -1.0
for i, t := range contents.Text {
// Skip empty text
if t.S == "" {
continue
}
// Check if this text element is on a new line or far from the previous one
// If X position is significantly different or we've reset to the beginning, it's likely a new word
if lastX >= 0 && (t.X < lastX-10 || t.X > lastX+50) {
// Add the accumulated line to buffer with a space
if currentLine.Len() > 0 {
buf.WriteString(currentLine.String())
buf.WriteString(" ")
currentLine.Reset()
}
}
// Add the text without extra spaces
currentLine.WriteString(t.S)
lastX = t.X
// Check if next element exists and has significantly different Y position (new line)
if i+1 < len(contents.Text) && contents.Text[i+1].Y > t.Y+5 {
if currentLine.Len() > 0 {
buf.WriteString(currentLine.String())
buf.WriteString("\n")
currentLine.Reset()
lastX = -1.0
}
}
}
// Add any remaining text
if currentLine.Len() > 0 {
buf.WriteString(currentLine.String())
}
return strings.TrimSpace(buf.String())
}
// FileList implements the directory listing functionality
type FileList struct {
workingDir string
}
func (f *FileList) SetWorkingDir(dir string) {
f.workingDir = dir
}
func (f *FileList) Name() string {
return "file_list"
}
func (f *FileList) Description() string {
return "List the contents of a directory"
}
func (f *FileList) Prompt() string {
return `Use the file_list tool to list the contents of a directory using the path parameter`
}
func (f *FileList) Schema() map[string]any {
schemaBytes := []byte(`{
"type": "object",
"properties": {
"path": {
"type": "string",
"description": "The path to the directory to list (default: current directory)",
"default": "."
},
"show_hidden": {
"type": "boolean",
"description": "Whether to show hidden files (starting with .)",
"default": false
},
"depth": {
"type": "integer",
"description": "How many directory levels deep to list (default: 1)",
"default": 1
}
},
"required": []
}`)
var schema map[string]any
if err := json.Unmarshal(schemaBytes, &schema); err != nil {
return nil
}
return schema
}
func (f *FileList) Execute(ctx context.Context, args map[string]any) (any, error) {
path := "."
if p, ok := args["path"].(string); ok {
path = p
}
// If path is not absolute and working directory is set, make it relative to working directory
if !filepath.IsAbs(path) && f.workingDir != "" {
path = filepath.Join(f.workingDir, path)
}
// Security: Clean and validate the path
cleanPath := filepath.Clean(path)
if strings.Contains(cleanPath, "..") {
return nil, fmt.Errorf("path traversal not allowed")
}
// Get optional parameters
showHidden := false
if sh, ok := args["show_hidden"].(bool); ok {
showHidden = sh
}
maxDepth := 1
if md, ok := args["depth"].(float64); ok {
maxDepth = int(md)
}
// Check if directory exists
info, err := os.Stat(cleanPath)
if err != nil {
if os.IsNotExist(err) {
return nil, fmt.Errorf("directory does not exist: %s", cleanPath)
}
return nil, fmt.Errorf("error accessing directory: %w", err)
}
if !info.IsDir() {
return nil, fmt.Errorf("path is not a directory: %s", cleanPath)
}
var files []FileInfo
files, err = f.listRecursive(cleanPath, showHidden, maxDepth, 0)
if err != nil {
return nil, err
}
return &FileListResult{
BasePath: cleanPath,
Files: files,
Count: len(files),
}, nil
}
func (f *FileList) listDirectory(path string, showHidden bool) ([]FileInfo, error) {
entries, err := os.ReadDir(path)
if err != nil {
return nil, fmt.Errorf("error reading directory: %w", err)
}
var files []FileInfo
for _, entry := range entries {
name := entry.Name()
// Skip hidden files if not requested
if !showHidden && strings.HasPrefix(name, ".") {
continue
}
fileInfo := FileInfo{
RelPath: name,
IsDir: entry.IsDir(),
}
files = append(files, fileInfo)
}
return files, nil
}
func (f *FileList) listRecursive(path string, showHidden bool, maxDepth, currentDepth int) ([]FileInfo, error) {
if currentDepth >= maxDepth {
return nil, nil
}
files, err := f.listDirectory(path, showHidden)
if err != nil {
return nil, err
}
var allFiles []FileInfo
for _, file := range files {
// For the first level, use the file name as is
// For deeper levels, join with parent directory
if currentDepth != 0 {
// Get the relative part of the path by removing the base path
rel, err := filepath.Rel(filepath.Dir(path), path)
if err == nil {
file.RelPath = filepath.Join(rel, file.RelPath)
}
}
allFiles = append(allFiles, file)
if file.IsDir {
subFiles, err := f.listRecursive(filepath.Join(path, file.RelPath), showHidden, maxDepth, currentDepth+1)
if err != nil {
continue // Skip directories we can't read
}
allFiles = append(allFiles, subFiles...)
}
}
return allFiles, nil
}
// FileWriter implements the file writing functionality
// TODO(parthsareen): max file size limit
type FileWriter struct {
workingDir string
}
func (f *FileWriter) SetWorkingDir(dir string) {
f.workingDir = dir
}
func (f *FileWriter) Name() string {
return "file_write"
}
func (f *FileWriter) Description() string {
return "Write content to a file on the file system"
}
func (f *FileWriter) Prompt() string {
return `Use the file_write tool to write content to a file using the path parameter`
}
func (f *FileWriter) Schema() map[string]any {
schemaBytes := []byte(`{
"type": "object",
"properties": {
"path": {
"type": "string",
"description": "The path to the file to write"
},
"content": {
"type": "string",
"description": "The content to write to the file"
},
"append": {
"type": "boolean",
"description": "Whether to append to the file instead of overwriting (default: false)",
"default": false
},
"create_dirs": {
"type": "boolean",
"description": "Whether to create parent directories if they don't exist (default: false)",
"default": false
},
"max_size": {
"type": "integer",
"description": "Maximum content size to write in bytes (default: 1MB)",
"default": 1024 * 1024
}
},
"required": ["path", "content"]
}`)
var schema map[string]any
if err := json.Unmarshal(schemaBytes, &schema); err != nil {
return nil
}
return schema
}
func (f *FileWriter) Execute(ctx context.Context, args map[string]any) (any, error) {
path, ok := args["path"].(string)
if !ok {
return nil, fmt.Errorf("path parameter is required and must be a string")
}
// If path is not absolute and working directory is set, make it relative to working directory
if !filepath.IsAbs(path) && f.workingDir != "" {
path = filepath.Join(f.workingDir, path)
}
// Extract required parameters
content, ok := args["content"].(string)
if !ok {
return nil, fmt.Errorf("content parameter is required and must be a string")
}
// Get optional parameters with defaults
append := true // Always append by default
if a, ok := args["append"].(bool); ok && !a {
return nil, fmt.Errorf("overwriting existing files is not allowed - must use append mode")
}
createDirs := false
if cd, ok := args["create_dirs"].(bool); ok {
createDirs = cd
}
maxSize := int64(1024 * 1024) // 1MB default
if ms, ok := args["max_size"].(float64); ok {
maxSize = int64(ms)
}
// Security: Clean and validate the path
cleanPath := filepath.Clean(path)
if strings.Contains(cleanPath, "..") {
return nil, fmt.Errorf("path traversal not allowed")
}
// Check content size
if int64(len(content)) > maxSize {
return nil, fmt.Errorf("content too large (%d bytes), maximum allowed: %d bytes", len(content), maxSize)
}
// Create parent directories if requested
if createDirs {
dir := filepath.Dir(cleanPath)
if err := os.MkdirAll(dir, 0755); err != nil {
return nil, fmt.Errorf("failed to create parent directories: %w", err)
}
}
// Check if file exists - if it does, we must append
fileInfo, err := os.Stat(cleanPath)
if err == nil && fileInfo.Size() > 0 {
// File exists and has content
if !append {
return nil, fmt.Errorf("file %s already exists - cannot overwrite, must use append mode", cleanPath)
}
}
// Open file in append mode
flag := os.O_WRONLY | os.O_CREATE | os.O_APPEND
file, err := os.OpenFile(cleanPath, flag, 0644)
if err != nil {
return nil, fmt.Errorf("error opening file for writing: %w", err)
}
defer file.Close()
// Write content
n, err := file.WriteString(content)
if err != nil {
return nil, fmt.Errorf("error writing to file: %w", err)
}
// Get file info for response
info, err := file.Stat()
if err != nil {
// Return basic success info if we can't get file stats
return &FileWriteResult{
Path: cleanPath,
Written: n,
}, nil
}
return &FileWriteResult{
Path: cleanPath,
Size: info.Size(),
Written: n,
Mode: info.Mode().String(),
Modified: info.ModTime().Unix(),
}, nil
}