mirror of
https://github.com/ollama/ollama.git
synced 2026-04-23 17:29:54 +02:00
625 lines
16 KiB
Go
625 lines
16 KiB
Go
package tools
|
|
|
|
import (
|
|
"bufio"
|
|
"bytes"
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
|
|
"github.com/ledongthuc/pdf"
|
|
)
|
|
|
|
// FileInfo describes a single entry (file or directory) of a directory
// listing.
type FileInfo struct {
	// The listing's base path is not repeated on every entry (a BasePath field
	// is commented out here in earlier code); FileListResult carries it once.

	// RelPath is the entry's path as reported by the listing; for a
	// single-level listing it is just the entry name.
	RelPath string `json:"rel_path"`

	// IsDir is true when the entry is a directory.
	IsDir bool `json:"is_dir"`
}
|
|
|
|
// FileListResult represents the result of a directory listing operation
|
|
type FileListResult struct {
|
|
BasePath string `json:"base_path"`
|
|
Files []FileInfo `json:"files"`
|
|
Count int `json:"count"`
|
|
}
|
|
|
|
// FileReadResult is the value returned by a file read.
type FileReadResult struct {
	// Path is the cleaned path of the file that was read.
	Path string `json:"path"`

	// TotalLines is the number of lines in the file. For PDFs the reader
	// stores the page count here instead.
	TotalLines int `json:"total_lines"`

	// LinesRead is the number of lines included in Content.
	LinesRead int `json:"lines_read"`

	// Content is the text that was read.
	Content string `json:"content"`
}
|
|
|
|
// FileWriteResult is the value returned by a file write.
type FileWriteResult struct {
	// Path is the cleaned path of the file that was written.
	Path string `json:"path"`

	// Size is the file's size in bytes after the write; omitted when zero.
	Size int64 `json:"size,omitempty"`

	// Written is the number of bytes written by this call.
	Written int `json:"written"`

	// Mode is the file mode rendered as a string; omitted when empty.
	Mode string `json:"mode,omitempty"`

	// Modified is the file's modification time in Unix seconds; omitted when
	// zero.
	Modified int64 `json:"modified,omitempty"`
}
|
|
|
|
// FileReader is the tool that reads files from the file system.
type FileReader struct {
	// workingDir, when non-empty, is the directory that relative paths are
	// joined onto.
	workingDir string
}
|
|
|
|
func (f *FileReader) SetWorkingDir(dir string) {
|
|
f.workingDir = dir
|
|
}
|
|
|
|
func (f *FileReader) Name() string {
|
|
return "file_read"
|
|
}
|
|
|
|
func (f *FileReader) Description() string {
|
|
return "Read the contents of a file from the file system"
|
|
}
|
|
|
|
func (f *FileReader) Prompt() string {
|
|
// TODO: read iteratively in agent mode, full in single shot - control with prompt?
|
|
return `Use the file_read tool to read the contents of a file using the path parameter. read_full is false by default and will return the first 100 lines of the file, if the user requires more information about the file, set read_full to true`
|
|
}
|
|
|
|
func (f *FileReader) Schema() map[string]any {
|
|
schemaBytes := []byte(`{
|
|
"type": "object",
|
|
"properties": {
|
|
"path": {
|
|
"type": "string",
|
|
"description": "The path to the file to read"
|
|
},
|
|
"read_full": {
|
|
"type": "boolean",
|
|
"description": "returns the first 100 lines of the file when set to false (default: false)",
|
|
"default": false
|
|
}
|
|
},
|
|
"required": ["path"]
|
|
}`)
|
|
var schema map[string]any
|
|
if err := json.Unmarshal(schemaBytes, &schema); err != nil {
|
|
return nil
|
|
}
|
|
return schema
|
|
}
|
|
|
|
func (f *FileReader) Execute(ctx context.Context, args map[string]any) (any, error) {
|
|
fmt.Println("file_read tool called", args)
|
|
path, ok := args["path"].(string)
|
|
if !ok {
|
|
return nil, fmt.Errorf("path parameter is required and must be a string")
|
|
}
|
|
|
|
// If path is not absolute and working directory is set, make it relative to working directory
|
|
if !filepath.IsAbs(path) && f.workingDir != "" {
|
|
path = filepath.Join(f.workingDir, path)
|
|
}
|
|
|
|
// Security: Clean and validate the path
|
|
cleanPath := filepath.Clean(path)
|
|
if strings.Contains(cleanPath, "..") {
|
|
return nil, fmt.Errorf("path traversal not allowed")
|
|
}
|
|
|
|
// Get max size limit
|
|
maxSize := int64(1024 * 1024) // 1MB default
|
|
if ms, ok := args["max_size"]; ok {
|
|
switch v := ms.(type) {
|
|
case float64:
|
|
maxSize = int64(v)
|
|
case int:
|
|
maxSize = int64(v)
|
|
case int64:
|
|
maxSize = v
|
|
}
|
|
}
|
|
|
|
// Check if file exists and get info
|
|
info, err := os.Stat(cleanPath)
|
|
if err != nil {
|
|
if os.IsNotExist(err) {
|
|
return nil, fmt.Errorf("file does not exist: %s", cleanPath)
|
|
}
|
|
return nil, fmt.Errorf("error accessing file: %w", err)
|
|
}
|
|
|
|
// Check if it's a directory
|
|
if info.IsDir() {
|
|
return nil, fmt.Errorf("path is a directory, not a file: %s", cleanPath)
|
|
}
|
|
|
|
// Check file size
|
|
if info.Size() > maxSize {
|
|
return nil, fmt.Errorf("file too large (%d bytes), maximum allowed: %d bytes", info.Size(), maxSize)
|
|
}
|
|
|
|
if strings.HasSuffix(strings.ToLower(cleanPath), ".pdf") {
|
|
return f.readPDFFile(cleanPath, args)
|
|
}
|
|
|
|
// Check read_full parameter
|
|
readFull := false // default to false
|
|
if rf, ok := args["read_full"]; ok {
|
|
readFull, _ = rf.(bool)
|
|
}
|
|
|
|
// Open and read the file
|
|
file, err := os.Open(cleanPath)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("error opening file: %w", err)
|
|
}
|
|
defer file.Close()
|
|
|
|
// Read file content
|
|
scanner := bufio.NewScanner(file)
|
|
var lines []string
|
|
totalLines := 0
|
|
|
|
// Read content, keeping track of total lines but only storing up to 100 if !readFull
|
|
for scanner.Scan() {
|
|
totalLines++
|
|
if readFull || totalLines <= 100 {
|
|
lines = append(lines, scanner.Text())
|
|
}
|
|
}
|
|
|
|
if err := scanner.Err(); err != nil {
|
|
return nil, fmt.Errorf("error reading file: %w", err)
|
|
}
|
|
|
|
content := strings.Join(lines, "\n")
|
|
|
|
return &FileReadResult{
|
|
Path: cleanPath,
|
|
LinesRead: len(lines),
|
|
TotalLines: totalLines,
|
|
Content: content,
|
|
}, nil
|
|
}
|
|
|
|
// readPDFFile extracts text from a PDF file
|
|
func (f *FileReader) readPDFFile(cleanPath string, args map[string]any) (any, error) {
|
|
// Open the PDF file
|
|
pdfFile, r, err := pdf.Open(cleanPath)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("error opening PDF: %w", err)
|
|
}
|
|
defer pdfFile.Close()
|
|
|
|
// Get total number of pages
|
|
totalPages := r.NumPage()
|
|
|
|
// Check read_full parameter - for PDFs, this controls whether to read all pages
|
|
readFull := false
|
|
if rf, ok := args["read_full"]; ok {
|
|
readFull, _ = rf.(bool)
|
|
}
|
|
|
|
// Extract text from pages
|
|
var allText strings.Builder
|
|
maxPages := 10 // Default to first 10 pages if not read_full
|
|
if readFull {
|
|
maxPages = totalPages
|
|
}
|
|
|
|
linesExtracted := 0
|
|
for pageNum := 1; pageNum <= totalPages && pageNum <= maxPages; pageNum++ {
|
|
// Get page
|
|
page := r.Page(pageNum)
|
|
if page.V.IsNull() {
|
|
continue
|
|
}
|
|
|
|
// Use the built-in GetPlainText method which handles text extraction better
|
|
pageText, err := page.GetPlainText(nil)
|
|
if err != nil {
|
|
// If GetPlainText fails, fall back to manual extraction
|
|
pageText = f.extractTextFromPage(page)
|
|
}
|
|
|
|
pageText = strings.TrimSpace(pageText)
|
|
if pageText != "" {
|
|
if allText.Len() > 0 {
|
|
allText.WriteString("\n\n")
|
|
}
|
|
allText.WriteString(fmt.Sprintf("--- Page %d ---\n", pageNum))
|
|
allText.WriteString(pageText)
|
|
|
|
// Count lines for reporting
|
|
linesExtracted += strings.Count(pageText, "\n") + 1
|
|
}
|
|
}
|
|
|
|
content := strings.TrimSpace(allText.String())
|
|
|
|
// If no text was extracted, return a helpful message
|
|
if content == "" {
|
|
content = "[PDF file contains no extractable text - it may contain only images or use complex encoding]"
|
|
linesExtracted = 1
|
|
}
|
|
|
|
return &FileReadResult{
|
|
Path: cleanPath,
|
|
LinesRead: linesExtracted,
|
|
TotalLines: totalPages, // For PDFs, we report pages as "lines"
|
|
Content: content,
|
|
}, nil
|
|
}
|
|
|
|
// extractTextFromPage extracts text from a single PDF page
|
|
func (f *FileReader) extractTextFromPage(page pdf.Page) string {
|
|
var buf bytes.Buffer
|
|
|
|
// Get page contents
|
|
contents := page.Content()
|
|
|
|
// Group text elements that appear to be part of the same word/line
|
|
var currentLine strings.Builder
|
|
lastX := -1.0
|
|
|
|
for i, t := range contents.Text {
|
|
// Skip empty text
|
|
if t.S == "" {
|
|
continue
|
|
}
|
|
|
|
// Check if this text element is on a new line or far from the previous one
|
|
// If X position is significantly different or we've reset to the beginning, it's likely a new word
|
|
if lastX >= 0 && (t.X < lastX-10 || t.X > lastX+50) {
|
|
// Add the accumulated line to buffer with a space
|
|
if currentLine.Len() > 0 {
|
|
buf.WriteString(currentLine.String())
|
|
buf.WriteString(" ")
|
|
currentLine.Reset()
|
|
}
|
|
}
|
|
|
|
// Add the text without extra spaces
|
|
currentLine.WriteString(t.S)
|
|
lastX = t.X
|
|
|
|
// Check if next element exists and has significantly different Y position (new line)
|
|
if i+1 < len(contents.Text) && contents.Text[i+1].Y > t.Y+5 {
|
|
if currentLine.Len() > 0 {
|
|
buf.WriteString(currentLine.String())
|
|
buf.WriteString("\n")
|
|
currentLine.Reset()
|
|
lastX = -1.0
|
|
}
|
|
}
|
|
}
|
|
|
|
// Add any remaining text
|
|
if currentLine.Len() > 0 {
|
|
buf.WriteString(currentLine.String())
|
|
}
|
|
|
|
return strings.TrimSpace(buf.String())
|
|
}
|
|
|
|
// FileList is the tool that lists the contents of a directory.
type FileList struct {
	// workingDir, when non-empty, is the directory that relative paths are
	// joined onto.
	workingDir string
}
|
|
|
|
func (f *FileList) SetWorkingDir(dir string) {
|
|
f.workingDir = dir
|
|
}
|
|
|
|
func (f *FileList) Name() string {
|
|
return "file_list"
|
|
}
|
|
|
|
func (f *FileList) Description() string {
|
|
return "List the contents of a directory"
|
|
}
|
|
|
|
func (f *FileList) Prompt() string {
|
|
return `Use the file_list tool to list the contents of a directory using the path parameter`
|
|
}
|
|
|
|
func (f *FileList) Schema() map[string]any {
|
|
schemaBytes := []byte(`{
|
|
"type": "object",
|
|
"properties": {
|
|
"path": {
|
|
"type": "string",
|
|
"description": "The path to the directory to list (default: current directory)",
|
|
"default": "."
|
|
},
|
|
"show_hidden": {
|
|
"type": "boolean",
|
|
"description": "Whether to show hidden files (starting with .)",
|
|
"default": false
|
|
},
|
|
"depth": {
|
|
"type": "integer",
|
|
"description": "How many directory levels deep to list (default: 1)",
|
|
"default": 1
|
|
}
|
|
},
|
|
"required": []
|
|
}`)
|
|
var schema map[string]any
|
|
if err := json.Unmarshal(schemaBytes, &schema); err != nil {
|
|
return nil
|
|
}
|
|
return schema
|
|
}
|
|
|
|
func (f *FileList) Execute(ctx context.Context, args map[string]any) (any, error) {
|
|
path := "."
|
|
if p, ok := args["path"].(string); ok {
|
|
path = p
|
|
}
|
|
|
|
// If path is not absolute and working directory is set, make it relative to working directory
|
|
if !filepath.IsAbs(path) && f.workingDir != "" {
|
|
path = filepath.Join(f.workingDir, path)
|
|
}
|
|
|
|
// Security: Clean and validate the path
|
|
cleanPath := filepath.Clean(path)
|
|
if strings.Contains(cleanPath, "..") {
|
|
return nil, fmt.Errorf("path traversal not allowed")
|
|
}
|
|
|
|
// Get optional parameters
|
|
showHidden := false
|
|
if sh, ok := args["show_hidden"].(bool); ok {
|
|
showHidden = sh
|
|
}
|
|
|
|
maxDepth := 1
|
|
if md, ok := args["depth"].(float64); ok {
|
|
maxDepth = int(md)
|
|
}
|
|
|
|
// Check if directory exists
|
|
info, err := os.Stat(cleanPath)
|
|
if err != nil {
|
|
if os.IsNotExist(err) {
|
|
return nil, fmt.Errorf("directory does not exist: %s", cleanPath)
|
|
}
|
|
return nil, fmt.Errorf("error accessing directory: %w", err)
|
|
}
|
|
|
|
if !info.IsDir() {
|
|
return nil, fmt.Errorf("path is not a directory: %s", cleanPath)
|
|
}
|
|
|
|
var files []FileInfo
|
|
|
|
files, err = f.listRecursive(cleanPath, showHidden, maxDepth, 0)
|
|
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return &FileListResult{
|
|
BasePath: cleanPath,
|
|
Files: files,
|
|
Count: len(files),
|
|
}, nil
|
|
}
|
|
|
|
func (f *FileList) listDirectory(path string, showHidden bool) ([]FileInfo, error) {
|
|
entries, err := os.ReadDir(path)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("error reading directory: %w", err)
|
|
}
|
|
|
|
var files []FileInfo
|
|
for _, entry := range entries {
|
|
name := entry.Name()
|
|
|
|
// Skip hidden files if not requested
|
|
if !showHidden && strings.HasPrefix(name, ".") {
|
|
continue
|
|
}
|
|
|
|
fileInfo := FileInfo{
|
|
RelPath: name,
|
|
IsDir: entry.IsDir(),
|
|
}
|
|
|
|
files = append(files, fileInfo)
|
|
}
|
|
|
|
return files, nil
|
|
}
|
|
|
|
func (f *FileList) listRecursive(path string, showHidden bool, maxDepth, currentDepth int) ([]FileInfo, error) {
|
|
if currentDepth >= maxDepth {
|
|
return nil, nil
|
|
}
|
|
|
|
files, err := f.listDirectory(path, showHidden)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var allFiles []FileInfo
|
|
for _, file := range files {
|
|
// For the first level, use the file name as is
|
|
// For deeper levels, join with parent directory
|
|
if currentDepth != 0 {
|
|
// Get the relative part of the path by removing the base path
|
|
rel, err := filepath.Rel(filepath.Dir(path), path)
|
|
if err == nil {
|
|
file.RelPath = filepath.Join(rel, file.RelPath)
|
|
}
|
|
}
|
|
allFiles = append(allFiles, file)
|
|
|
|
if file.IsDir {
|
|
subFiles, err := f.listRecursive(filepath.Join(path, file.RelPath), showHidden, maxDepth, currentDepth+1)
|
|
if err != nil {
|
|
continue // Skip directories we can't read
|
|
}
|
|
allFiles = append(allFiles, subFiles...)
|
|
}
|
|
}
|
|
|
|
return allFiles, nil
|
|
}
|
|
|
|
// FileWriter is the tool that writes content to files on the file system.
// TODO(parthsareen): max file size limit
type FileWriter struct {
	// workingDir, when non-empty, is the directory that relative paths are
	// joined onto.
	workingDir string
}
|
|
|
|
func (f *FileWriter) SetWorkingDir(dir string) {
|
|
f.workingDir = dir
|
|
}
|
|
|
|
func (f *FileWriter) Name() string {
|
|
return "file_write"
|
|
}
|
|
|
|
func (f *FileWriter) Description() string {
|
|
return "Write content to a file on the file system"
|
|
}
|
|
|
|
func (f *FileWriter) Prompt() string {
|
|
return `Use the file_write tool to write content to a file using the path parameter`
|
|
}
|
|
|
|
func (f *FileWriter) Schema() map[string]any {
|
|
schemaBytes := []byte(`{
|
|
"type": "object",
|
|
"properties": {
|
|
"path": {
|
|
"type": "string",
|
|
"description": "The path to the file to write"
|
|
},
|
|
"content": {
|
|
"type": "string",
|
|
"description": "The content to write to the file"
|
|
},
|
|
"append": {
|
|
"type": "boolean",
|
|
"description": "Whether to append to the file instead of overwriting (default: false)",
|
|
"default": false
|
|
},
|
|
"create_dirs": {
|
|
"type": "boolean",
|
|
"description": "Whether to create parent directories if they don't exist (default: false)",
|
|
"default": false
|
|
},
|
|
"max_size": {
|
|
"type": "integer",
|
|
"description": "Maximum content size to write in bytes (default: 1MB)",
|
|
"default": 1024 * 1024
|
|
}
|
|
},
|
|
"required": ["path", "content"]
|
|
}`)
|
|
var schema map[string]any
|
|
if err := json.Unmarshal(schemaBytes, &schema); err != nil {
|
|
return nil
|
|
}
|
|
return schema
|
|
}
|
|
|
|
func (f *FileWriter) Execute(ctx context.Context, args map[string]any) (any, error) {
|
|
path, ok := args["path"].(string)
|
|
if !ok {
|
|
return nil, fmt.Errorf("path parameter is required and must be a string")
|
|
}
|
|
|
|
// If path is not absolute and working directory is set, make it relative to working directory
|
|
if !filepath.IsAbs(path) && f.workingDir != "" {
|
|
path = filepath.Join(f.workingDir, path)
|
|
}
|
|
|
|
// Extract required parameters
|
|
content, ok := args["content"].(string)
|
|
if !ok {
|
|
return nil, fmt.Errorf("content parameter is required and must be a string")
|
|
}
|
|
|
|
// Get optional parameters with defaults
|
|
append := true // Always append by default
|
|
if a, ok := args["append"].(bool); ok && !a {
|
|
return nil, fmt.Errorf("overwriting existing files is not allowed - must use append mode")
|
|
}
|
|
|
|
createDirs := false
|
|
if cd, ok := args["create_dirs"].(bool); ok {
|
|
createDirs = cd
|
|
}
|
|
|
|
maxSize := int64(1024 * 1024) // 1MB default
|
|
if ms, ok := args["max_size"].(float64); ok {
|
|
maxSize = int64(ms)
|
|
}
|
|
|
|
// Security: Clean and validate the path
|
|
cleanPath := filepath.Clean(path)
|
|
if strings.Contains(cleanPath, "..") {
|
|
return nil, fmt.Errorf("path traversal not allowed")
|
|
}
|
|
|
|
// Check content size
|
|
if int64(len(content)) > maxSize {
|
|
return nil, fmt.Errorf("content too large (%d bytes), maximum allowed: %d bytes", len(content), maxSize)
|
|
}
|
|
|
|
// Create parent directories if requested
|
|
if createDirs {
|
|
dir := filepath.Dir(cleanPath)
|
|
if err := os.MkdirAll(dir, 0755); err != nil {
|
|
return nil, fmt.Errorf("failed to create parent directories: %w", err)
|
|
}
|
|
}
|
|
|
|
// Check if file exists - if it does, we must append
|
|
fileInfo, err := os.Stat(cleanPath)
|
|
if err == nil && fileInfo.Size() > 0 {
|
|
// File exists and has content
|
|
if !append {
|
|
return nil, fmt.Errorf("file %s already exists - cannot overwrite, must use append mode", cleanPath)
|
|
}
|
|
}
|
|
|
|
// Open file in append mode
|
|
flag := os.O_WRONLY | os.O_CREATE | os.O_APPEND
|
|
file, err := os.OpenFile(cleanPath, flag, 0644)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("error opening file for writing: %w", err)
|
|
}
|
|
defer file.Close()
|
|
|
|
// Write content
|
|
n, err := file.WriteString(content)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("error writing to file: %w", err)
|
|
}
|
|
|
|
// Get file info for response
|
|
info, err := file.Stat()
|
|
if err != nil {
|
|
// Return basic success info if we can't get file stats
|
|
return &FileWriteResult{
|
|
Path: cleanPath,
|
|
Written: n,
|
|
}, nil
|
|
}
|
|
|
|
return &FileWriteResult{
|
|
Path: cleanPath,
|
|
Size: info.Size(),
|
|
Written: n,
|
|
Mode: info.Mode().String(),
|
|
Modified: info.ModTime().Unix(),
|
|
}, nil
|
|
}
|