diff --git a/README.md b/README.md
index 35ba763..ad67b2b 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,17 @@ This architecture provides several benefits:
 - No need to reconnect for each command
 - Better performance
 
+## MCP Server
+
+Cremote includes a Model Context Protocol (MCP) server that provides a structured API for LLMs and AI agents. Instead of using CLI commands, the MCP server offers:
+
+- **State Management**: Automatic tracking of tabs, history, and iframe context
+- **Intelligent Abstractions**: High-level tools that combine multiple operations
+- **Better Error Handling**: Rich error context for debugging
+- **Automatic Screenshots**: Built-in screenshot capture for documentation
+
+See the [MCP Server Documentation](mcp/README.md) for setup and usage instructions.
+
 ## Prerequisites
 
 - Go 1.16 or higher
diff --git a/client/client.go b/client/client.go
index 0d940a6..5211f29 100644
--- a/client/client.go
+++ b/client/client.go
@@ -5,7 +5,9 @@ import (
 	"encoding/json"
 	"fmt"
 	"io"
+	"mime/multipart"
 	"net/http"
+	"os"
 	"strconv"
 )
 
@@ -602,3 +604,146 @@ func (c *Client) ClickElement(tabID, selector string, selectionTimeout, actionTi
 
 	return nil
 }
+
+// UploadFileToContainer uploads a file from the client to the container.
+// localPath: path to the file on the client machine
+// containerPath: optional path where to store the file in the container (defaults to /tmp/filename)
+//
+// It returns the path the daemon reports having written the file to
+// (the "target_path" field of the response).
+func (c *Client) UploadFileToContainer(localPath, containerPath string) (string, error) {
+	// Open the local file
+	file, err := os.Open(localPath)
+	if err != nil {
+		return "", fmt.Errorf("failed to open local file: %w", err)
+	}
+	defer file.Close()
+
+	// Get file info
+	fileInfo, err := file.Stat()
+	if err != nil {
+		return "", fmt.Errorf("failed to get file info: %w", err)
+	}
+
+	// Create multipart form
+	var body bytes.Buffer
+	writer := multipart.NewWriter(&body)
+
+	// Add the file field
+	fileWriter, err := writer.CreateFormFile("file", fileInfo.Name())
+	if err != nil {
+		return "", fmt.Errorf("failed to create form file: %w", err)
+	}
+
+	_, err = io.Copy(fileWriter, file)
+	if err != nil {
+		return "", fmt.Errorf("failed to copy file data: %w", err)
+	}
+
+	// Add the path field if specified
+	if containerPath != "" {
+		err = writer.WriteField("path", containerPath)
+		if err != nil {
+			return "", fmt.Errorf("failed to write path field: %w", err)
+		}
+	}
+
+	err = writer.Close()
+	if err != nil {
+		return "", fmt.Errorf("failed to close multipart writer: %w", err)
+	}
+
+	// Send the request
+	req, err := http.NewRequest("POST", c.serverURL+"/upload", &body)
+	if err != nil {
+		return "", fmt.Errorf("failed to create request: %w", err)
+	}
+	req.Header.Set("Content-Type", writer.FormDataContentType())
+
+	client := &http.Client{}
+	resp, err := client.Do(req)
+	if err != nil {
+		return "", fmt.Errorf("failed to send request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		// Best effort: the body is only used to enrich the error message.
+		respBody, _ := io.ReadAll(resp.Body)
+		return "", fmt.Errorf("upload failed with status %d: %s", resp.StatusCode, respBody)
+	}
+
+	// Parse the response
+	var response Response
+	err = json.NewDecoder(resp.Body).Decode(&response)
+	if err != nil {
+		return "", fmt.Errorf("failed to decode response: %w", err)
+	}
+
+	if !response.Success {
+		return "", fmt.Errorf("upload failed: %s", response.Error)
+	}
+
+	// Extract the target path from response
+	data, ok := response.Data.(map[string]interface{})
+	if !ok {
+		return "", fmt.Errorf("invalid response data format")
+	}
+
+	targetPath, ok := data["target_path"].(string)
+	if !ok {
+		return "", fmt.Errorf("target_path not found in response")
+	}
+
+	return targetPath, nil
+}
+
+// DownloadFileFromContainer downloads a file from the container to the client.
+// containerPath: path to the file in the container
+// localPath: path where to save the file on the client machine
+//
+// It returns an error if the request fails, the daemon answers with a
+// non-200 status, or the local file cannot be created or fully written.
+func (c *Client) DownloadFileFromContainer(containerPath, localPath string) error {
+	// Build the query string with proper escaping so that paths containing
+	// spaces, '&', '#', '+' or '%' reach the daemon unchanged.
+	req, err := http.NewRequest(http.MethodGet, c.serverURL+"/download", nil)
+	if err != nil {
+		return fmt.Errorf("failed to create download request: %w", err)
+	}
+	query := req.URL.Query()
+	query.Set("path", containerPath)
+	req.URL.RawQuery = query.Encode()
+
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return fmt.Errorf("failed to send download request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		// Best effort: the body is only used to enrich the error message.
+		respBody, _ := io.ReadAll(resp.Body)
+		return fmt.Errorf("download failed with status %d: %s", resp.StatusCode, respBody)
+	}
+
+	// Create the local file
+	localFile, err := os.Create(localPath)
+	if err != nil {
+		return fmt.Errorf("failed to create local file: %w", err)
+	}
+
+	// Copy the response body to the local file
+	if _, err := io.Copy(localFile, resp.Body); err != nil {
+		localFile.Close()
+		return fmt.Errorf("failed to save file: %w", err)
+	}
+
+	// Close explicitly: a failed Close can mean the data never reached disk,
+	// so it must be reported instead of returning success.
+	if err := localFile.Close(); err != nil {
+		return fmt.Errorf("failed to save file: %w", err)
+	}
+
+	return nil
+}
diff --git a/daemon/daemon.go b/daemon/daemon.go
index 57ecade..a2abce2 100644
--- a/daemon/daemon.go
+++ b/daemon/daemon.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
+	"io"
 	"log"
 	"net"
 	"net/http"
@@ -95,6 +96,8 @@ func NewDaemon(host string, port int) (*Daemon, error) {
 	mux := http.NewServeMux()
 	mux.HandleFunc("/command", daemon.handleCommand)
 	mux.HandleFunc("/status", daemon.handleStatus)
+	mux.HandleFunc("/upload", daemon.handleFileUpload)
+	mux.HandleFunc("/download", daemon.handleFileDownload)
 
 	daemon.server = &http.Server{
 		Addr: fmt.Sprintf("%s:%d", host, port),
@@ -1637,3 +1640,135 @@ func (d *Daemon) switchToMain(tabID string) error {
 
 	return nil
 }
+
+// handleFileUpload handles file upload requests from clients.
+//
+// It expects a multipart POST with a "file" part and an optional "path"
+// field naming the destination; without "path" the file is written to
+// /tmp/<uploaded filename>. On success it replies with a JSON Response whose
+// Data carries "filename", "size" and "target_path".
+//
+// NOTE(review): the destination is taken verbatim from the request, so any
+// client that can reach this endpoint can write any file the daemon user can
+// write. That looks intentional for a trusted client; confirm the daemon is
+// never exposed to untrusted networks.
+func (d *Daemon) handleFileUpload(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodPost {
+		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+
+	// Parse multipart form (32MB max memory)
+	err := r.ParseMultipartForm(32 << 20)
+	if err != nil {
+		http.Error(w, "Failed to parse multipart form", http.StatusBadRequest)
+		return
+	}
+
+	// Get the uploaded file
+	file, header, err := r.FormFile("file")
+	if err != nil {
+		http.Error(w, "Failed to get uploaded file", http.StatusBadRequest)
+		return
+	}
+	defer file.Close()
+
+	// Get the target path (optional, defaults to /tmp/)
+	targetPath := r.FormValue("path")
+	if targetPath == "" {
+		targetPath = "/tmp/" + header.Filename
+	}
+
+	// Create the target file
+	targetFile, err := os.Create(targetPath)
+	if err != nil {
+		http.Error(w, fmt.Sprintf("Failed to create target file: %v", err), http.StatusInternalServerError)
+		return
+	}
+
+	// Copy the uploaded file to the target location
+	_, err = io.Copy(targetFile, file)
+	if err != nil {
+		targetFile.Close()
+		http.Error(w, fmt.Sprintf("Failed to save file: %v", err), http.StatusInternalServerError)
+		return
+	}
+
+	// Close explicitly: a failed Close can mean the data never reached disk,
+	// and the client must not be told the upload succeeded in that case.
+	if err := targetFile.Close(); err != nil {
+		http.Error(w, fmt.Sprintf("Failed to save file: %v", err), http.StatusInternalServerError)
+		return
+	}
+
+	// Return success response
+	response := Response{
+		Success: true,
+		Data: map[string]interface{}{
+			"filename":    header.Filename,
+			"size":        header.Size,
+			"target_path": targetPath,
+		},
+	}
+
+	w.Header().Set("Content-Type", "application/json")
+	if err := json.NewEncoder(w).Encode(response); err != nil {
+		log.Printf("Error writing upload response: %v", err)
+	}
+}
+
+// handleFileDownload handles file download requests from clients.
+//
+// It expects a GET with a "path" query parameter and streams that file back
+// as an attachment. Missing files yield 404 and directories 400.
+func (d *Daemon) handleFileDownload(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodGet {
+		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+
+	// Get the file path from query parameter
+	filePath := r.URL.Query().Get("path")
+	if filePath == "" {
+		http.Error(w, "File path is required", http.StatusBadRequest)
+		return
+	}
+
+	// Check if file exists and get info
+	fileInfo, err := os.Stat(filePath)
+	if err != nil {
+		if os.IsNotExist(err) {
+			http.Error(w, "File not found", http.StatusNotFound)
+		} else {
+			http.Error(w, fmt.Sprintf("Failed to access file: %v", err), http.StatusInternalServerError)
+		}
+		return
+	}
+
+	// A directory can be opened but not streamed; reject it before any
+	// download headers are sent.
+	if fileInfo.IsDir() {
+		http.Error(w, "Path is a directory", http.StatusBadRequest)
+		return
+	}
+
+	// Open the file
+	file, err := os.Open(filePath)
+	if err != nil {
+		http.Error(w, fmt.Sprintf("Failed to open file: %v", err), http.StatusInternalServerError)
+		return
+	}
+	defer file.Close()
+
+	// Set headers for file download. %q quotes the name and escapes any
+	// embedded quotes or backslashes so the header stays well-formed.
+	w.Header().Set("Content-Type", "application/octet-stream")
+	w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%q", fileInfo.Name()))
+	w.Header().Set("Content-Length", strconv.FormatInt(fileInfo.Size(), 10))
+
+	// Stream the file to the client
+	_, err = io.Copy(w, file)
+	if err != nil {
+		log.Printf("Error streaming file to client: %v", err)
+	}
+}
diff --git a/mcp/LLM_MCP_GUIDE.md b/mcp/LLM_MCP_GUIDE.md
new file mode 100644
index 0000000..cc8f088
--- /dev/null
+++ b/mcp/LLM_MCP_GUIDE.md
@@ -0,0 +1,642 @@
+# LLM Agent Guide: Using Cremote MCP Server for Web Automation
+
+This document provides comprehensive guidance for LLM agents on how to use the **Cremote MCP Server** for intelligent web automation. The MCP server provides a structured, stateful interface that's optimized for AI-driven web testing and automation workflows.
+
+## What is the Cremote MCP Server?
+
+The **Cremote MCP Server** is a Model Context Protocol implementation that wraps cremote's web automation capabilities in a structured API designed specifically for LLMs. Unlike CLI commands, the MCP server provides:
+
+- **Automatic State Management**: Tracks current tab, tab history, and iframe context
+- **Intelligent Abstractions**: High-level tools that combine multiple operations
+- **Rich Error Context**: Detailed error information for better debugging
+- **Automatic Screenshots**: Built-in screenshot capture for documentation
+- **Structured Responses**: Consistent, parseable JSON responses
+
+## Prerequisites
+
+Before using the MCP server, ensure the cremote infrastructure is running:
+
+1. **Check if everything is already running:**
+   ```bash
+   cremote status
+   ```
+
+2. **Start Chromium with remote debugging (if needed):**
+   ```bash
+   chromium --remote-debugging-port=9222 --user-data-dir=/tmp/chromium-debug &
+   ```
+
+3. **Start cremote daemon (if needed):**
+   ```bash
+   cremotedaemon &
+   ```
+
+4. 
**The MCP server should be configured in your MCP client** (e.g., Claude Desktop) + +## Available MCP Tools + +### 1. `web_navigate` - Smart Navigation + +Navigate to URLs with automatic tab management and optional screenshot capture. + +**Parameters:** +- `url` (required): URL to navigate to +- `tab` (optional): Specific tab ID (uses current tab if not specified) +- `screenshot` (optional): Take screenshot after navigation (default: false) +- `timeout` (optional): Timeout in seconds (default: 5) + +**Example:** +```json +{ + "name": "web_navigate", + "arguments": { + "url": "https://example.com/login", + "screenshot": true, + "timeout": 10 + } +} +``` + +**Smart Behavior:** +- Automatically opens a new tab if none exists +- Updates current tab tracking +- Adds tab to history for easy switching + +### 2. `web_interact` - Element Interactions + +Interact with web elements through a unified interface. + +**Parameters:** +- `action` (required): "click", "fill", "submit", or "upload" +- `selector` (required): CSS selector for the target element +- `value` (optional): Value for fill/upload actions +- `tab` (optional): Tab ID (uses current tab if not specified) +- `timeout` (optional): Timeout in seconds (default: 5) + +**Examples:** +```json +// Fill a form field +{ + "name": "web_interact", + "arguments": { + "action": "fill", + "selector": "#username", + "value": "testuser" + } +} + +// Click a button +{ + "name": "web_interact", + "arguments": { + "action": "click", + "selector": "#login-button" + } +} + +// Submit a form +{ + "name": "web_interact", + "arguments": { + "action": "submit", + "selector": "form#login-form" + } +} + +// Upload a file +{ + "name": "web_interact", + "arguments": { + "action": "upload", + "selector": "input[type=file]", + "value": "/path/to/file.pdf" + } +} +``` + +### 3. `web_extract` - Data Extraction + +Extract information from web pages through multiple methods. 
+ +**Parameters:** +- `type` (required): "source", "element", or "javascript" +- `selector` (optional): CSS selector (required for "element" type) +- `code` (optional): JavaScript code (required for "javascript" type) +- `tab` (optional): Tab ID (uses current tab if not specified) +- `timeout` (optional): Timeout in seconds (default: 5) + +**Examples:** +```json +// Get page source +{ + "name": "web_extract", + "arguments": { + "type": "source" + } +} + +// Get specific element HTML +{ + "name": "web_extract", + "arguments": { + "type": "element", + "selector": ".error-message" + } +} + +// Execute JavaScript and get result +{ + "name": "web_extract", + "arguments": { + "type": "javascript", + "code": "document.title" + } +} + +// Check form validation +{ + "name": "web_extract", + "arguments": { + "type": "javascript", + "code": "document.getElementById('email').validity.valid" + } +} +``` + +### 4. `web_screenshot` - Screenshot Capture + +Take screenshots for documentation and debugging. + +**Parameters:** +- `output` (required): File path for the screenshot +- `full_page` (optional): Capture full page vs viewport (default: false) +- `tab` (optional): Tab ID (uses current tab if not specified) +- `timeout` (optional): Timeout in seconds (default: 5) + +**Examples:** +```json +// Viewport screenshot +{ + "name": "web_screenshot", + "arguments": { + "output": "/tmp/login-page.png" + } +} + +// Full page screenshot +{ + "name": "web_screenshot", + "arguments": { + "output": "/tmp/full-page.png", + "full_page": true + } +} +``` + +### 5. `web_manage_tabs` - Tab Management + +Manage browser tabs with automatic state tracking. 
+ +**Parameters:** +- `action` (required): "open", "close", "list", or "switch" +- `tab` (optional): Tab ID (required for "close" and "switch" actions) +- `timeout` (optional): Timeout in seconds (default: 5) + +**Examples:** +```json +// Open new tab +{ + "name": "web_manage_tabs", + "arguments": { + "action": "open" + } +} + +// List all tabs +{ + "name": "web_manage_tabs", + "arguments": { + "action": "list" + } +} + +// Switch to specific tab +{ + "name": "web_manage_tabs", + "arguments": { + "action": "switch", + "tab": "tab-id-123" + } +} + +// Close current tab +{ + "name": "web_manage_tabs", + "arguments": { + "action": "close" + } +} +``` + +### 6. `web_iframe` - Iframe Context Management + +Switch between main page and iframe contexts for testing embedded content. + +**Parameters:** +- `action` (required): "enter" or "exit" +- `selector` (optional): Iframe CSS selector (required for "enter" action) +- `tab` (optional): Tab ID (uses current tab if not specified) + +**Examples:** +```json +// Enter iframe context +{ + "name": "web_iframe", + "arguments": { + "action": "enter", + "selector": "iframe#payment-form" + } +} + +// Exit iframe context (return to main page) +{ + "name": "web_iframe", + "arguments": { + "action": "exit" + } +} +``` + +## Response Format + +All MCP tools return a consistent response structure: + +```json +{ + "success": true, + "data": "...", // Tool-specific response data + "screenshot": "/tmp/shot.png", // Screenshot path (if captured) + "current_tab": "tab-id-123", // Current active tab + "tab_history": ["tab-id-123"], // Tab history stack + "iframe_mode": false, // Whether in iframe context + "error": null, // Error message (if failed) + "metadata": {} // Additional context information +} +``` + +## Common Automation Patterns + +### 1. Login Flow Testing + +```json +// 1. Navigate to login page with screenshot +{ + "name": "web_navigate", + "arguments": { + "url": "https://myapp.com/login", + "screenshot": true + } +} + +// 2. 
Fill credentials +{ + "name": "web_interact", + "arguments": { + "action": "fill", + "selector": "#email", + "value": "user@example.com" + } +} + +{ + "name": "web_interact", + "arguments": { + "action": "fill", + "selector": "#password", + "value": "password123" + } +} + +// 3. Submit login +{ + "name": "web_interact", + "arguments": { + "action": "click", + "selector": "#login-button" + } +} + +// 4. Verify success +{ + "name": "web_extract", + "arguments": { + "type": "javascript", + "code": "document.querySelector('.welcome-message')?.textContent" + } +} + +// 5. Document result +{ + "name": "web_screenshot", + "arguments": { + "output": "/tmp/login-success.png" + } +} +``` + +### 2. Form Validation Testing + +```json +// 1. Navigate to form +{ + "name": "web_navigate", + "arguments": { + "url": "https://myapp.com/register" + } +} + +// 2. Test empty form submission +{ + "name": "web_interact", + "arguments": { + "action": "click", + "selector": "#submit-button" + } +} + +// 3. Check for validation errors +{ + "name": "web_extract", + "arguments": { + "type": "element", + "selector": ".error-message" + } +} + +// 4. Test invalid email +{ + "name": "web_interact", + "arguments": { + "action": "fill", + "selector": "#email", + "value": "invalid-email" + } +} + +// 5. Verify JavaScript validation +{ + "name": "web_extract", + "arguments": { + "type": "javascript", + "code": "document.getElementById('email').validity.valid" + } +} +``` + +### 3. Multi-Tab Workflow + +```json +// 1. Open multiple tabs for comparison +{ + "name": "web_manage_tabs", + "arguments": { + "action": "open" + } +} + +{ + "name": "web_navigate", + "arguments": { + "url": "https://app.com/admin" + } +} + +{ + "name": "web_manage_tabs", + "arguments": { + "action": "open" + } +} + +{ + "name": "web_navigate", + "arguments": { + "url": "https://app.com/user" + } +} + +// 2. List tabs to see current state +{ + "name": "web_manage_tabs", + "arguments": { + "action": "list" + } +} + +// 3. 
Switch between tabs as needed +{ + "name": "web_manage_tabs", + "arguments": { + "action": "switch", + "tab": "first-tab-id" + } +} +``` + +### 4. Iframe Testing (Payment Forms, Widgets) + +```json +// 1. Navigate to page with iframe +{ + "name": "web_navigate", + "arguments": { + "url": "https://shop.com/checkout" + } +} + +// 2. Enter iframe context +{ + "name": "web_iframe", + "arguments": { + "action": "enter", + "selector": "iframe.payment-frame" + } +} + +// 3. Interact with iframe content +{ + "name": "web_interact", + "arguments": { + "action": "fill", + "selector": "#card-number", + "value": "4111111111111111" + } +} + +{ + "name": "web_interact", + "arguments": { + "action": "fill", + "selector": "#expiry", + "value": "12/25" + } +} + +// 4. Exit iframe context +{ + "name": "web_iframe", + "arguments": { + "action": "exit" + } +} + +// 5. Continue with main page +{ + "name": "web_interact", + "arguments": { + "action": "click", + "selector": "#complete-order" + } +} +``` + +## Best Practices for LLMs + +### 1. State Awareness +- The MCP server automatically tracks state, but always check the response for current context +- Use the `current_tab` and `iframe_mode` fields to understand your current position +- The `tab_history` helps you understand available tabs + +### 2. Error Handling +- Always check the `success` field in responses +- Use the `error` field for detailed error information +- Take screenshots when errors occur for debugging: `"screenshot": true` + +### 3. Timeout Management +- Use longer timeouts for slow-loading pages or complex interactions +- Default 5-second timeouts work for most scenarios +- Increase timeouts for file uploads or heavy JavaScript applications + +### 4. Screenshot Strategy +- Take screenshots at key points for documentation +- Use `full_page: true` for comprehensive page captures +- Screenshot before and after critical actions for debugging + +### 5. 
Verification Patterns +- Always verify actions completed successfully +- Use JavaScript extraction to check application state +- Combine element extraction with JavaScript validation + +## Debugging Failed Tests + +### 1. Capture Current State +```json +// Get page source for analysis +{ + "name": "web_extract", + "arguments": { + "type": "source" + } +} + +// Take screenshot to see visual state +{ + "name": "web_screenshot", + "arguments": { + "output": "/tmp/debug-state.png", + "full_page": true + } +} + +// Check JavaScript console errors +{ + "name": "web_extract", + "arguments": { + "type": "javascript", + "code": "console.error.toString()" + } +} +``` + +### 2. Element Debugging +```json +// Check if element exists +{ + "name": "web_extract", + "arguments": { + "type": "javascript", + "code": "document.querySelector('#my-element') !== null" + } +} + +// Get element properties +{ + "name": "web_extract", + "arguments": { + "type": "javascript", + "code": "JSON.stringify({visible: document.querySelector('#my-element')?.offsetParent !== null, text: document.querySelector('#my-element')?.textContent})" + } +} +``` + +### 3. Network and Loading Issues +```json +// Check if page is still loading +{ + "name": "web_extract", + "arguments": { + "type": "javascript", + "code": "document.readyState" + } +} + +// Check for JavaScript errors +{ + "name": "web_extract", + "arguments": { + "type": "javascript", + "code": "window.onerror ? 'Errors detected' : 'No errors'" + } +} +``` + +## Advantages Over CLI Commands + +### 1. **Automatic State Management** +- No need to manually track tab IDs +- Automatic current tab resolution +- Persistent iframe context tracking + +### 2. **Rich Error Context** +- Detailed error messages with context +- Automatic screenshot capture on failures +- Structured error responses for better debugging + +### 3. 
**Intelligent Abstractions** +- Combined operations in single tools +- Smart parameter defaults and validation +- Automatic resource management + +### 4. **Better Performance** +- Direct library integration (no subprocess overhead) +- Persistent connections to cremote daemon +- Efficient state tracking + +### 5. **Structured Responses** +- Consistent JSON format for all responses +- Rich metadata for decision making +- Easy parsing and error handling + +## Key Differences from CLI Usage + +| Aspect | CLI Commands | MCP Server | +|--------|-------------|------------| +| **State Tracking** | Manual tab ID management | Automatic state management | +| **Error Handling** | Text parsing required | Structured error objects | +| **Screenshots** | Manual command execution | Automatic capture options | +| **Performance** | Subprocess overhead | Direct library calls | +| **Response Format** | Text output | Structured JSON | +| **Context Management** | Manual iframe tracking | Automatic context switching | +| **Resource Cleanup** | Manual tab management | Automatic resource tracking | + +The Cremote MCP Server transforms web automation from a series of CLI commands into an intelligent, stateful API that's optimized for AI-driven testing and automation workflows. diff --git a/mcp/LLM_USAGE_GUIDE.md b/mcp/LLM_USAGE_GUIDE.md new file mode 100644 index 0000000..8825908 --- /dev/null +++ b/mcp/LLM_USAGE_GUIDE.md @@ -0,0 +1,379 @@ +# Cremote MCP Tools - LLM Usage Guide + +This guide explains how LLMs can use the cremote MCP (Model Context Protocol) tools for web automation tasks. + +## Available Tools + +The cremote MCP server provides six comprehensive web automation tools: + +### 1. `web_navigate_cremotemcp` +Navigate to URLs and optionally take screenshots. 
+ +**Parameters:** +- `url` (required): The URL to navigate to +- `screenshot` (optional): Boolean, take a screenshot after navigation +- `tab` (optional): Specific tab ID to use +- `timeout` (optional): Timeout in seconds (default: 5) + +**Example Usage:** +``` +web_navigate_cremotemcp: + url: "https://example.com" + screenshot: true +``` + +### 2. `web_interact_cremotemcp` +Interact with web elements through various actions. + +**Parameters:** +- `action` (required): One of "click", "fill", "submit", "upload" +- `selector` (required): CSS selector for the target element +- `value` (optional): Value for fill/upload actions +- `tab` (optional): Specific tab ID to use +- `timeout` (optional): Timeout in seconds (default: 5) + +**Example Usage:** +``` +web_interact_cremotemcp: + action: "click" + selector: "button.submit" + +web_interact_cremotemcp: + action: "fill" + selector: "input[name='email']" + value: "user@example.com" +``` + +### 3. `web_extract_cremotemcp` +Extract data from web pages (HTML source, element content, or JavaScript execution). + +**Parameters:** +- `type` (required): One of "source", "element", "javascript" +- `selector` (optional): CSS selector (required for "element" type) +- `code` (optional): JavaScript code (required for "javascript" type) +- `tab` (optional): Specific tab ID to use +- `timeout` (optional): Timeout in seconds (default: 5) + +**Example Usage:** +``` +web_extract_cremotemcp: + type: "source" + +web_extract_cremotemcp: + type: "element" + selector: "div.content" + +web_extract_cremotemcp: + type: "javascript" + code: "document.title" +``` + +### 4. `web_screenshot_cremotemcp` +Take screenshots of web pages. 
+ +**Parameters:** +- `output` (required): File path where screenshot will be saved +- `full_page` (optional): Capture full page (default: false) +- `tab` (optional): Specific tab ID to use +- `timeout` (optional): Timeout in seconds (default: 5) + +**Example Usage:** +``` +web_screenshot_cremotemcp: + output: "/tmp/page-screenshot.png" + full_page: true +``` + +### 5. `web_manage_tabs_cremotemcp` +Manage browser tabs (open, close, list, switch). + +**Parameters:** +- `action` (required): One of "open", "close", "list", "switch" +- `tab` (optional): Tab ID (required for "close" and "switch" actions) +- `timeout` (optional): Timeout in seconds (default: 5) + +**Example Usage:** +``` +web_manage_tabs_cremotemcp: + action: "open" + +web_manage_tabs_cremotemcp: + action: "list" + +web_manage_tabs_cremotemcp: + action: "switch" + tab: "ABC123" +``` + +### 6. `web_iframe_cremotemcp` +Switch iframe context for subsequent operations. + +**Parameters:** +- `action` (required): One of "enter", "exit" +- `selector` (optional): Iframe CSS selector (required for "enter" action) +- `tab` (optional): Specific tab ID to use + +**Example Usage:** +``` +web_iframe_cremotemcp: + action: "enter" + selector: "iframe#payment-form" + +web_iframe_cremotemcp: + action: "exit" +``` + +## Common Usage Patterns + +### 1. Basic Web Navigation +``` +# Navigate to a website +web_navigate_cremotemcp: + url: "https://example.com" + screenshot: true +``` + +### 2. Form Interaction Sequence +``` +# 1. Navigate to the page +web_navigate_cremotemcp: + url: "https://example.com/login" + +# 2. Fill username field +web_interact_cremotemcp: + action: "fill" + selector: "input[name='username']" + value: "myusername" + +# 3. Fill password field +web_interact_cremotemcp: + action: "fill" + selector: "input[name='password']" + value: "mypassword" + +# 4. Submit the form +web_interact_cremotemcp: + action: "submit" + selector: "form" +``` + +### 3. 
Clicking Elements +``` +# Click a button +web_interact_cremotemcp: + action: "click" + selector: "button#submit-btn" + +# Click a link +web_interact_cremotemcp: + action: "click" + selector: "a[href='/dashboard']" +``` + +## Best Practices for LLMs + +### 1. Always Start with Navigation +Before interacting with elements, navigate to the target page: +``` +web_navigate_cremotemcp: + url: "https://target-website.com" +``` + +### 2. Use Specific CSS Selectors +Be as specific as possible with selectors to avoid ambiguity: +- Good: `input[name='email']`, `button.primary-submit` +- Avoid: `input`, `button` + +### 3. Take Screenshots for Debugging +When troubleshooting or documenting, use screenshots: +``` +web_navigate_cremotemcp: + url: "https://example.com" + screenshot: true +``` + +### 4. Handle Timeouts Appropriately +For slow-loading pages, increase timeout: +``` +web_navigate_cremotemcp: + url: "https://slow-website.com" + timeout: 10 +``` + +### 5. Sequential Operations +Perform operations in logical sequence: +1. Navigate to page +2. Fill required fields +3. Submit forms +4. Navigate to next page if needed + +## Error Handling + +### Common Error Scenarios: +1. **Element not found**: Selector doesn't match any elements +2. **Timeout**: Page takes too long to load or element to appear +3. **Navigation failed**: URL is invalid or unreachable + +### Troubleshooting Tips: +1. Verify the URL is correct and accessible +2. Check CSS selectors using browser developer tools +3. Increase timeout for slow-loading content +4. 
Take screenshots to see current page state + +## Tab Management + +The tools automatically manage browser tabs: +- If no tab is specified, a new tab is created automatically +- Tab IDs are returned in responses for reference +- Multiple tabs can be managed by specifying tab IDs + +## Security Considerations + +### Safe Practices: +- Only navigate to trusted websites +- Be cautious with form submissions +- Avoid entering sensitive information in examples +- Use screenshots sparingly to avoid exposing sensitive data + +### Limitations: +- Cannot bypass CAPTCHA or other anti-automation measures +- Subject to same-origin policy restrictions +- May not work with heavily JavaScript-dependent sites + +## Example: Complete Web Automation Task + +Here's a complete example of automating a web form: + +``` +# Step 1: Navigate to the form page +web_navigate_cremotemcp: + url: "https://example.com/contact" + screenshot: true + +# Step 2: Fill out the contact form +web_interact_cremotemcp: + action: "fill" + selector: "input[name='name']" + value: "John Doe" + +web_interact_cremotemcp: + action: "fill" + selector: "input[name='email']" + value: "john@example.com" + +web_interact_cremotemcp: + action: "fill" + selector: "textarea[name='message']" + value: "Hello, this is a test message." 
+ +# Step 3: Submit the form +web_interact_cremotemcp: + action: "submit" + selector: "form#contact-form" + +# Step 4: Take a screenshot of the result +web_navigate_cremotemcp: + url: "current" # Stay on current page + screenshot: true +``` + +## Integration Notes + +- Tools use the `_cremotemcp` suffix to avoid naming conflicts +- Responses include success status and descriptive messages +- Screenshots are saved to `/tmp/` directory with timestamps +- The underlying cremote daemon handles browser management + +## Advanced Usage Examples + +### Testing Web Applications +``` +# Navigate to application +web_navigate_cremotemcp: + url: "https://myapp.com/login" + screenshot: true + +# Test login functionality +web_interact_cremotemcp: + action: "fill" + selector: "#username" + value: "testuser" + +web_interact_cremotemcp: + action: "fill" + selector: "#password" + value: "testpass" + +web_interact_cremotemcp: + action: "click" + selector: "button[type='submit']" + +# Verify successful login +web_navigate_cremotemcp: + url: "current" + screenshot: true +``` + +### Data Extraction Workflows +``` +# Navigate to data source +web_navigate_cremotemcp: + url: "https://data-site.com/table" + +# Click through pagination or filters +web_interact_cremotemcp: + action: "click" + selector: ".filter-button" + +# Take screenshot to document current state +web_navigate_cremotemcp: + url: "current" + screenshot: true +``` + +### File Upload Testing +``` +# Navigate to upload form +web_navigate_cremotemcp: + url: "https://example.com/upload" + +# Upload a file +web_interact_cremotemcp: + action: "upload" + selector: "input[type='file']" + value: "/path/to/test-file.pdf" + +# Submit the upload form +web_interact_cremotemcp: + action: "click" + selector: "button.upload-submit" +``` + +## Tool Response Format + +All six tools return structured responses: + +**Success Response:** +``` +"Successfully navigated to https://example.com in tab ABC123 (screenshot saved to
/tmp/navigate-1234567890.png)" +``` + +**Error Response:** +``` +"failed to load URL: context deadline exceeded" +``` + +## CSS Selector Best Practices + +### Recommended Selector Types: +1. **ID selectors**: `#unique-id` (most reliable) +2. **Name attributes**: `input[name='fieldname']` +3. **Class combinations**: `.primary.button` +4. **Attribute selectors**: `button[data-action='submit']` + +### Avoid These Selectors: +1. **Generic tags**: `div`, `span`, `input` (too broad) +2. **Position-based**: `:nth-child()` (fragile) +3. **Text-based**: `:contains()` (not standard CSS) + +This documentation should help LLMs effectively use the cremote MCP tools for web automation tasks. diff --git a/mcp/QUICK_REFERENCE.md b/mcp/QUICK_REFERENCE.md new file mode 100644 index 0000000..ce17d5d --- /dev/null +++ b/mcp/QUICK_REFERENCE.md @@ -0,0 +1,96 @@ +# Cremote MCP Tools - Quick Reference + +## Tool Names +- `web_navigate_cremotemcp` - Navigate to URLs +- `web_interact_cremotemcp` - Interact with elements +- `web_extract_cremotemcp` - Extract page data +- `web_screenshot_cremotemcp` - Take screenshots +- `web_manage_tabs_cremotemcp` - Manage browser tabs +- `web_iframe_cremotemcp` - Switch iframe context + +## Essential Parameters + +### web_navigate_cremotemcp +```yaml +url: "https://example.com" # Required +screenshot: true # Optional, default false +timeout: 10 # Optional, default 5 seconds +``` + +### web_interact_cremotemcp +```yaml +action: "click" # Required: click|fill|submit|upload +selector: "button.submit" # Required: CSS selector +value: "text to fill" # Required for fill/upload actions +timeout: 10 # Optional, default 5 seconds +``` + +## Common Patterns + +### Navigate + Screenshot +```yaml +web_navigate_cremotemcp: + url: "https://example.com" + screenshot: true +``` + +### Fill Form Field +```yaml +web_interact_cremotemcp: + action: "fill" + selector: "input[name='email']" + value: "user@example.com" +``` + +### Click Button +```yaml +web_interact_cremotemcp: 
+ action: "click" + selector: "button#submit" +``` + +### Submit Form +```yaml +web_interact_cremotemcp: + action: "submit" + selector: "form" +``` + +### Upload File +```yaml +web_interact_cremotemcp: + action: "upload" + selector: "input[type='file']" + value: "/path/to/file.pdf" +``` + +## Best CSS Selectors + +✅ **Good:** +- `#unique-id` +- `input[name='fieldname']` +- `button.primary-submit` +- `form#login-form` + +❌ **Avoid:** +- `div` (too generic) +- `input` (too broad) +- `:nth-child(3)` (fragile) + +## Typical Workflow + +1. **Navigate** to target page +2. **Fill** required form fields +3. **Click** submit buttons +4. **Take screenshots** for verification +5. **Navigate** to next page if needed + +## Error Handling + +- **Element not found**: Check CSS selector +- **Timeout**: Increase timeout parameter +- **Navigation failed**: Verify URL accessibility + +## Screenshots + +Screenshots are automatically saved to `/tmp/navigate-{timestamp}.png` when requested. diff --git a/mcp/README.md b/mcp/README.md new file mode 100644 index 0000000..f33d1b1 --- /dev/null +++ b/mcp/README.md @@ -0,0 +1,252 @@ +# Cremote MCP Server + +This is a Model Context Protocol (MCP) server that exposes cremote's web automation capabilities to LLMs and AI agents. Instead of using CLI commands, this server provides a structured API that maintains state and provides intelligent abstractions. + +## Features + +- **State Management**: Automatically tracks current tab, tab history, and iframe context +- **Intelligent Abstractions**: High-level tools that combine multiple cremote operations +- **Automatic Screenshots**: Optional screenshot capture for debugging and documentation +- **Error Recovery**: Better error handling and context for LLMs +- **Resource Management**: Automatic cleanup and connection management + +## Quick Start for LLMs + +**For LLM agents**: See the comprehensive [LLM MCP Guide](LLM_MCP_GUIDE.md) for detailed usage instructions, examples, and best practices. 
+ +## Available Tools + +### 1. `web_navigate` +Navigate to URLs with optional screenshot capture. + +```json +{ + "name": "web_navigate", + "arguments": { + "url": "https://example.com", + "screenshot": true, + "timeout": 10 + } +} +``` + +### 2. `web_interact` +Interact with web elements (click, fill, submit, upload). + +```json +{ + "name": "web_interact", + "arguments": { + "action": "fill", + "selector": "#username", + "value": "testuser", + "timeout": 5 + } +} +``` + +### 3. `web_extract` +Extract data from pages (source, element HTML, JavaScript execution). + +```json +{ + "name": "web_extract", + "arguments": { + "type": "javascript", + "code": "document.title", + "timeout": 5 + } +} +``` + +### 4. `web_screenshot` +Take screenshots of the current page. + +```json +{ + "name": "web_screenshot", + "arguments": { + "output": "/tmp/page.png", + "full_page": true, + "timeout": 5 + } +} +``` + +### 5. `web_manage_tabs` +Manage browser tabs (open, close, list, switch). + +```json +{ + "name": "web_manage_tabs", + "arguments": { + "action": "open", + "timeout": 5 + } +} +``` + +### 6. `web_iframe` +Switch iframe context for subsequent operations. + +```json +{ + "name": "web_iframe", + "arguments": { + "action": "enter", + "selector": "iframe#payment-form" + } +} +``` + +## Installation & Usage + +### Prerequisites + +1. **Cremote daemon must be running**: + ```bash + cremotedaemon + ``` + +2. **Chrome/Chromium with remote debugging**: + ```bash + chromium --remote-debugging-port=9222 --user-data-dir=/tmp/chromium-debug + ``` + +### Build the MCP Server + +```bash +cd mcp/ +go build -o cremote-mcp . 
+``` + +### Configuration + +Set environment variables to configure the cremote connection: + +```bash +export CREMOTE_HOST=localhost +export CREMOTE_PORT=8989 +``` + +### Running with Claude Desktop + +Add to your Claude Desktop configuration (`~/Library/Application Support/Claude/claude_desktop_config.json` on macOS): + +```json +{ + "mcpServers": { + "cremote": { + "command": "/path/to/cremote-mcp", + "env": { + "CREMOTE_HOST": "localhost", + "CREMOTE_PORT": "8989" + } + } + } +} +``` + +### Running with Other MCP Clients + +The server communicates via JSON-RPC over stdio, so it can be used with any MCP-compatible client: + +```bash +echo '{"method":"tools/list","params":{},"id":1}' | ./cremote-mcp +``` + +## Response Format + +All tool responses include: + +```json +{ + "success": true, + "data": "...", + "screenshot": "/tmp/screenshot.png", + "current_tab": "tab-id-123", + "tab_history": ["tab-id-123", "tab-id-456"], + "iframe_mode": false, + "error": null, + "metadata": {} +} +``` + +## Example Workflow + +```json +// 1. Navigate to a page +{ + "name": "web_navigate", + "arguments": { + "url": "https://example.com/login", + "screenshot": true + } +} + +// 2. Fill login form +{ + "name": "web_interact", + "arguments": { + "action": "fill", + "selector": "#username", + "value": "testuser" + } +} + +{ + "name": "web_interact", + "arguments": { + "action": "fill", + "selector": "#password", + "value": "password123" + } +} + +// 3. Submit form +{ + "name": "web_interact", + "arguments": { + "action": "click", + "selector": "#login-button" + } +} + +// 4. Extract result +{ + "name": "web_extract", + "arguments": { + "type": "javascript", + "code": "document.querySelector('.welcome-message')?.textContent" + } +} + +// 5. 
Take final screenshot +{ + "name": "web_screenshot", + "arguments": { + "output": "/tmp/login-success.png", + "full_page": true + } +} +``` + +## Benefits Over CLI + +- **State Management**: No need to manually track tab IDs +- **Better Error Context**: Rich error information for debugging +- **Automatic Screenshots**: Built-in screenshot capture for documentation +- **Intelligent Defaults**: Smart parameter handling and fallbacks +- **Resource Cleanup**: Automatic management of tabs and files +- **Structured Responses**: Consistent, parseable response format + +## Development + +To extend the MCP server with new tools: + +1. Add the tool definition to `handleToolsList()` +2. Add a case in `handleToolCall()` +3. Implement the handler function following the pattern of existing handlers +4. Update this documentation + +The server is designed to be easily extensible while maintaining consistency with the cremote client library. diff --git a/mcp/backup/cremote-mcp b/mcp/backup/cremote-mcp new file mode 100755 index 0000000..6bcdbc4 Binary files /dev/null and b/mcp/backup/cremote-mcp differ diff --git a/mcp/backup/cremote-mcp-http b/mcp/backup/cremote-mcp-http new file mode 100755 index 0000000..b9177f6 Binary files /dev/null and b/mcp/backup/cremote-mcp-http differ diff --git a/mcp/backup/cremote-mcp-stdio b/mcp/backup/cremote-mcp-stdio new file mode 100755 index 0000000..6b53604 Binary files /dev/null and b/mcp/backup/cremote-mcp-stdio differ diff --git a/mcp/backup/server-stdio.go b/mcp/backup/server-stdio.go new file mode 100644 index 0000000..99ee7ed --- /dev/null +++ b/mcp/backup/server-stdio.go @@ -0,0 +1,923 @@ +package main + +import ( + "bufio" + "encoding/json" + "fmt" + "io" + "log" + "os" + "os/signal" + "path/filepath" + "strconv" + "strings" + "syscall" + "time" + + "git.teamworkapps.com/shortcut/cremote/client" +) + +// DebugLogger handles debug logging to file +type DebugLogger struct { + file *os.File + logger *log.Logger +} + +// NewDebugLogger 
creates a new debug logger +func NewDebugLogger() (*DebugLogger, error) { + logDir := "/tmp/cremote-mcp-logs" + if err := os.MkdirAll(logDir, 0755); err != nil { + return nil, fmt.Errorf("failed to create log directory: %w", err) + } + + logFile := filepath.Join(logDir, fmt.Sprintf("mcp-stdio-%d.log", time.Now().Unix())) + file, err := os.OpenFile(logFile, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) + if err != nil { + return nil, fmt.Errorf("failed to open log file: %w", err) + } + + logger := log.New(file, "", log.LstdFlags|log.Lmicroseconds|log.Lshortfile) + logger.Printf("=== MCP STDIO Server Debug Log Started ===") + + return &DebugLogger{ + file: file, + logger: logger, + }, nil +} + +// Log writes a debug message +func (d *DebugLogger) Log(format string, args ...interface{}) { + if d != nil && d.logger != nil { + d.logger.Printf(format, args...) + } +} + +// LogJSON logs a JSON object with a label +func (d *DebugLogger) LogJSON(label string, obj interface{}) { + if d != nil && d.logger != nil { + jsonBytes, err := json.MarshalIndent(obj, "", " ") + if err != nil { + d.logger.Printf("%s: JSON marshal error: %v", label, err) + } else { + d.logger.Printf("%s:\n%s", label, string(jsonBytes)) + } + } +} + +// Close closes the debug logger +func (d *DebugLogger) Close() { + if d != nil && d.file != nil { + d.logger.Printf("=== MCP STDIO Server Debug Log Ended ===") + d.file.Close() + } +} + +var debugLogger *DebugLogger + +// MCPServer wraps the cremote client with MCP protocol +type MCPServer struct { + client *client.Client + currentTab string + tabHistory []string + iframeMode bool + screenshots []string +} + +// MCPRequest represents an incoming MCP request +type MCPRequest struct { + JSONRPC string `json:"jsonrpc,omitempty"` + Method string `json:"method"` + Params map[string]interface{} `json:"params"` + ID interface{} `json:"id"` +} + +// MCPResponse represents an MCP response +type MCPResponse struct { + JSONRPC string `json:"jsonrpc,omitempty"` + Result 
interface{} `json:"result,omitempty"` + Error *MCPError `json:"error,omitempty"` + ID interface{} `json:"id"` +} + +// MCPError represents an MCP error +type MCPError struct { + Code int `json:"code"` + Message string `json:"message"` +} + +// ToolResult represents the result of a tool execution +type ToolResult struct { + Success bool `json:"success"` + Data interface{} `json:"data,omitempty"` + Screenshot string `json:"screenshot,omitempty"` + CurrentTab string `json:"current_tab,omitempty"` + TabHistory []string `json:"tab_history,omitempty"` + IframeMode bool `json:"iframe_mode"` + Error string `json:"error,omitempty"` + Metadata map[string]string `json:"metadata,omitempty"` +} + +// NewMCPServer creates a new MCP server instance +func NewMCPServer(host string, port int) *MCPServer { + c := client.NewClient(host, port) + return &MCPServer{ + client: c, + tabHistory: make([]string, 0), + screenshots: make([]string, 0), + } +} + +// HandleRequest processes an MCP request and returns a response +func (s *MCPServer) HandleRequest(req MCPRequest) MCPResponse { + debugLogger.Log("HandleRequest called with method: %s, ID: %v", req.Method, req.ID) + debugLogger.LogJSON("Incoming Request", req) + + var resp MCPResponse + + switch req.Method { + case "initialize": + debugLogger.Log("Handling initialize request") + resp = s.handleInitialize(req) + case "tools/list": + debugLogger.Log("Handling tools/list request") + resp = s.handleToolsList(req) + case "tools/call": + debugLogger.Log("Handling tools/call request") + resp = s.handleToolCall(req) + default: + debugLogger.Log("Unknown method: %s", req.Method) + resp = MCPResponse{ + Error: &MCPError{ + Code: -32601, + Message: fmt.Sprintf("Method not found: %s", req.Method), + }, + ID: req.ID, + } + } + + debugLogger.LogJSON("Response", resp) + debugLogger.Log("HandleRequest completed for method: %s", req.Method) + return resp +} + +// handleInitialize handles the MCP initialize request +func (s *MCPServer) 
handleInitialize(req MCPRequest) MCPResponse { + debugLogger.Log("handleInitialize: Processing initialize request") + debugLogger.LogJSON("Initialize request params", req.Params) + + result := map[string]interface{}{ + "protocolVersion": "2024-11-05", + "capabilities": map[string]interface{}{ + "tools": map[string]interface{}{ + "listChanged": true, + }, + }, + "serverInfo": map[string]interface{}{ + "name": "cremote-mcp", + "version": "1.0.0", + }, + } + + debugLogger.LogJSON("Initialize response result", result) + + return MCPResponse{ + Result: result, + ID: req.ID, + } +} + +// handleToolsList returns the list of available tools +func (s *MCPServer) handleToolsList(req MCPRequest) MCPResponse { + tools := []map[string]interface{}{ + { + "name": "web_navigate", + "description": "Navigate to a URL and optionally take a screenshot", + "inputSchema": map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "url": map[string]interface{}{ + "type": "string", + "description": "URL to navigate to", + }, + "tab": map[string]interface{}{ + "type": "string", + "description": "Tab ID (optional, uses current tab)", + }, + "screenshot": map[string]interface{}{ + "type": "boolean", + "description": "Take screenshot after navigation", + }, + "timeout": map[string]interface{}{ + "type": "integer", + "description": "Timeout in seconds", + "default": 5, + }, + }, + "required": []string{"url"}, + }, + }, + { + "name": "web_interact", + "description": "Interact with web elements (click, fill, submit)", + "inputSchema": map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "action": map[string]interface{}{ + "type": "string", + "enum": []string{"click", "fill", "submit", "upload"}, + }, + "selector": map[string]interface{}{ + "type": "string", + "description": "CSS selector for the element", + }, + "value": map[string]interface{}{ + "type": "string", + "description": "Value to fill (for fill/upload actions)", + }, + "tab": 
map[string]interface{}{ + "type": "string", + "description": "Tab ID (optional)", + }, + "timeout": map[string]interface{}{ + "type": "integer", + "description": "Timeout in seconds", + "default": 5, + }, + }, + "required": []string{"action", "selector"}, + }, + }, + { + "name": "web_extract", + "description": "Extract data from the page (source, element HTML, or execute JavaScript)", + "inputSchema": map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "type": map[string]interface{}{ + "type": "string", + "enum": []string{"source", "element", "javascript"}, + }, + "selector": map[string]interface{}{ + "type": "string", + "description": "CSS selector (for element type)", + }, + "code": map[string]interface{}{ + "type": "string", + "description": "JavaScript code (for javascript type)", + }, + "tab": map[string]interface{}{ + "type": "string", + "description": "Tab ID (optional)", + }, + "timeout": map[string]interface{}{ + "type": "integer", + "description": "Timeout in seconds", + "default": 5, + }, + }, + "required": []string{"type"}, + }, + }, + { + "name": "web_screenshot", + "description": "Take a screenshot of the current page", + "inputSchema": map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "output": map[string]interface{}{ + "type": "string", + "description": "Output file path", + }, + "full_page": map[string]interface{}{ + "type": "boolean", + "description": "Capture full page", + "default": false, + }, + "tab": map[string]interface{}{ + "type": "string", + "description": "Tab ID (optional)", + }, + "timeout": map[string]interface{}{ + "type": "integer", + "description": "Timeout in seconds", + "default": 5, + }, + }, + "required": []string{"output"}, + }, + }, + { + "name": "web_manage_tabs", + "description": "Manage browser tabs (open, close, list, switch)", + "inputSchema": map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "action": 
map[string]interface{}{ + "type": "string", + "enum": []string{"open", "close", "list", "switch"}, + }, + "tab": map[string]interface{}{ + "type": "string", + "description": "Tab ID (for close/switch actions)", + }, + "timeout": map[string]interface{}{ + "type": "integer", + "description": "Timeout in seconds", + "default": 5, + }, + }, + "required": []string{"action"}, + }, + }, + { + "name": "web_iframe", + "description": "Switch iframe context for subsequent operations", + "inputSchema": map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "action": map[string]interface{}{ + "type": "string", + "enum": []string{"enter", "exit"}, + }, + "selector": map[string]interface{}{ + "type": "string", + "description": "Iframe CSS selector (for enter action)", + }, + "tab": map[string]interface{}{ + "type": "string", + "description": "Tab ID (optional)", + }, + }, + "required": []string{"action"}, + }, + }, + } + + return MCPResponse{ + Result: map[string]interface{}{ + "tools": tools, + }, + ID: req.ID, + } +} + +// handleToolCall executes a tool and returns the result +func (s *MCPServer) handleToolCall(req MCPRequest) MCPResponse { + debugLogger.Log("handleToolCall: Processing tool call request") + debugLogger.LogJSON("Tool call request params", req.Params) + + params := req.Params + + toolName, ok := params["name"].(string) + if !ok || toolName == "" { + debugLogger.Log("handleToolCall: Tool name missing or invalid") + return MCPResponse{ + Error: &MCPError{Code: -32602, Message: "Missing tool name"}, + ID: req.ID, + } + } + + debugLogger.Log("handleToolCall: Tool name extracted: %s", toolName) + + arguments, _ := params["arguments"].(map[string]interface{}) + if arguments == nil { + debugLogger.Log("handleToolCall: No arguments provided, using empty map") + arguments = make(map[string]interface{}) + } else { + debugLogger.LogJSON("Tool arguments", arguments) + } + + var result ToolResult + var err error + + 
// getStringParam returns params[key] when it is present and holds a string,
// and defaultValue otherwise (missing key, nil map, or a non-string value).
func getStringParam(params map[string]interface{}, key, defaultValue string) string {
	if val, ok := params[key].(string); ok {
		return val
	}
	return defaultValue
}

// getIntParam returns params[key] as an int, or defaultValue when the key is
// missing or does not hold a number.
//
// encoding/json decodes every JSON number in a map[string]interface{} as
// float64, which is truncated toward zero here. Native int and int64 values
// and json.Number are accepted as well, so parameter maps that were built in
// process or decoded with Decoder.UseNumber behave the same way instead of
// silently falling back to the default.
func getIntParam(params map[string]interface{}, key string, defaultValue int) int {
	switch val := params[key].(type) {
	case float64:
		return int(val)
	case int:
		return val
	case int64:
		return int(val)
	case json.Number:
		if n, err := val.Int64(); err == nil {
			return int(n)
		}
		// Not an integer literal (for example "1.5"): truncate like float64.
		if f, err := val.Float64(); err == nil {
			return int(f)
		}
	}
	return defaultValue
}

// getBoolParam returns params[key] when it is present and holds a bool, and
// defaultValue otherwise.
func getBoolParam(params map[string]interface{}, key string, defaultValue bool) bool {
	if val, ok := params[key].(bool); ok {
		return val
	}
	return defaultValue
}
resolveTabID(tabID string) string { + if tabID != "" { + return tabID + } + return s.currentTab +} + +// handleNavigate handles web navigation +func (s *MCPServer) handleNavigate(params map[string]interface{}) (ToolResult, error) { + url := getStringParam(params, "url", "") + if url == "" { + return ToolResult{}, fmt.Errorf("url parameter is required") + } + + tab := getStringParam(params, "tab", "") + screenshot := getBoolParam(params, "screenshot", false) + timeout := getIntParam(params, "timeout", 5) + + // If no tab specified and no current tab, open a new one + if tab == "" && s.currentTab == "" { + newTab, err := s.client.OpenTab(timeout) + if err != nil { + return ToolResult{}, fmt.Errorf("failed to open new tab: %w", err) + } + s.currentTab = newTab + s.tabHistory = append(s.tabHistory, newTab) + tab = newTab + } else if tab == "" { + tab = s.currentTab + } else { + // Update current tab if specified + s.currentTab = tab + // Move to end of history if it exists, otherwise add it + s.removeTabFromHistory(tab) + s.tabHistory = append(s.tabHistory, tab) + } + + // Navigate to URL + err := s.client.LoadURL(tab, url, timeout) + if err != nil { + return ToolResult{}, fmt.Errorf("failed to navigate to %s: %w", url, err) + } + + result := ToolResult{ + Success: true, + Data: map[string]string{ + "url": url, + "tab": tab, + }, + } + + // Take screenshot if requested + if screenshot { + screenshotPath := fmt.Sprintf("/tmp/navigate-%d.png", time.Now().Unix()) + err = s.client.TakeScreenshot(tab, screenshotPath, false, timeout) + if err != nil { + // Don't fail the whole operation for screenshot errors + result.Metadata = map[string]string{ + "screenshot_error": err.Error(), + } + } else { + result.Screenshot = screenshotPath + s.screenshots = append(s.screenshots, screenshotPath) + } + } + + return result, nil +} + +// handleInteract handles element interactions +func (s *MCPServer) handleInteract(params map[string]interface{}) (ToolResult, error) { + action := 
getStringParam(params, "action", "") + selector := getStringParam(params, "selector", "") + value := getStringParam(params, "value", "") + tab := s.resolveTabID(getStringParam(params, "tab", "")) + timeout := getIntParam(params, "timeout", 5) + + if action == "" { + return ToolResult{}, fmt.Errorf("action parameter is required") + } + if selector == "" { + return ToolResult{}, fmt.Errorf("selector parameter is required") + } + if tab == "" { + return ToolResult{}, fmt.Errorf("no active tab available") + } + + var err error + result := ToolResult{Success: true} + + switch action { + case "click": + err = s.client.ClickElement(tab, selector, timeout, timeout) + result.Data = map[string]string{"action": "clicked", "selector": selector} + + case "fill": + if value == "" { + return ToolResult{}, fmt.Errorf("value parameter is required for fill action") + } + err = s.client.FillFormField(tab, selector, value, timeout, timeout) + result.Data = map[string]string{"action": "filled", "selector": selector, "value": value} + + case "submit": + err = s.client.SubmitForm(tab, selector, timeout, timeout) + result.Data = map[string]string{"action": "submitted", "selector": selector} + + case "upload": + if value == "" { + return ToolResult{}, fmt.Errorf("value parameter is required for upload action") + } + err = s.client.UploadFile(tab, selector, value, timeout, timeout) + result.Data = map[string]string{"action": "uploaded", "selector": selector, "file": value} + + default: + return ToolResult{}, fmt.Errorf("unknown action: %s", action) + } + + if err != nil { + return ToolResult{}, fmt.Errorf("failed to %s element: %w", action, err) + } + + return result, nil +} + +// handleExtract handles data extraction +func (s *MCPServer) handleExtract(params map[string]interface{}) (ToolResult, error) { + extractType := getStringParam(params, "type", "") + selector := getStringParam(params, "selector", "") + code := getStringParam(params, "code", "") + tab := 
s.resolveTabID(getStringParam(params, "tab", "")) + timeout := getIntParam(params, "timeout", 5) + + if extractType == "" { + return ToolResult{}, fmt.Errorf("type parameter is required") + } + if tab == "" { + return ToolResult{}, fmt.Errorf("no active tab available") + } + + var data string + var err error + + switch extractType { + case "source": + data, err = s.client.GetPageSource(tab, timeout) + + case "element": + if selector == "" { + return ToolResult{}, fmt.Errorf("selector parameter is required for element type") + } + data, err = s.client.GetElementHTML(tab, selector, timeout) + + case "javascript": + if code == "" { + return ToolResult{}, fmt.Errorf("code parameter is required for javascript type") + } + data, err = s.client.EvalJS(tab, code, timeout) + + default: + return ToolResult{}, fmt.Errorf("unknown extract type: %s", extractType) + } + + if err != nil { + return ToolResult{}, fmt.Errorf("failed to extract %s: %w", extractType, err) + } + + return ToolResult{ + Success: true, + Data: data, + }, nil +} + +// handleScreenshot handles screenshot capture +func (s *MCPServer) handleScreenshot(params map[string]interface{}) (ToolResult, error) { + output := getStringParam(params, "output", "") + if output == "" { + output = fmt.Sprintf("/tmp/screenshot-%d.png", time.Now().Unix()) + } + + tab := s.resolveTabID(getStringParam(params, "tab", "")) + fullPage := getBoolParam(params, "full_page", false) + timeout := getIntParam(params, "timeout", 5) + + if tab == "" { + return ToolResult{}, fmt.Errorf("no active tab available") + } + + err := s.client.TakeScreenshot(tab, output, fullPage, timeout) + if err != nil { + return ToolResult{}, fmt.Errorf("failed to take screenshot: %w", err) + } + + s.screenshots = append(s.screenshots, output) + + return ToolResult{ + Success: true, + Screenshot: output, + Data: map[string]interface{}{ + "output": output, + "full_page": fullPage, + "tab": tab, + }, + }, nil +} + +// handleManageTabs handles tab management 
operations +func (s *MCPServer) handleManageTabs(params map[string]interface{}) (ToolResult, error) { + action := getStringParam(params, "action", "") + tab := getStringParam(params, "tab", "") + timeout := getIntParam(params, "timeout", 5) + + if action == "" { + return ToolResult{}, fmt.Errorf("action parameter is required") + } + + var data interface{} + var err error + + switch action { + case "open": + newTab, err := s.client.OpenTab(timeout) + if err != nil { + return ToolResult{}, fmt.Errorf("failed to open tab: %w", err) + } + s.currentTab = newTab + s.tabHistory = append(s.tabHistory, newTab) + data = map[string]string{"tab": newTab, "action": "opened"} + + case "close": + targetTab := s.resolveTabID(tab) + if targetTab == "" { + return ToolResult{}, fmt.Errorf("no tab to close") + } + err = s.client.CloseTab(targetTab, timeout) + if err != nil { + return ToolResult{}, fmt.Errorf("failed to close tab: %w", err) + } + s.removeTabFromHistory(targetTab) + data = map[string]string{"tab": targetTab, "action": "closed"} + + case "list": + tabs, err := s.client.ListTabs() + if err != nil { + return ToolResult{}, fmt.Errorf("failed to list tabs: %w", err) + } + data = tabs + + case "switch": + if tab == "" { + return ToolResult{}, fmt.Errorf("tab parameter is required for switch action") + } + s.currentTab = tab + s.removeTabFromHistory(tab) + s.tabHistory = append(s.tabHistory, tab) + data = map[string]string{"tab": tab, "action": "switched"} + + default: + return ToolResult{}, fmt.Errorf("unknown tab action: %s", action) + } + + if err != nil { + return ToolResult{}, err + } + + return ToolResult{ + Success: true, + Data: data, + }, nil +} + +// handleIframe handles iframe context switching +func (s *MCPServer) handleIframe(params map[string]interface{}) (ToolResult, error) { + action := getStringParam(params, "action", "") + selector := getStringParam(params, "selector", "") + tab := s.resolveTabID(getStringParam(params, "tab", "")) + + if action == "" { + 
return ToolResult{}, fmt.Errorf("action parameter is required") + } + + if tab == "" { + return ToolResult{}, fmt.Errorf("no active tab available") + } + + var err error + var data map[string]string + + switch action { + case "enter": + if selector == "" { + return ToolResult{}, fmt.Errorf("selector parameter is required for enter action") + } + err = s.client.SwitchToIframe(tab, selector) + s.iframeMode = true + data = map[string]string{"action": "entered", "selector": selector} + + case "exit": + err = s.client.SwitchToMain(tab) + s.iframeMode = false + data = map[string]string{"action": "exited"} + + default: + return ToolResult{}, fmt.Errorf("unknown iframe action: %s", action) + } + + if err != nil { + return ToolResult{}, fmt.Errorf("failed to %s iframe: %w", action, err) + } + + return ToolResult{ + Success: true, + Data: data, + }, nil +} + +// removeTabFromHistory removes a tab from history and updates current tab +func (s *MCPServer) removeTabFromHistory(tabID string) { + for i, id := range s.tabHistory { + if id == tabID { + s.tabHistory = append(s.tabHistory[:i], s.tabHistory[i+1:]...) 
+ break + } + } + if s.currentTab == tabID { + if len(s.tabHistory) > 0 { + s.currentTab = s.tabHistory[len(s.tabHistory)-1] + } else { + s.currentTab = "" + } + } +} + +func main() { + // Initialize debug logger + var err error + debugLogger, err = NewDebugLogger() + if err != nil { + log.Printf("Warning: Failed to initialize debug logger: %v", err) + // Continue without debug logging + } else { + defer debugLogger.Close() + debugLogger.Log("MCP STDIO Server starting up") + } + + // Set up signal handling for graceful shutdown + sigChan := make(chan os.Signal, 1) + signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM) + + host := os.Getenv("CREMOTE_HOST") + if host == "" { + host = "localhost" + } + + portStr := os.Getenv("CREMOTE_PORT") + port := 8989 + if portStr != "" { + if p, err := strconv.Atoi(portStr); err == nil { + port = p + } + } + + debugLogger.Log("Connecting to cremote daemon at %s:%d", host, port) + log.Printf("Starting MCP stdio server, connecting to cremote daemon at %s:%d", host, port) + server := NewMCPServer(host, port) + + // Create a buffered reader for better EOF handling + reader := bufio.NewReader(os.Stdin) + + log.Printf("MCP stdio server ready, waiting for requests...") + + // Channel to signal when to stop reading + done := make(chan bool) + + // Goroutine to handle stdin reading + go func() { + defer close(done) + for { + // Read headers for Content-Length framing (use the same reader for headers and body) + contentLength := -1 + for { + line, err := reader.ReadString('\n') + if err != nil { + if err == io.EOF { + log.Printf("Input stream closed, shutting down MCP server") + return + } + log.Printf("Error reading header: %v", err) + return + } + line = strings.TrimRight(line, "\r\n") + if line == "" { + break // end of headers + } + const prefix = "content-length: " + low := strings.ToLower(line) + if strings.HasPrefix(low, prefix) { + var err error + contentLength, err = strconv.Atoi(strings.TrimSpace(low[len(prefix):])) + if err 
!= nil { + log.Printf("Invalid Content-Length: %v", err) + return + } + } + } + + if contentLength < 0 { + log.Printf("Missing Content-Length header") + return + } + + // Read body of specified length + debugLogger.Log("Reading request body of length: %d", contentLength) + body := make([]byte, contentLength) + if _, err := io.ReadFull(reader, body); err != nil { + debugLogger.Log("Error reading body: %v", err) + log.Printf("Error reading body: %v", err) + return + } + + debugLogger.Log("Raw request body: %s", string(body)) + + var req MCPRequest + if err := json.Unmarshal(body, &req); err != nil { + debugLogger.Log("Error decoding request body: %v", err) + log.Printf("Error decoding request body: %v", err) + continue + } + + debugLogger.Log("Successfully parsed request: %s (ID: %v)", req.Method, req.ID) + log.Printf("Processing request: %s (ID: %v)", req.Method, req.ID) + + resp := server.HandleRequest(req) + resp.JSONRPC = "2.0" + + // Write Content-Length framed response + responseBytes, err := json.Marshal(resp) + if err != nil { + debugLogger.Log("Error marshaling response: %v", err) + log.Printf("Error marshaling response: %v", err) + continue + } + + debugLogger.Log("Sending response with Content-Length: %d", len(responseBytes)) + debugLogger.Log("Raw response: %s", string(responseBytes)) + + fmt.Fprintf(os.Stdout, "Content-Length: %d\r\n\r\n", len(responseBytes)) + os.Stdout.Write(responseBytes) + + debugLogger.Log("Response sent successfully for request: %s", req.Method) + log.Printf("Completed request: %s", req.Method) + } + }() + + // Wait for either completion or signal + select { + case <-done: + log.Printf("Input processing completed") + case sig := <-sigChan: + log.Printf("Received signal %v, shutting down", sig) + } + + log.Printf("MCP stdio server shutdown complete") +} diff --git a/mcp/backup/server.go b/mcp/backup/server.go new file mode 100644 index 0000000..75151f4 --- /dev/null +++ b/mcp/backup/server.go @@ -0,0 +1,822 @@ +//go:build mcp_http + 
+package main + +import ( + "encoding/json" + "fmt" + "log" + "net/http" + "os" + "path/filepath" + "strconv" + "time" + + "git.teamworkapps.com/shortcut/cremote/client" +) + +// DebugLogger handles debug logging to file +type DebugLogger struct { + file *os.File + logger *log.Logger +} + +// NewDebugLogger creates a new debug logger +func NewDebugLogger() (*DebugLogger, error) { + logDir := "/tmp/cremote-mcp-logs" + if err := os.MkdirAll(logDir, 0755); err != nil { + return nil, fmt.Errorf("failed to create log directory: %w", err) + } + + logFile := filepath.Join(logDir, fmt.Sprintf("mcp-http-%d.log", time.Now().Unix())) + file, err := os.OpenFile(logFile, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) + if err != nil { + return nil, fmt.Errorf("failed to open log file: %w", err) + } + + logger := log.New(file, "", log.LstdFlags|log.Lmicroseconds|log.Lshortfile) + logger.Printf("=== MCP HTTP Server Debug Log Started ===") + + return &DebugLogger{ + file: file, + logger: logger, + }, nil +} + +// Log writes a debug message +func (d *DebugLogger) Log(format string, args ...interface{}) { + if d != nil && d.logger != nil { + d.logger.Printf(format, args...) 
+ } +} + +// LogJSON logs a JSON object with a label +func (d *DebugLogger) LogJSON(label string, obj interface{}) { + if d != nil && d.logger != nil { + jsonBytes, err := json.MarshalIndent(obj, "", " ") + if err != nil { + d.logger.Printf("%s: JSON marshal error: %v", label, err) + } else { + d.logger.Printf("%s:\n%s", label, string(jsonBytes)) + } + } +} + +// Close closes the debug logger +func (d *DebugLogger) Close() { + if d != nil && d.file != nil { + d.logger.Printf("=== MCP HTTP Server Debug Log Ended ===") + d.file.Close() + } +} + +var debugLogger *DebugLogger + +// MCPServer wraps the cremote client with MCP protocol +type MCPServer struct { + client *client.Client + currentTab string + tabHistory []string + iframeMode bool + lastError string + screenshots []string +} + +// MCPRequest represents an incoming MCP request +type MCPRequest struct { + Method string `json:"method"` + Params map[string]interface{} `json:"params"` + ID interface{} `json:"id"` +} + +// MCPResponse represents an MCP response +type MCPResponse struct { + Result interface{} `json:"result,omitempty"` + Error *MCPError `json:"error,omitempty"` + ID interface{} `json:"id"` +} + +// MCPError represents an MCP error +type MCPError struct { + Code int `json:"code"` + Message string `json:"message"` +} + +// ToolResult represents the result of a tool execution +type ToolResult struct { + Success bool `json:"success"` + Data interface{} `json:"data,omitempty"` + Screenshot string `json:"screenshot,omitempty"` + CurrentTab string `json:"current_tab,omitempty"` + TabHistory []string `json:"tab_history,omitempty"` + IframeMode bool `json:"iframe_mode"` + Error string `json:"error,omitempty"` + Metadata map[string]string `json:"metadata,omitempty"` +} + +// NewMCPServer creates a new MCP server instance +func NewMCPServer(host string, port int) *MCPServer { + return &MCPServer{ + client: client.NewClient(host, port), + tabHistory: make([]string, 0), + screenshots: make([]string, 0), + } +} + 
+// HandleRequest processes an MCP request +func (s *MCPServer) HandleRequest(req MCPRequest) MCPResponse { + debugLogger.Log("HandleRequest called with method: %s, ID: %v", req.Method, req.ID) + debugLogger.LogJSON("Incoming Request", req) + + var resp MCPResponse + + switch req.Method { + case "initialize": + debugLogger.Log("Handling initialize request") + resp = s.handleInitialize(req) + case "tools/list": + debugLogger.Log("Handling tools/list request") + resp = s.handleToolsList(req) + case "tools/call": + debugLogger.Log("Handling tools/call request") + resp = s.handleToolCall(req) + default: + debugLogger.Log("Unknown method: %s", req.Method) + resp = MCPResponse{ + Error: &MCPError{Code: -32601, Message: "Method not found"}, + ID: req.ID, + } + } + + debugLogger.LogJSON("Response", resp) + debugLogger.Log("HandleRequest completed for method: %s", req.Method) + return resp +} + +// handleInitialize handles the MCP initialize request +func (s *MCPServer) handleInitialize(req MCPRequest) MCPResponse { + return MCPResponse{ + Result: map[string]interface{}{ + "protocolVersion": "2024-11-05", + "capabilities": map[string]interface{}{ + "tools": map[string]interface{}{ + "listChanged": true, + }, + }, + "serverInfo": map[string]interface{}{ + "name": "cremote-mcp", + "version": "1.0.0", + }, + }, + ID: req.ID, + } +} + +// handleToolsList returns the list of available tools +func (s *MCPServer) handleToolsList(req MCPRequest) MCPResponse { + tools := []map[string]interface{}{ + { + "name": "web_navigate", + "description": "Navigate to a URL and optionally take a screenshot", + "inputSchema": map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "url": map[string]interface{}{"type": "string", "description": "URL to navigate to"}, + "tab": map[string]interface{}{"type": "string", "description": "Tab ID (optional, uses current tab)"}, + "screenshot": map[string]interface{}{"type": "boolean", "description": "Take screenshot after 
navigation"}, + "timeout": map[string]interface{}{"type": "integer", "description": "Timeout in seconds", "default": 5}, + }, + "required": []string{"url"}, + }, + }, + { + "name": "web_interact", + "description": "Interact with web elements (click, fill, submit)", + "inputSchema": map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "action": map[string]interface{}{"type": "string", "enum": []string{"click", "fill", "submit", "upload"}}, + "selector": map[string]interface{}{"type": "string", "description": "CSS selector for the element"}, + "value": map[string]interface{}{"type": "string", "description": "Value to fill (for fill/upload actions)"}, + "tab": map[string]interface{}{"type": "string", "description": "Tab ID (optional)"}, + "timeout": map[string]interface{}{"type": "integer", "description": "Timeout in seconds", "default": 5}, + }, + "required": []string{"action", "selector"}, + }, + }, + { + "name": "web_extract", + "description": "Extract data from the page (source, element HTML, or execute JavaScript)", + "inputSchema": map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "type": map[string]interface{}{"type": "string", "enum": []string{"source", "element", "javascript"}}, + "selector": map[string]interface{}{"type": "string", "description": "CSS selector (for element type)"}, + "code": map[string]interface{}{"type": "string", "description": "JavaScript code (for javascript type)"}, + "tab": map[string]interface{}{"type": "string", "description": "Tab ID (optional)"}, + "timeout": map[string]interface{}{"type": "integer", "description": "Timeout in seconds", "default": 5}, + }, + "required": []string{"type"}, + }, + }, + { + "name": "web_screenshot", + "description": "Take a screenshot of the current page", + "inputSchema": map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "output": map[string]interface{}{"type": "string", "description": "Output file 
path"}, + "full_page": map[string]interface{}{"type": "boolean", "description": "Capture full page", "default": false}, + "tab": map[string]interface{}{"type": "string", "description": "Tab ID (optional)"}, + "timeout": map[string]interface{}{"type": "integer", "description": "Timeout in seconds", "default": 5}, + }, + "required": []string{"output"}, + }, + }, + { + "name": "web_manage_tabs", + "description": "Manage browser tabs (open, close, list, switch)", + "inputSchema": map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "action": map[string]interface{}{"type": "string", "enum": []string{"open", "close", "list", "switch"}}, + "tab": map[string]interface{}{"type": "string", "description": "Tab ID (for close/switch actions)"}, + "timeout": map[string]interface{}{"type": "integer", "description": "Timeout in seconds", "default": 5}, + }, + "required": []string{"action"}, + }, + }, + { + "name": "web_iframe", + "description": "Switch iframe context for subsequent operations", + "inputSchema": map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "action": map[string]interface{}{"type": "string", "enum": []string{"enter", "exit"}}, + "selector": map[string]interface{}{"type": "string", "description": "Iframe CSS selector (for enter action)"}, + "tab": map[string]interface{}{"type": "string", "description": "Tab ID (optional)"}, + }, + "required": []string{"action"}, + }, + }, + } + + return MCPResponse{ + Result: map[string]interface{}{"tools": tools}, + ID: req.ID, + } +} + +// handleToolCall executes a tool call +func (s *MCPServer) handleToolCall(req MCPRequest) MCPResponse { + params, ok := req.Params["arguments"].(map[string]interface{}) + if !ok { + return MCPResponse{ + Error: &MCPError{Code: -32602, Message: "Invalid parameters"}, + ID: req.ID, + } + } + + toolName, ok := req.Params["name"].(string) + if !ok { + return MCPResponse{ + Error: &MCPError{Code: -32602, Message: "Tool name 
required"}, + ID: req.ID, + } + } + + var result ToolResult + var err error + + switch toolName { + case "web_navigate": + result, err = s.handleNavigate(params) + case "web_interact": + result, err = s.handleInteract(params) + case "web_extract": + result, err = s.handleExtract(params) + case "web_screenshot": + result, err = s.handleScreenshot(params) + case "web_manage_tabs": + result, err = s.handleManageTabs(params) + case "web_iframe": + result, err = s.handleIframe(params) + default: + return MCPResponse{ + Error: &MCPError{Code: -32601, Message: "Unknown tool: " + toolName}, + ID: req.ID, + } + } + + if err != nil { + result.Success = false + result.Error = err.Error() + s.lastError = err.Error() + } + + // Always include current state in response + result.CurrentTab = s.currentTab + result.TabHistory = s.tabHistory + result.IframeMode = s.iframeMode + + return MCPResponse{ + Result: result, + ID: req.ID, + } +} + +// Helper function to get string parameter with default +func getStringParam(params map[string]interface{}, key, defaultValue string) string { + if val, ok := params[key].(string); ok { + return val + } + return defaultValue +} + +// Helper function to get int parameter with default +func getIntParam(params map[string]interface{}, key string, defaultValue int) int { + if val, ok := params[key].(float64); ok { + return int(val) + } + if val, ok := params[key].(int); ok { + return val + } + return defaultValue +} + +// Helper function to get bool parameter with default +func getBoolParam(params map[string]interface{}, key string, defaultValue bool) bool { + if val, ok := params[key].(bool); ok { + return val + } + return defaultValue +} + +// Helper function to resolve tab ID +func (s *MCPServer) resolveTabID(tabParam string) string { + if tabParam != "" { + return tabParam + } + return s.currentTab +} + +// handleNavigate handles web navigation +func (s *MCPServer) handleNavigate(params map[string]interface{}) (ToolResult, error) { + url := 
getStringParam(params, "url", "") + if url == "" { + return ToolResult{}, fmt.Errorf("url parameter is required") + } + + tab := getStringParam(params, "tab", "") + timeout := getIntParam(params, "timeout", 5) + takeScreenshot := getBoolParam(params, "screenshot", false) + + // If no tab specified and we don't have a current tab, open one + if tab == "" && s.currentTab == "" { + newTab, err := s.client.OpenTab(timeout) + if err != nil { + return ToolResult{}, fmt.Errorf("failed to open new tab: %w", err) + } + s.currentTab = newTab + s.tabHistory = append(s.tabHistory, newTab) + tab = newTab + } else if tab == "" { + tab = s.currentTab + } + + // Load the URL + err := s.client.LoadURL(tab, url, timeout) + if err != nil { + return ToolResult{}, fmt.Errorf("failed to load URL: %w", err) + } + + result := ToolResult{ + Success: true, + Data: map[string]string{"url": url, "tab": tab}, + } + + // Take screenshot if requested + if takeScreenshot { + screenshotPath := fmt.Sprintf("/tmp/navigate-%d.png", time.Now().Unix()) + err = s.client.TakeScreenshot(tab, screenshotPath, false, timeout) + if err == nil { + result.Screenshot = screenshotPath + s.screenshots = append(s.screenshots, screenshotPath) + } + } + + return result, nil +} + +// handleInteract handles web element interactions +func (s *MCPServer) handleInteract(params map[string]interface{}) (ToolResult, error) { + action := getStringParam(params, "action", "") + selector := getStringParam(params, "selector", "") + value := getStringParam(params, "value", "") + tab := s.resolveTabID(getStringParam(params, "tab", "")) + timeout := getIntParam(params, "timeout", 5) + + if action == "" || selector == "" { + return ToolResult{}, fmt.Errorf("action and selector parameters are required") + } + + if tab == "" { + return ToolResult{}, fmt.Errorf("no active tab available") + } + + var err error + result := ToolResult{Success: true} + + switch action { + case "click": + err = s.client.ClickElement(tab, selector, timeout, 
timeout) + result.Data = map[string]string{"action": "clicked", "selector": selector} + + case "fill": + if value == "" { + return ToolResult{}, fmt.Errorf("value parameter is required for fill action") + } + err = s.client.FillFormField(tab, selector, value, timeout, timeout) + result.Data = map[string]string{"action": "filled", "selector": selector, "value": value} + + case "submit": + err = s.client.SubmitForm(tab, selector, timeout, timeout) + result.Data = map[string]string{"action": "submitted", "selector": selector} + + case "upload": + if value == "" { + return ToolResult{}, fmt.Errorf("value parameter (file path) is required for upload action") + } + err = s.client.UploadFile(tab, selector, value, timeout, timeout) + result.Data = map[string]string{"action": "uploaded", "selector": selector, "file": value} + + default: + return ToolResult{}, fmt.Errorf("unknown action: %s", action) + } + + if err != nil { + return ToolResult{}, fmt.Errorf("failed to %s element: %w", action, err) + } + + return result, nil +} + +// handleExtract handles data extraction from pages +func (s *MCPServer) handleExtract(params map[string]interface{}) (ToolResult, error) { + extractType := getStringParam(params, "type", "") + selector := getStringParam(params, "selector", "") + code := getStringParam(params, "code", "") + tab := s.resolveTabID(getStringParam(params, "tab", "")) + timeout := getIntParam(params, "timeout", 5) + + if extractType == "" { + return ToolResult{}, fmt.Errorf("type parameter is required") + } + + if tab == "" { + return ToolResult{}, fmt.Errorf("no active tab available") + } + + var data interface{} + var err error + + switch extractType { + case "source": + data, err = s.client.GetPageSource(tab, timeout) + + case "element": + if selector == "" { + return ToolResult{}, fmt.Errorf("selector parameter is required for element extraction") + } + data, err = s.client.GetElementHTML(tab, selector, timeout) + + case "javascript": + if code == "" { + return 
ToolResult{}, fmt.Errorf("code parameter is required for javascript extraction") + } + data, err = s.client.EvalJS(tab, code, timeout) + + default: + return ToolResult{}, fmt.Errorf("unknown extraction type: %s", extractType) + } + + if err != nil { + return ToolResult{}, fmt.Errorf("failed to extract %s: %w", extractType, err) + } + + return ToolResult{ + Success: true, + Data: data, + Metadata: map[string]string{ + "type": extractType, + "selector": selector, + }, + }, nil +} + +// handleScreenshot handles screenshot capture +func (s *MCPServer) handleScreenshot(params map[string]interface{}) (ToolResult, error) { + output := getStringParam(params, "output", "") + if output == "" { + output = fmt.Sprintf("/tmp/screenshot-%d.png", time.Now().Unix()) + } + + tab := s.resolveTabID(getStringParam(params, "tab", "")) + fullPage := getBoolParam(params, "full_page", false) + timeout := getIntParam(params, "timeout", 5) + + if tab == "" { + return ToolResult{}, fmt.Errorf("no active tab available") + } + + err := s.client.TakeScreenshot(tab, output, fullPage, timeout) + if err != nil { + return ToolResult{}, fmt.Errorf("failed to take screenshot: %w", err) + } + + s.screenshots = append(s.screenshots, output) + + return ToolResult{ + Success: true, + Screenshot: output, + Data: map[string]interface{}{ + "output": output, + "full_page": fullPage, + "tab": tab, + }, + }, nil +} + +// handleManageTabs handles tab management operations +func (s *MCPServer) handleManageTabs(params map[string]interface{}) (ToolResult, error) { + action := getStringParam(params, "action", "") + tab := getStringParam(params, "tab", "") + timeout := getIntParam(params, "timeout", 5) + + if action == "" { + return ToolResult{}, fmt.Errorf("action parameter is required") + } + + var data interface{} + var err error + + switch action { + case "open": + newTab, err := s.client.OpenTab(timeout) + if err != nil { + return ToolResult{}, fmt.Errorf("failed to open tab: %w", err) + } + s.currentTab = 
newTab + s.tabHistory = append(s.tabHistory, newTab) + data = map[string]string{"tab": newTab, "action": "opened"} + + case "close": + targetTab := s.resolveTabID(tab) + if targetTab == "" { + return ToolResult{}, fmt.Errorf("no tab to close") + } + err = s.client.CloseTab(targetTab, timeout) + if err != nil { + return ToolResult{}, fmt.Errorf("failed to close tab: %w", err) + } + // Remove from history and update current tab + s.removeTabFromHistory(targetTab) + data = map[string]string{"tab": targetTab, "action": "closed"} + + case "list": + tabs, err := s.client.ListTabs() + if err != nil { + return ToolResult{}, fmt.Errorf("failed to list tabs: %w", err) + } + data = tabs + + case "switch": + if tab == "" { + return ToolResult{}, fmt.Errorf("tab parameter is required for switch action") + } + s.currentTab = tab + // Move to end of history if it exists, otherwise add it + s.removeTabFromHistory(tab) + s.tabHistory = append(s.tabHistory, tab) + data = map[string]string{"tab": tab, "action": "switched"} + + default: + return ToolResult{}, fmt.Errorf("unknown tab action: %s", action) + } + + if err != nil { + return ToolResult{}, err + } + + return ToolResult{ + Success: true, + Data: data, + }, nil +} + +// handleIframe handles iframe context switching +func (s *MCPServer) handleIframe(params map[string]interface{}) (ToolResult, error) { + action := getStringParam(params, "action", "") + selector := getStringParam(params, "selector", "") + tab := s.resolveTabID(getStringParam(params, "tab", "")) + + if action == "" { + return ToolResult{}, fmt.Errorf("action parameter is required") + } + + if tab == "" { + return ToolResult{}, fmt.Errorf("no active tab available") + } + + var err error + var data map[string]string + + switch action { + case "enter": + if selector == "" { + return ToolResult{}, fmt.Errorf("selector parameter is required for enter action") + } + err = s.client.SwitchToIframe(tab, selector) + s.iframeMode = true + data = map[string]string{"action": 
"entered", "selector": selector} + + case "exit": + err = s.client.SwitchToMain(tab) + s.iframeMode = false + data = map[string]string{"action": "exited"} + + default: + return ToolResult{}, fmt.Errorf("unknown iframe action: %s", action) + } + + if err != nil { + return ToolResult{}, fmt.Errorf("failed to %s iframe: %w", action, err) + } + + return ToolResult{ + Success: true, + Data: data, + }, nil +} + +// Helper function to remove tab from history +func (s *MCPServer) removeTabFromHistory(tabID string) { + for i, id := range s.tabHistory { + if id == tabID { + s.tabHistory = append(s.tabHistory[:i], s.tabHistory[i+1:]...) + break + } + } + // If we removed the current tab, set current to the last in history + if s.currentTab == tabID { + if len(s.tabHistory) > 0 { + s.currentTab = s.tabHistory[len(s.tabHistory)-1] + } else { + s.currentTab = "" + } + } +} + +// HTTP handler for MCP requests +func (s *MCPServer) ServeHTTP(w http.ResponseWriter, r *http.Request) { + debugLogger.Log("ServeHTTP: Received %s request from %s", r.Method, r.RemoteAddr) + debugLogger.Log("ServeHTTP: Request URL: %s", r.URL.String()) + debugLogger.Log("ServeHTTP: Request headers: %v", r.Header) + + // Set CORS headers for browser compatibility + w.Header().Set("Access-Control-Allow-Origin", "*") + w.Header().Set("Access-Control-Allow-Methods", "POST, OPTIONS") + w.Header().Set("Access-Control-Allow-Headers", "Content-Type") + w.Header().Set("Content-Type", "application/json") + + // Handle preflight requests + if r.Method == "OPTIONS" { + debugLogger.Log("ServeHTTP: Handling OPTIONS preflight request") + w.WriteHeader(http.StatusOK) + return + } + + // Only accept POST requests + if r.Method != "POST" { + debugLogger.Log("ServeHTTP: Method not allowed: %s", r.Method) + http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) + return + } + + // Read and decode the request + var req MCPRequest + decoder := json.NewDecoder(r.Body) + if err := decoder.Decode(&req); err != nil { + 
debugLogger.Log("ServeHTTP: Error decoding request: %v", err) + log.Printf("Error decoding request: %v", err) + http.Error(w, "Invalid JSON", http.StatusBadRequest) + return + } + + debugLogger.LogJSON("HTTP Request", req) + log.Printf("Processing HTTP request: %s (ID: %v)", req.Method, req.ID) + + // Handle the request + resp := s.HandleRequest(req) + + debugLogger.LogJSON("HTTP Response", resp) + + // Encode and send the response + encoder := json.NewEncoder(w) + if err := encoder.Encode(resp); err != nil { + debugLogger.Log("ServeHTTP: Error encoding response: %v", err) + log.Printf("Error encoding response: %v", err) + http.Error(w, "Internal server error", http.StatusInternalServerError) + return + } + + debugLogger.Log("ServeHTTP: Response sent successfully for request: %s", req.Method) + log.Printf("Completed HTTP request: %s", req.Method) +} + +func main() { + // Initialize debug logger + var err error + debugLogger, err = NewDebugLogger() + if err != nil { + log.Printf("Warning: Failed to initialize debug logger: %v", err) + // Continue without debug logging + } else { + defer debugLogger.Close() + debugLogger.Log("MCP HTTP Server starting up") + } + + // Get cremote daemon connection settings + cremoteHost := os.Getenv("CREMOTE_HOST") + if cremoteHost == "" { + cremoteHost = "localhost" + } + + cremotePortStr := os.Getenv("CREMOTE_PORT") + cremotePort := 8989 + if cremotePortStr != "" { + if p, err := strconv.Atoi(cremotePortStr); err == nil { + cremotePort = p + } + } + + // Get HTTP server settings + httpHost := os.Getenv("MCP_HOST") + if httpHost == "" { + httpHost = "localhost" + } + + httpPortStr := os.Getenv("MCP_PORT") + httpPort := 8990 + if httpPortStr != "" { + if p, err := strconv.Atoi(httpPortStr); err == nil { + httpPort = p + } + } + + debugLogger.Log("HTTP server will listen on %s:%d", httpHost, httpPort) + debugLogger.Log("Connecting to cremote daemon at %s:%d", cremoteHost, cremotePort) + + log.Printf("Starting MCP HTTP server on %s:%d", 
httpHost, httpPort) + log.Printf("Connecting to cremote daemon at %s:%d", cremoteHost, cremotePort) + + // Create the MCP server + mcpServer := NewMCPServer(cremoteHost, cremotePort) + + // Set up HTTP routes + http.Handle("/mcp", mcpServer) + + // Health check endpoint + http.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(map[string]string{ + "status": "healthy", + "cremote_host": cremoteHost, + "cremote_port": strconv.Itoa(cremotePort), + }) + }) + + // Root endpoint with basic info + http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(map[string]interface{}{ + "service": "Cremote MCP Server", + "version": "1.0.0", + "endpoints": map[string]string{ + "mcp": "/mcp", + "health": "/health", + }, + "cremote_daemon": fmt.Sprintf("%s:%d", cremoteHost, cremotePort), + }) + }) + + // Start the HTTP server + addr := fmt.Sprintf("%s:%d", httpHost, httpPort) + log.Printf("MCP HTTP server listening on http://%s", addr) + log.Printf("MCP endpoint: http://%s/mcp", addr) + log.Printf("Health check: http://%s/health", addr) + + if err := http.ListenAndServe(addr, nil); err != nil { + log.Fatalf("HTTP server failed: %v", err) + } +} diff --git a/mcp/claude_desktop_config.json b/mcp/claude_desktop_config.json new file mode 100644 index 0000000..729a867 --- /dev/null +++ b/mcp/claude_desktop_config.json @@ -0,0 +1,11 @@ +{ + "mcpServers": { + "cremote": { + "command": "/path/to/cremote/mcp/cremote-mcp", + "env": { + "CREMOTE_HOST": "localhost", + "CREMOTE_PORT": "8989" + } + } + } +} diff --git a/mcp/cremote-mcp b/mcp/cremote-mcp new file mode 100755 index 0000000..4c1c2c8 Binary files /dev/null and b/mcp/cremote-mcp differ diff --git a/mcp/go.mod b/mcp/go.mod new file mode 100644 index 0000000..a1ce708 --- /dev/null +++ b/mcp/go.mod @@ -0,0 +1,22 @@ +module 
git.teamworkapps.com/shortcut/cremote/mcp + +go 1.24.1 + +replace git.teamworkapps.com/shortcut/cremote => ../ + +require ( + git.teamworkapps.com/shortcut/cremote v0.0.0-00010101000000-000000000000 + github.com/mark3labs/mcp-go v0.37.0 +) + +require ( + github.com/bahlo/generic-list-go v0.2.0 // indirect + github.com/buger/jsonparser v1.1.1 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/invopop/jsonschema v0.13.0 // indirect + github.com/mailru/easyjson v0.7.7 // indirect + github.com/spf13/cast v1.7.1 // indirect + github.com/wk8/go-ordered-map/v2 v2.1.8 // indirect + github.com/yosida95/uritemplate/v3 v3.0.2 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/mcp/go.sum b/mcp/go.sum new file mode 100644 index 0000000..6a2363e --- /dev/null +++ b/mcp/go.sum @@ -0,0 +1,39 @@ +github.com/bahlo/generic-list-go v0.2.0 h1:5sz/EEAK+ls5wF+NeqDpk5+iNdMDXrh3z3nPnH1Wvgk= +github.com/bahlo/generic-list-go v0.2.0/go.mod h1:2KvAjgMlE5NNynlg/5iLrrCCZ2+5xWbdbCW3pNTGyYg= +github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs= +github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= +github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= +github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/invopop/jsonschema v0.13.0 h1:KvpoAJWEjR3uD9Kbm2HWJmqsEaHt8lBUpd0qHcIi21E= +github.com/invopop/jsonschema 
v0.13.0/go.mod h1:ffZ5Km5SWWRAIN6wbDXItl95euhFz2uON45H2qjYt+0= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= +github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/mark3labs/mcp-go v0.37.0 h1:BywvZLPRT6Zx6mMG/MJfxLSZQkTGIcJSEGKsvr4DsoQ= +github.com/mark3labs/mcp-go v0.37.0/go.mod h1:T7tUa2jO6MavG+3P25Oy/jR7iCeJPHImCZHRymCn39g= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= +github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= +github.com/spf13/cast v1.7.1 h1:cuNEagBQEHWN1FnbGEjCXL2szYEXqfJPbP2HNUaca9Y= +github.com/spf13/cast v1.7.1/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/wk8/go-ordered-map/v2 v2.1.8 h1:5h/BUHu93oj4gIdvHHHGsScSTMijfx5PeYkE/fJgbpc= +github.com/wk8/go-ordered-map/v2 v2.1.8/go.mod h1:5nJHM5DyteebpVlHnWMV0rPz6Zp7+xBAnxjb1X5vnTw= +github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4= +github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/mcp/main.go b/mcp/main.go new file mode 100644 index 0000000..1227787 --- /dev/null +++ b/mcp/main.go @@ -0,0 +1,695 @@ +package main + +import ( + "context" + "fmt" + "log" + "os" + "strconv" + "time" + + "git.teamworkapps.com/shortcut/cremote/client" + "github.com/mark3labs/mcp-go/mcp" + "github.com/mark3labs/mcp-go/server" +) + +// CremoteServer wraps the cremote client for MCP +type CremoteServer struct { + client *client.Client + currentTab string + tabHistory []string + iframeMode bool + screenshots []string +} + +// NewCremoteServer creates a new cremote MCP server +func NewCremoteServer(host string, port int) *CremoteServer { + return &CremoteServer{ + client: client.NewClient(host, port), + tabHistory: make([]string, 0), + screenshots: make([]string, 0), + } +} + +// Helper functions for parameter extraction +func getStringParam(params map[string]any, key, defaultValue string) string { + if val, ok := params[key].(string); ok { + return val + } + return defaultValue +} + +func getBoolParam(params map[string]any, key string, defaultValue bool) bool { + if val, ok := params[key].(bool); ok { + return val + } + return defaultValue +} + +func getIntParam(params map[string]any, key string, defaultValue int) int { + if val, ok := params[key].(float64); ok { + return int(val) + } + if val, ok := params[key].(int); ok { + return val + } + return defaultValue +} + +func main() { + // Get cremote daemon connection settings + cremoteHost := os.Getenv("CREMOTE_HOST") + if cremoteHost == "" { + cremoteHost = "localhost" + } + + cremotePortStr := os.Getenv("CREMOTE_PORT") + cremotePort := 8989 + if cremotePortStr != "" { + if p, err := strconv.Atoi(cremotePortStr); err == nil { + cremotePort = p + } 
+ } + + log.Printf("Starting cremote MCP server, connecting to cremote daemon at %s:%d", cremoteHost, cremotePort) + + // Create the cremote server + cremoteServer := NewCremoteServer(cremoteHost, cremotePort) + + // Create MCP server + mcpServer := server.NewMCPServer("cremote-mcp", "1.0.0") + + // Register web_navigate tool + mcpServer.AddTool(mcp.Tool{ + Name: "web_navigate", + Description: "Navigate to a URL and optionally take a screenshot", + InputSchema: mcp.ToolInputSchema{ + Type: "object", + Properties: map[string]any{ + "url": map[string]any{ + "type": "string", + "description": "URL to navigate to", + }, + "tab": map[string]any{ + "type": "string", + "description": "Tab ID (optional, uses current tab)", + }, + "timeout": map[string]any{ + "type": "integer", + "description": "Timeout in seconds", + "default": 5, + }, + "screenshot": map[string]any{ + "type": "boolean", + "description": "Take screenshot after navigation", + }, + }, + Required: []string{"url"}, + }, + }, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + // Convert arguments to map + params, ok := request.Params.Arguments.(map[string]any) + if !ok { + return nil, fmt.Errorf("invalid arguments format") + } + + url := getStringParam(params, "url", "") + if url == "" { + return nil, fmt.Errorf("url parameter is required") + } + + tab := getStringParam(params, "tab", cremoteServer.currentTab) + timeout := getIntParam(params, "timeout", 5) + takeScreenshot := getBoolParam(params, "screenshot", false) + + // If no tab specified and no current tab, create a new one + if tab == "" { + newTab, err := cremoteServer.client.OpenTab(timeout) + if err != nil { + return nil, fmt.Errorf("failed to create new tab: %w", err) + } + tab = newTab + cremoteServer.currentTab = tab + cremoteServer.tabHistory = append(cremoteServer.tabHistory, tab) + } + + // Load the URL + err := cremoteServer.client.LoadURL(tab, url, timeout) + if err != nil { + return nil, fmt.Errorf("failed 
to load URL: %w", err) + } + + message := fmt.Sprintf("Successfully navigated to %s in tab %s", url, tab) + + // Take screenshot if requested + if takeScreenshot { + screenshotPath := fmt.Sprintf("/tmp/navigate-%d.png", time.Now().Unix()) + err = cremoteServer.client.TakeScreenshot(tab, screenshotPath, false, timeout) + if err == nil { + cremoteServer.screenshots = append(cremoteServer.screenshots, screenshotPath) + message += fmt.Sprintf(" (screenshot saved to %s)", screenshotPath) + } + } + + return &mcp.CallToolResult{ + Content: []mcp.Content{ + mcp.NewTextContent(message), + }, + IsError: false, + }, nil + }) + + // Register web_interact tool + mcpServer.AddTool(mcp.Tool{ + Name: "web_interact", + Description: "Interact with web elements (click, fill, submit)", + InputSchema: mcp.ToolInputSchema{ + Type: "object", + Properties: map[string]any{ + "action": map[string]any{ + "type": "string", + "description": "Action to perform", + "enum": []any{"click", "fill", "submit", "upload"}, + }, + "selector": map[string]any{ + "type": "string", + "description": "CSS selector for the element", + }, + "value": map[string]any{ + "type": "string", + "description": "Value to fill (for fill/upload actions)", + }, + "tab": map[string]any{ + "type": "string", + "description": "Tab ID (optional)", + }, + "timeout": map[string]any{ + "type": "integer", + "description": "Timeout in seconds", + "default": 5, + }, + }, + Required: []string{"action", "selector"}, + }, + }, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + // Convert arguments to map + params, ok := request.Params.Arguments.(map[string]any) + if !ok { + return nil, fmt.Errorf("invalid arguments format") + } + + action := getStringParam(params, "action", "") + selector := getStringParam(params, "selector", "") + value := getStringParam(params, "value", "") + tab := getStringParam(params, "tab", cremoteServer.currentTab) + timeout := getIntParam(params, "timeout", 5) + + if action 
== "" { + return nil, fmt.Errorf("action parameter is required") + } + if selector == "" { + return nil, fmt.Errorf("selector parameter is required") + } + if tab == "" { + return nil, fmt.Errorf("no tab available - navigate to a page first") + } + + var err error + var message string + + switch action { + case "click": + err = cremoteServer.client.ClickElement(tab, selector, timeout, timeout) + message = fmt.Sprintf("Clicked element %s", selector) + + case "fill": + if value == "" { + return nil, fmt.Errorf("value parameter is required for fill action") + } + err = cremoteServer.client.FillFormField(tab, selector, value, timeout, timeout) + message = fmt.Sprintf("Filled element %s with value", selector) + + case "submit": + err = cremoteServer.client.SubmitForm(tab, selector, timeout, timeout) + message = fmt.Sprintf("Submitted form %s", selector) + + case "upload": + if value == "" { + return nil, fmt.Errorf("value parameter (file path) is required for upload action") + } + err = cremoteServer.client.UploadFile(tab, selector, value, timeout, timeout) + message = fmt.Sprintf("Uploaded file %s to element %s", value, selector) + + default: + return nil, fmt.Errorf("unknown action: %s", action) + } + + if err != nil { + return nil, fmt.Errorf("failed to %s element: %w", action, err) + } + + return &mcp.CallToolResult{ + Content: []mcp.Content{ + mcp.NewTextContent(message), + }, + IsError: false, + }, nil + }) + + // Register web_extract tool + mcpServer.AddTool(mcp.Tool{ + Name: "web_extract", + Description: "Extract data from the page (source, element HTML, or execute JavaScript)", + InputSchema: mcp.ToolInputSchema{ + Type: "object", + Properties: map[string]any{ + "type": map[string]any{ + "type": "string", + "description": "Type of extraction", + "enum": []any{"source", "element", "javascript"}, + }, + "selector": map[string]any{ + "type": "string", + "description": "CSS selector (for element type)", + }, + "code": map[string]any{ + "type": "string", + 
"description": "JavaScript code (for javascript type)", + }, + "tab": map[string]any{ + "type": "string", + "description": "Tab ID (optional)", + }, + "timeout": map[string]any{ + "type": "integer", + "description": "Timeout in seconds", + "default": 5, + }, + }, + Required: []string{"type"}, + }, + }, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + // Convert arguments to map + params, ok := request.Params.Arguments.(map[string]any) + if !ok { + return nil, fmt.Errorf("invalid arguments format") + } + + extractType := getStringParam(params, "type", "") + selector := getStringParam(params, "selector", "") + code := getStringParam(params, "code", "") + tab := getStringParam(params, "tab", cremoteServer.currentTab) + timeout := getIntParam(params, "timeout", 5) + + if extractType == "" { + return nil, fmt.Errorf("type parameter is required") + } + if tab == "" { + return nil, fmt.Errorf("no tab available - navigate to a page first") + } + + var data string + var err error + + switch extractType { + case "source": + data, err = cremoteServer.client.GetPageSource(tab, timeout) + + case "element": + if selector == "" { + return nil, fmt.Errorf("selector parameter is required for element extraction") + } + data, err = cremoteServer.client.GetElementHTML(tab, selector, timeout) + + case "javascript": + if code == "" { + return nil, fmt.Errorf("code parameter is required for javascript extraction") + } + data, err = cremoteServer.client.EvalJS(tab, code, timeout) + + default: + return nil, fmt.Errorf("unknown extraction type: %s", extractType) + } + + if err != nil { + return nil, fmt.Errorf("failed to extract %s: %w", extractType, err) + } + + return &mcp.CallToolResult{ + Content: []mcp.Content{ + mcp.NewTextContent(fmt.Sprintf("Extracted %s data: %s", extractType, data)), + }, + IsError: false, + }, nil + }) + + // Register web_screenshot tool + mcpServer.AddTool(mcp.Tool{ + Name: "web_screenshot", + Description: "Take a 
screenshot of the current page", + InputSchema: mcp.ToolInputSchema{ + Type: "object", + Properties: map[string]any{ + "output": map[string]any{ + "type": "string", + "description": "Output file path", + }, + "full_page": map[string]any{ + "type": "boolean", + "description": "Capture full page", + "default": false, + }, + "tab": map[string]any{ + "type": "string", + "description": "Tab ID (optional)", + }, + "timeout": map[string]any{ + "type": "integer", + "description": "Timeout in seconds", + "default": 5, + }, + }, + Required: []string{"output"}, + }, + }, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + // Convert arguments to map + params, ok := request.Params.Arguments.(map[string]any) + if !ok { + return nil, fmt.Errorf("invalid arguments format") + } + + output := getStringParam(params, "output", "") + fullPage := getBoolParam(params, "full_page", false) + tab := getStringParam(params, "tab", cremoteServer.currentTab) + timeout := getIntParam(params, "timeout", 5) + + if output == "" { + return nil, fmt.Errorf("output parameter is required") + } + if tab == "" { + return nil, fmt.Errorf("no tab available - navigate to a page first") + } + + err := cremoteServer.client.TakeScreenshot(tab, output, fullPage, timeout) + if err != nil { + return nil, fmt.Errorf("failed to take screenshot: %w", err) + } + + cremoteServer.screenshots = append(cremoteServer.screenshots, output) + + return &mcp.CallToolResult{ + Content: []mcp.Content{ + mcp.NewTextContent(fmt.Sprintf("Screenshot saved to %s", output)), + }, + IsError: false, + }, nil + }) + + // Register web_manage_tabs tool + mcpServer.AddTool(mcp.Tool{ + Name: "web_manage_tabs", + Description: "Manage browser tabs (open, close, list, switch)", + InputSchema: mcp.ToolInputSchema{ + Type: "object", + Properties: map[string]any{ + "action": map[string]any{ + "type": "string", + "description": "Action to perform", + "enum": []any{"open", "close", "list", "switch"}, + }, + 
"tab": map[string]any{ + "type": "string", + "description": "Tab ID (for close/switch actions)", + }, + "timeout": map[string]any{ + "type": "integer", + "description": "Timeout in seconds", + "default": 5, + }, + }, + Required: []string{"action"}, + }, + }, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + // Convert arguments to map + params, ok := request.Params.Arguments.(map[string]any) + if !ok { + return nil, fmt.Errorf("invalid arguments format") + } + + action := getStringParam(params, "action", "") + tab := getStringParam(params, "tab", "") + timeout := getIntParam(params, "timeout", 5) + + if action == "" { + return nil, fmt.Errorf("action parameter is required") + } + + var err error + var message string + + switch action { + case "open": + newTab, err := cremoteServer.client.OpenTab(timeout) + if err != nil { + return nil, fmt.Errorf("failed to open new tab: %w", err) + } + cremoteServer.currentTab = newTab + cremoteServer.tabHistory = append(cremoteServer.tabHistory, newTab) + message = fmt.Sprintf("Opened new tab: %s", newTab) + + case "close": + if tab == "" { + return nil, fmt.Errorf("tab parameter is required for close action") + } + err = cremoteServer.client.CloseTab(tab, timeout) + if err != nil { + return nil, fmt.Errorf("failed to close tab: %w", err) + } + // Remove from history and update current tab + for i, id := range cremoteServer.tabHistory { + if id == tab { + cremoteServer.tabHistory = append(cremoteServer.tabHistory[:i], cremoteServer.tabHistory[i+1:]...) 
+ break + } + } + if cremoteServer.currentTab == tab { + if len(cremoteServer.tabHistory) > 0 { + cremoteServer.currentTab = cremoteServer.tabHistory[len(cremoteServer.tabHistory)-1] + } else { + cremoteServer.currentTab = "" + } + } + message = fmt.Sprintf("Closed tab: %s", tab) + + case "list": + tabs, err := cremoteServer.client.ListTabs() + if err != nil { + return nil, fmt.Errorf("failed to list tabs: %w", err) + } + message = fmt.Sprintf("Found %d tabs: %v", len(tabs), tabs) + + case "switch": + if tab == "" { + return nil, fmt.Errorf("tab parameter is required for switch action") + } + cremoteServer.currentTab = tab + // Add to history if not already there + found := false + for _, t := range cremoteServer.tabHistory { + if t == tab { + found = true + break + } + } + if !found { + cremoteServer.tabHistory = append(cremoteServer.tabHistory, tab) + } + message = fmt.Sprintf("Switched to tab: %s", tab) + + default: + return nil, fmt.Errorf("unknown tab action: %s", action) + } + + return &mcp.CallToolResult{ + Content: []mcp.Content{ + mcp.NewTextContent(message), + }, + IsError: false, + }, nil + }) + + // Register web_iframe tool + mcpServer.AddTool(mcp.Tool{ + Name: "web_iframe", + Description: "Switch iframe context for subsequent operations", + InputSchema: mcp.ToolInputSchema{ + Type: "object", + Properties: map[string]any{ + "action": map[string]any{ + "type": "string", + "description": "Action to perform", + "enum": []any{"enter", "exit"}, + }, + "selector": map[string]any{ + "type": "string", + "description": "Iframe CSS selector (for enter action)", + }, + "tab": map[string]any{ + "type": "string", + "description": "Tab ID (optional)", + }, + }, + Required: []string{"action"}, + }, + }, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + // Convert arguments to map + params, ok := request.Params.Arguments.(map[string]any) + if !ok { + return nil, fmt.Errorf("invalid arguments format") + } + + action := 
getStringParam(params, "action", "") + selector := getStringParam(params, "selector", "") + tab := getStringParam(params, "tab", cremoteServer.currentTab) + + if action == "" { + return nil, fmt.Errorf("action parameter is required") + } + if tab == "" { + return nil, fmt.Errorf("no tab available - navigate to a page first") + } + + var err error + var message string + + switch action { + case "enter": + if selector == "" { + return nil, fmt.Errorf("selector parameter is required for enter action") + } + err = cremoteServer.client.SwitchToIframe(tab, selector) + cremoteServer.iframeMode = true + message = fmt.Sprintf("Entered iframe: %s", selector) + + case "exit": + err = cremoteServer.client.SwitchToMain(tab) + cremoteServer.iframeMode = false + message = "Exited iframe context" + + default: + return nil, fmt.Errorf("unknown iframe action: %s", action) + } + + if err != nil { + return nil, fmt.Errorf("failed to %s iframe: %w", action, err) + } + + return &mcp.CallToolResult{ + Content: []mcp.Content{ + mcp.NewTextContent(message), + }, + IsError: false, + }, nil + }) + + // Register file_upload tool + mcpServer.AddTool(mcp.Tool{ + Name: "file_upload", + Description: "Upload a file from the client to the container for use in form uploads", + InputSchema: mcp.ToolInputSchema{ + Type: "object", + Properties: map[string]any{ + "local_path": map[string]any{ + "type": "string", + "description": "Path to the file on the client machine", + }, + "container_path": map[string]any{ + "type": "string", + "description": "Optional path where to store the file in the container (defaults to /tmp/filename)", + }, + }, + Required: []string{"local_path"}, + }, + }, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + // Convert arguments to map + params, ok := request.Params.Arguments.(map[string]any) + if !ok { + return nil, fmt.Errorf("invalid arguments format") + } + + localPath := getStringParam(params, "local_path", "") + containerPath := 
getStringParam(params, "container_path", "") + + if localPath == "" { + return nil, fmt.Errorf("local_path parameter is required") + } + + // Upload the file to the container + targetPath, err := cremoteServer.client.UploadFileToContainer(localPath, containerPath) + if err != nil { + return nil, fmt.Errorf("failed to upload file: %w", err) + } + + return &mcp.CallToolResult{ + Content: []mcp.Content{ + mcp.NewTextContent(fmt.Sprintf("File uploaded successfully to container at: %s", targetPath)), + }, + IsError: false, + }, nil + }) + + // Register file_download tool + mcpServer.AddTool(mcp.Tool{ + Name: "file_download", + Description: "Download a file from the container to the client (e.g., downloaded files from browser)", + InputSchema: mcp.ToolInputSchema{ + Type: "object", + Properties: map[string]any{ + "container_path": map[string]any{ + "type": "string", + "description": "Path to the file in the container", + }, + "local_path": map[string]any{ + "type": "string", + "description": "Path where to save the file on the client machine", + }, + }, + Required: []string{"container_path", "local_path"}, + }, + }, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + // Convert arguments to map + params, ok := request.Params.Arguments.(map[string]any) + if !ok { + return nil, fmt.Errorf("invalid arguments format") + } + + containerPath := getStringParam(params, "container_path", "") + localPath := getStringParam(params, "local_path", "") + + if containerPath == "" { + return nil, fmt.Errorf("container_path parameter is required") + } + if localPath == "" { + return nil, fmt.Errorf("local_path parameter is required") + } + + // Download the file from the container + err := cremoteServer.client.DownloadFileFromContainer(containerPath, localPath) + if err != nil { + return nil, fmt.Errorf("failed to download file: %w", err) + } + + return &mcp.CallToolResult{ + Content: []mcp.Content{ + mcp.NewTextContent(fmt.Sprintf("File downloaded 
successfully from container to: %s", localPath)), + }, + IsError: false, + }, nil + }) + + // Start the server + log.Printf("Cremote MCP server ready") + if err := server.ServeStdio(mcpServer); err != nil { + log.Fatalf("Server error: %v", err) + } +} diff --git a/mcp/test_mcp.sh b/mcp/test_mcp.sh new file mode 100755 index 0000000..150a1fd --- /dev/null +++ b/mcp/test_mcp.sh @@ -0,0 +1,5 @@ +#!/bin/bash +( +echo '{"method":"tools/list","params":{},"id":1}' +sleep 1 +) | ./cremote-mcp diff --git a/test-upload.txt b/test-upload.txt new file mode 100644 index 0000000..3e9b528 --- /dev/null +++ b/test-upload.txt @@ -0,0 +1 @@ +This is a test file for cremote file transfer