add debug

2025-08-15 16:29:57 -05:00
parent 4ade9ebafe
commit cf34368901
4 changed files with 49 additions and 645 deletions
--- a/daemon/cmd/cremotedaemon/main.go
+++ b/daemon/cmd/cremotedaemon/main.go
@@ -14,13 +14,16 @@ import (
 var (
 	daemonHost = flag.String("listen", "localhost", "Listen address")
 	port       = flag.Int("port", 8989, "Listen port")
 	debug      = flag.Bool("debug", false, "Enable debug logging")
 )
 func main() {
 	flag.Parse()
 	log.Printf("Starting cremote daemon on %s:%d (debug: %v)", *daemonHost, *port, *debug)
 	// Create and start the daemon
-	d, err := daemon.NewDaemon(*daemonHost, *port)
+	d, err := daemon.NewDaemon(*daemonHost, *port, *debug)
 	if err != nil {
 		log.Fatalf("Failed to create daemon: %v", err)
 	}
--- a/daemon/daemon.go
+++ b/daemon/daemon.go
@@ -26,6 +26,7 @@ type Daemon struct {
 	currentTab  string                  // ID of the current/last used tab
 	tabHistory  []string                // Stack of tab IDs in order of activation (LIFO)
 	consoleLogs map[string][]ConsoleLog // Maps tab ID to console logs
 	debug       bool                    // Enable debug logging
 	mu          sync.Mutex
 	server      *http.Server
 }
@@ -71,11 +72,26 @@ func checkChromeDevTools(port int) bool {
 	return resp.StatusCode == 200
 }
 // debugLog emits a printf-style message through the standard logger,
 // prefixed with "[DEBUG] ". It does nothing unless the daemon's debug
 // flag is set.
 func (d *Daemon) debugLog(format string, args ...interface{}) {
 	if !d.debug {
 		return
 	}
 	log.Printf("[DEBUG] "+format, args...)
 }
 // NewDaemon creates a new daemon instance
-func NewDaemon(host string, port int) (*Daemon, error) {
+func NewDaemon(host string, port int, debug bool) (*Daemon, error) {
 	if debug {
 		log.Printf("[DEBUG] Creating new daemon on %s:%d", host, port)
 	}
 	// Check if Chrome is running on the debug port
 	chromePort := 9222 // Default Chrome debug port
 	if debug {
 		log.Printf("[DEBUG] Checking if Chrome is running on port %d", chromePort)
 	}
 	if !checkChromeRunning(chromePort) {
 		return nil, fmt.Errorf("Chromium is not running with remote debugging enabled on port %d.\n\nTo start Chromium with remote debugging:\n  chromium --remote-debugging-port=%d --user-data-dir=/tmp/chromium-debug &\n  # or\n  google-chrome --remote-debugging-port=%d --user-data-dir=/tmp/chrome-debug &\n\nNote: The --user-data-dir flag is required to avoid conflicts with existing browser instances.", chromePort, chromePort, chromePort)
 	}
@@ -94,15 +110,23 @@ func NewDaemon(host string, port int) (*Daemon, error) {
 		return nil, fmt.Errorf("Chromium DevTools is responding on port %d but rod connection failed: %w\n\nThis is unexpected. Try restarting Chromium with:\n  chromium --remote-debugging-port=%d --user-data-dir=/tmp/chromium-debug &", chromePort, err, chromePort)
 	}
 	if debug {
 		log.Printf("[DEBUG] Successfully connected to browser via rod")
 	}
 	daemon := &Daemon{
 		browser:     browser,
 		tabs:        make(map[string]*rod.Page),
 		iframePages: make(map[string]*rod.Page),
 		tabHistory:  make([]string, 0),
 		consoleLogs: make(map[string][]ConsoleLog),
 		debug:       debug,
 	}
 	daemon.debugLog("Daemon struct initialized")
 	// Create HTTP server
 	daemon.debugLog("Setting up HTTP server")
 	mux := http.NewServeMux()
 	mux.HandleFunc("/command", daemon.handleCommand)
 	mux.HandleFunc("/status", daemon.handleStatus)
@@ -114,13 +138,18 @@ func NewDaemon(host string, port int) (*Daemon, error) {
 		Handler: mux,
 	}
 	daemon.debugLog("HTTP server configured on %s:%d", host, port)
 	return daemon, nil
 }
 // Start logs the configured listen address and then runs the daemon's HTTP
 // server by calling ListenAndServe on d.server. The error returned by
 // ListenAndServe is passed back to the caller unchanged. When debug mode is
 // enabled, a message is logged immediately before and after the call.
 func (d *Daemon) Start() error {
 	log.Printf("Starting daemon server on %s", d.server.Addr)
-	return d.server.ListenAndServe()
+	d.debugLog("About to call ListenAndServe()")
 	// NOTE(review): per the net/http documentation, ListenAndServe blocks while
 	// serving and always returns a non-nil error, so the debug line below is
 	// only reached once the server has stopped.
 	err := d.server.ListenAndServe()
 	d.debugLog("ListenAndServe() returned with error: %v", err)
 	return err
 }
 // Stop stops the daemon server
@@ -173,7 +202,10 @@ func (d *Daemon) handleStatus(w http.ResponseWriter, r *http.Request) {
 // handleCommand handles command requests
 func (d *Daemon) handleCommand(w http.ResponseWriter, r *http.Request) {
 	d.debugLog("Received HTTP request: %s %s", r.Method, r.URL.Path)
 	if r.Method != http.MethodPost {
 		d.debugLog("Invalid method: %s", r.Method)
 		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
 		return
 	}
@@ -181,10 +213,13 @@ func (d *Daemon) handleCommand(w http.ResponseWriter, r *http.Request) {
 	var cmd Command
 	err := json.NewDecoder(r.Body).Decode(&cmd)
 	if err != nil {
 		d.debugLog("Failed to decode JSON: %v", err)
 		http.Error(w, "Invalid request body", http.StatusBadRequest)
 		return
 	}
 	d.debugLog("Processing command: %s with params: %+v", cmd.Action, cmd.Params)
 	var response Response
 	switch cmd.Action {
@@ -510,20 +545,25 @@ func (d *Daemon) handleCommand(w http.ResponseWriter, r *http.Request) {
 		}
 	default:
 		d.debugLog("Unknown action: %s", cmd.Action)
 		response = Response{Success: false, Error: "Unknown action"}
 	}
 	d.debugLog("Command %s completed, sending response: success=%v", cmd.Action, response.Success)
 	w.Header().Set("Content-Type", "application/json")
 	json.NewEncoder(w).Encode(response)
 	d.debugLog("Response sent for command: %s", cmd.Action)
 }
 // openTab opens a new tab and returns its ID
 func (d *Daemon) openTab(timeout int) (string, error) {
 	d.debugLog("Opening new tab with timeout: %d", timeout)
 	d.mu.Lock()
 	defer d.mu.Unlock()
 	// Create a context with timeout if specified
 	if timeout > 0 {
 		d.debugLog("Using timeout context: %d seconds", timeout)
 		ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeout)*time.Second)
 		defer cancel()
@@ -768,10 +808,13 @@ func (d *Daemon) closeTab(tabID string, timeout int) error {
 // loadURL loads a URL in a tab
 func (d *Daemon) loadURL(tabID, url string, timeout int) error {
 	d.debugLog("Loading URL: %s in tab: %s with timeout: %d", url, tabID, timeout)
 	page, err := d.getTab(tabID)
 	if err != nil {
 		d.debugLog("Failed to get tab %s: %v", tabID, err)
 		return err
 	}
 	d.debugLog("Got tab %s, starting navigation", tabID)
 	if timeout > 0 {
 		// Use timeout for the URL loading
--- a/mcp/LLM_MCP_GUIDE.md
+++ b/mcp/LLM_MCP_GUIDE.md
@@ -1,642 +0,0 @@
 # LLM Agent Guide: Using Cremote MCP Server for Web Automation
 This document provides comprehensive guidance for LLM agents on how to use the **Cremote MCP Server** for intelligent web automation. The MCP server provides a structured, stateful interface that's optimized for AI-driven web testing and automation workflows.
 ## What is the Cremote MCP Server?
 The **Cremote MCP Server** is a Model Context Protocol implementation that wraps cremote's web automation capabilities in a structured API designed specifically for LLMs. Unlike CLI commands, the MCP server provides:
 - **Automatic State Management**: Tracks current tab, tab history, and iframe context
 - **Intelligent Abstractions**: High-level tools that combine multiple operations
 - **Rich Error Context**: Detailed error information for better debugging
 - **Automatic Screenshots**: Built-in screenshot capture for documentation
 - **Structured Responses**: Consistent, parseable JSON responses
 ## Prerequisites
 Before using the MCP server, ensure the cremote infrastructure is running:
 1. **Check if everything is already running:**
   ```bash
   cremote status
   ```
 2. **Start Chromium with remote debugging (if needed):**
   ```bash
   chromium --remote-debugging-port=9222 --user-data-dir=/tmp/chromium-debug &
   ```
 3. **Start cremote daemon (if needed):**
   ```bash
   cremotedaemon &
   ```
 4. **The MCP server should be configured in your MCP client** (e.g., Claude Desktop)
 ## Available MCP Tools
 ### 1. `web_navigate` - Smart Navigation
 Navigate to URLs with automatic tab management and optional screenshot capture.
 **Parameters:**
 - `url` (required): URL to navigate to
 - `tab` (optional): Specific tab ID (uses current tab if not specified)
 - `screenshot` (optional): Take screenshot after navigation (default: false)
 - `timeout` (optional): Timeout in seconds (default: 5)
 **Example:**
 ```json
 {
  "name": "web_navigate",
  "arguments": {
    "url": "https://example.com/login",
    "screenshot": true,
    "timeout": 10
  }
 }
 ```
 **Smart Behavior:**
 - Automatically opens a new tab if none exists
 - Updates current tab tracking
 - Adds tab to history for easy switching
 ### 2. `web_interact` - Element Interactions
 Interact with web elements through a unified interface.
 **Parameters:**
 - `action` (required): "click", "fill", "submit", or "upload"
 - `selector` (required): CSS selector for the target element
 - `value` (optional): Value for fill/upload actions
 - `tab` (optional): Tab ID (uses current tab if not specified)
 - `timeout` (optional): Timeout in seconds (default: 5)
 **Examples:**
 ```json
 // Fill a form field
 {
  "name": "web_interact",
  "arguments": {
    "action": "fill",
    "selector": "#username",
    "value": "testuser"
  }
 }
 // Click a button
 {
  "name": "web_interact",
  "arguments": {
    "action": "click",
    "selector": "#login-button"
  }
 }
 // Submit a form
 {
  "name": "web_interact",
  "arguments": {
    "action": "submit",
    "selector": "form#login-form"
  }
 }
 // Upload a file
 {
  "name": "web_interact",
  "arguments": {
    "action": "upload",
    "selector": "input[type=file]",
    "value": "/path/to/file.pdf"
  }
 }
 ```
 ### 3. `web_extract` - Data Extraction
 Extract information from web pages through multiple methods.
 **Parameters:**
 - `type` (required): "source", "element", or "javascript"
 - `selector` (optional): CSS selector (required for "element" type)
 - `code` (optional): JavaScript code (required for "javascript" type)
 - `tab` (optional): Tab ID (uses current tab if not specified)
 - `timeout` (optional): Timeout in seconds (default: 5)
 **Examples:**
 ```json
 // Get page source
 {
  "name": "web_extract",
  "arguments": {
    "type": "source"
  }
 }
 // Get specific element HTML
 {
  "name": "web_extract",
  "arguments": {
    "type": "element",
    "selector": ".error-message"
  }
 }
 // Execute JavaScript and get result
 {
  "name": "web_extract",
  "arguments": {
    "type": "javascript",
    "code": "document.title"
  }
 }
 // Check form validation
 {
  "name": "web_extract",
  "arguments": {
    "type": "javascript",
    "code": "document.getElementById('email').validity.valid"
  }
 }
 ```
 ### 4. `web_screenshot` - Screenshot Capture
 Take screenshots for documentation and debugging.
 **Parameters:**
 - `output` (required): File path for the screenshot
 - `full_page` (optional): Capture full page vs viewport (default: false)
 - `tab` (optional): Tab ID (uses current tab if not specified)
 - `timeout` (optional): Timeout in seconds (default: 5)
 **Examples:**
 ```json
 // Viewport screenshot
 {
  "name": "web_screenshot",
  "arguments": {
    "output": "/tmp/login-page.png"
  }
 }
 // Full page screenshot
 {
  "name": "web_screenshot",
  "arguments": {
    "output": "/tmp/full-page.png",
    "full_page": true
  }
 }
 ```
 ### 5. `web_manage_tabs` - Tab Management
 Manage browser tabs with automatic state tracking.
 **Parameters:**
 - `action` (required): "open", "close", "list", or "switch"
 - `tab` (optional): Tab ID (required for "close" and "switch" actions)
 - `timeout` (optional): Timeout in seconds (default: 5)
 **Examples:**
 ```json
 // Open new tab
 {
  "name": "web_manage_tabs",
  "arguments": {
    "action": "open"
  }
 }
 // List all tabs
 {
  "name": "web_manage_tabs",
  "arguments": {
    "action": "list"
  }
 }
 // Switch to specific tab
 {
  "name": "web_manage_tabs",
  "arguments": {
    "action": "switch",
    "tab": "tab-id-123"
  }
 }
 // Close current tab
 {
  "name": "web_manage_tabs",
  "arguments": {
    "action": "close"
  }
 }
 ```
 ### 6. `web_iframe` - Iframe Context Management
 Switch between main page and iframe contexts for testing embedded content.
 **Parameters:**
 - `action` (required): "enter" or "exit"
 - `selector` (optional): Iframe CSS selector (required for "enter" action)
 - `tab` (optional): Tab ID (uses current tab if not specified)
 **Examples:**
 ```json
 // Enter iframe context
 {
  "name": "web_iframe",
  "arguments": {
    "action": "enter",
    "selector": "iframe#payment-form"
  }
 }
 // Exit iframe context (return to main page)
 {
  "name": "web_iframe",
  "arguments": {
    "action": "exit"
  }
 }
 ```
 ## Response Format
 All MCP tools return a consistent response structure:
 ```json
 {
  "success": true,
  "data": "...",                    // Tool-specific response data
  "screenshot": "/tmp/shot.png",    // Screenshot path (if captured)
  "current_tab": "tab-id-123",      // Current active tab
  "tab_history": ["tab-id-123"],    // Tab history stack
  "iframe_mode": false,             // Whether in iframe context
  "error": null,                    // Error message (if failed)
  "metadata": {}                    // Additional context information
 }
 ```
 ## Common Automation Patterns
 ### 1. Login Flow Testing
 ```json
 // 1. Navigate to login page with screenshot
 {
  "name": "web_navigate",
  "arguments": {
    "url": "https://myapp.com/login",
    "screenshot": true
  }
 }
 // 2. Fill credentials
 {
  "name": "web_interact",
  "arguments": {
    "action": "fill",
    "selector": "#email",
    "value": "user@example.com"
  }
 }
 {
  "name": "web_interact",
  "arguments": {
    "action": "fill",
    "selector": "#password",
    "value": "password123"
  }
 }
 // 3. Submit login
 {
  "name": "web_interact",
  "arguments": {
    "action": "click",
    "selector": "#login-button"
  }
 }
 // 4. Verify success
 {
  "name": "web_extract",
  "arguments": {
    "type": "javascript",
    "code": "document.querySelector('.welcome-message')?.textContent"
  }
 }
 // 5. Document result
 {
  "name": "web_screenshot",
  "arguments": {
    "output": "/tmp/login-success.png"
  }
 }
 ```
 ### 2. Form Validation Testing
 ```json
 // 1. Navigate to form
 {
  "name": "web_navigate",
  "arguments": {
    "url": "https://myapp.com/register"
  }
 }
 // 2. Test empty form submission
 {
  "name": "web_interact",
  "arguments": {
    "action": "click",
    "selector": "#submit-button"
  }
 }
 // 3. Check for validation errors
 {
  "name": "web_extract",
  "arguments": {
    "type": "element",
    "selector": ".error-message"
  }
 }
 // 4. Test invalid email
 {
  "name": "web_interact",
  "arguments": {
    "action": "fill",
    "selector": "#email",
    "value": "invalid-email"
  }
 }
 // 5. Verify JavaScript validation
 {
  "name": "web_extract",
  "arguments": {
    "type": "javascript",
    "code": "document.getElementById('email').validity.valid"
  }
 }
 ```
 ### 3. Multi-Tab Workflow
 ```json
 // 1. Open multiple tabs for comparison
 {
  "name": "web_manage_tabs",
  "arguments": {
    "action": "open"
  }
 }
 {
  "name": "web_navigate",
  "arguments": {
    "url": "https://app.com/admin"
  }
 }
 {
  "name": "web_manage_tabs",
  "arguments": {
    "action": "open"
  }
 }
 {
  "name": "web_navigate",
  "arguments": {
    "url": "https://app.com/user"
  }
 }
 // 2. List tabs to see current state
 {
  "name": "web_manage_tabs",
  "arguments": {
    "action": "list"
  }
 }
 // 3. Switch between tabs as needed
 {
  "name": "web_manage_tabs",
  "arguments": {
    "action": "switch",
    "tab": "first-tab-id"
  }
 }
 ```
 ### 4. Iframe Testing (Payment Forms, Widgets)
 ```json
 // 1. Navigate to page with iframe
 {
  "name": "web_navigate",
  "arguments": {
    "url": "https://shop.com/checkout"
  }
 }
 // 2. Enter iframe context
 {
  "name": "web_iframe",
  "arguments": {
    "action": "enter",
    "selector": "iframe.payment-frame"
  }
 }
 // 3. Interact with iframe content
 {
  "name": "web_interact",
  "arguments": {
    "action": "fill",
    "selector": "#card-number",
    "value": "4111111111111111"
  }
 }
 {
  "name": "web_interact",
  "arguments": {
    "action": "fill",
    "selector": "#expiry",
    "value": "12/25"
  }
 }
 // 4. Exit iframe context
 {
  "name": "web_iframe",
  "arguments": {
    "action": "exit"
  }
 }
 // 5. Continue with main page
 {
  "name": "web_interact",
  "arguments": {
    "action": "click",
    "selector": "#complete-order"
  }
 }
 ```
 ## Best Practices for LLMs
 ### 1. State Awareness
 - The MCP server automatically tracks state, but always check the response for current context
 - Use the `current_tab` and `iframe_mode` fields to understand your current position
 - The `tab_history` helps you understand available tabs
 ### 2. Error Handling
 - Always check the `success` field in responses
 - Use the `error` field for detailed error information
 - Take screenshots when errors occur for debugging: `"screenshot": true`
 ### 3. Timeout Management
 - Use longer timeouts for slow-loading pages or complex interactions
 - Default 5-second timeouts work for most scenarios
 - Increase timeouts for file uploads or heavy JavaScript applications
 ### 4. Screenshot Strategy
 - Take screenshots at key points for documentation
 - Use `full_page: true` for comprehensive page captures
 - Screenshot before and after critical actions for debugging
 ### 5. Verification Patterns
 - Always verify actions completed successfully
 - Use JavaScript extraction to check application state
 - Combine element extraction with JavaScript validation
 ## Debugging Failed Tests
 ### 1. Capture Current State
 ```json
 // Get page source for analysis
 {
  "name": "web_extract",
  "arguments": {
    "type": "source"
  }
 }
 // Take screenshot to see visual state
 {
  "name": "web_screenshot",
  "arguments": {
    "output": "/tmp/debug-state.png",
    "full_page": true
  }
 }
 // Inspect console.error (note: this returns the function's source text, not errors that were previously logged to the console)
 {
  "name": "web_extract",
  "arguments": {
    "type": "javascript",
    "code": "console.error.toString()"
  }
 }
 ```
 ### 2. Element Debugging
 ```json
 // Check if element exists
 {
  "name": "web_extract",
  "arguments": {
    "type": "javascript",
    "code": "document.querySelector('#my-element') !== null"
  }
 }
 // Get element properties
 {
  "name": "web_extract",
  "arguments": {
    "type": "javascript",
    "code": "JSON.stringify({visible: document.querySelector('#my-element')?.offsetParent !== null, text: document.querySelector('#my-element')?.textContent})"
  }
 }
 ```
 ### 3. Network and Loading Issues
 ```json
 // Check if page is still loading
 {
  "name": "web_extract",
  "arguments": {
    "type": "javascript",
    "code": "document.readyState"
  }
 }
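 // Check whether a global window.onerror handler is installed (note: this does not report whether any errors have actually occurred)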
 {
  "name": "web_extract",
  "arguments": {
    "type": "javascript",
    "code": "window.onerror ? 'Errors detected' : 'No errors'"
  }
 }
 ```
 ## Advantages Over CLI Commands
 ### 1. **Automatic State Management**
 - No need to manually track tab IDs
 - Automatic current tab resolution
 - Persistent iframe context tracking
 ### 2. **Rich Error Context**
 - Detailed error messages with context
 - Automatic screenshot capture on failures
 - Structured error responses for better debugging
 ### 3. **Intelligent Abstractions**
 - Combined operations in single tools
 - Smart parameter defaults and validation
 - Automatic resource management
 ### 4. **Better Performance**
 - Direct library integration (no subprocess overhead)
 - Persistent connections to cremote daemon
 - Efficient state tracking
 ### 5. **Structured Responses**
 - Consistent JSON format for all responses
 - Rich metadata for decision making
 - Easy parsing and error handling
 ## Key Differences from CLI Usage
 | Aspect | CLI Commands | MCP Server |
 |--------|-------------|------------|
 | **State Tracking** | Manual tab ID management | Automatic state management |
 | **Error Handling** | Text parsing required | Structured error objects |
 | **Screenshots** | Manual command execution | Automatic capture options |
 | **Performance** | Subprocess overhead | Direct library calls |
 | **Response Format** | Text output | Structured JSON |
 | **Context Management** | Manual iframe tracking | Automatic context switching |
 | **Resource Cleanup** | Manual tab management | Automatic resource tracking |
 The Cremote MCP Server transforms web automation from a series of CLI commands into an intelligent, stateful API that's optimized for AI-driven testing and automation workflows.
--- a/mcp/cremote-mcp2
+++ b/mcp/cremote-mcp2