diff --git a/client/client.go b/client/client.go
index 5211f29..c464e83 100644
--- a/client/client.go
+++ b/client/client.go
@@ -726,3 +726,84 @@ func (c *Client) DownloadFileFromContainer(containerPath, localPath string) erro
 
 	return nil
 }
+
+// GetConsoleLogs retrieves the console logs captured for a tab.
+// If tabID is empty, the current tab will be used.
+// If clear is true, the logs will be cleared after retrieval.
+// Each returned map is one log entry as decoded from the daemon response.
+func (c *Client) GetConsoleLogs(tabID string, clear bool) ([]map[string]interface{}, error) {
+	params := map[string]string{}
+
+	// Only include tab ID if it's provided
+	if tabID != "" {
+		params["tab"] = tabID
+	}
+
+	// Add clear flag if specified
+	if clear {
+		params["clear"] = "true"
+	}
+
+	resp, err := c.SendCommand("console-logs", params)
+	if err != nil {
+		return nil, err
+	}
+
+	if !resp.Success {
+		return nil, fmt.Errorf("failed to get console logs: %s", resp.Error)
+	}
+
+	// Convert response data to slice of console logs
+	logs, ok := resp.Data.([]interface{})
+	if !ok {
+		return nil, fmt.Errorf("unexpected response data type")
+	}
+
+	result := make([]map[string]interface{}, len(logs))
+	for i, entry := range logs {
+		logMap, ok := entry.(map[string]interface{})
+		if !ok {
+			return nil, fmt.Errorf("unexpected log entry format")
+		}
+		result[i] = logMap
+	}
+
+	return result, nil
+}
+
+// ExecuteConsoleCommand executes a command in the browser console and
+// returns its result as a string.
+// If tabID is empty, the current tab will be used.
+// timeout is in seconds; a value <= 0 is not sent, so the daemon applies its
+// own default (5 seconds) rather than running without a timeout.
+func (c *Client) ExecuteConsoleCommand(tabID, command string, timeout int) (string, error) {
+	params := map[string]string{
+		"command": command,
+	}
+
+	// Only include tab ID if it's provided
+	if tabID != "" {
+		params["tab"] = tabID
+	}
+
+	// Add timeout if specified
+	if timeout > 0 {
+		params["timeout"] = strconv.Itoa(timeout)
+	}
+
+	resp, err := c.SendCommand("console-command", params)
+	if err != nil {
+		return "", err
+	}
+
+	if !resp.Success {
+		return "", fmt.Errorf("failed to execute console command: %s", resp.Error)
+	}
+
+	result, ok := resp.Data.(string)
+	if !ok {
+		return "", fmt.Errorf("unexpected response data type")
+	}
+
+	return result, nil
+}
diff --git a/daemon/daemon.go b/daemon/daemon.go
index a2abce2..d334988 100644
--- a/daemon/daemon.go
+++ b/daemon/daemon.go
@@ -22,13 +22,22 @@ import (
 type Daemon struct {
 	browser     *rod.Browser
 	tabs        map[string]*rod.Page
-	iframePages map[string]*rod.Page // Maps tab ID to iframe page context
-	currentTab  string               // ID of the current/last used tab
-	tabHistory  []string             // Stack of tab IDs in order of activation (LIFO)
+	iframePages map[string]*rod.Page    // Maps tab ID to iframe page context
+	currentTab  string                  // ID of the current/last used tab
+	tabHistory  []string                // Stack of tab IDs in order of activation (LIFO)
+	consoleLogs map[string][]ConsoleLog // Maps tab ID to console logs
 	mu          sync.Mutex
 	server      *http.Server
 }
 
+// ConsoleLog represents a console log entry
+type ConsoleLog struct {
+	Level     string    `json:"level"`     // Console API call type reported by the browser, e.g. log, warning, error, info, debug
+	Message   string    `json:"message"`   // The log message (arguments joined with spaces)
+	Timestamp time.Time `json:"timestamp"` // When the daemon received the console event
+	Source    string    `json:"source"`    // Source location; currently always empty
+}
+
 // Command represents a command sent from the client to the daemon
 type Command struct {
 	Action string            `json:"action"`
@@ -90,6 +99,7 @@ func NewDaemon(host string, port int) (*Daemon, error) {
 		tabs:        make(map[string]*rod.Page),
 		iframePages: make(map[string]*rod.Page),
 		tabHistory:  make([]string, 0),
+		consoleLogs: make(map[string][]ConsoleLog),
 	}
 
 	// Create HTTP server
@@ -462,6 +472,39 @@ func (d *Daemon) handleCommand(w http.ResponseWriter, r *http.Request) {
 			response = Response{Success: true}
 		}
 
+	case "console-logs":
+		tabID := cmd.Params["tab"]
+
+		// The clear flag is enabled only by the literal string "true"
+		clearLogs := cmd.Params["clear"] == "true"
+
+		logs, err := d.getConsoleLogs(tabID, clearLogs)
+		if err != nil {
+			response = Response{Success: false, Error: err.Error()}
+		} else {
+			response = Response{Success: true, Data: logs}
+		}
+
+	case "console-command":
+		tabID := cmd.Params["tab"]
+		command := cmd.Params["command"]
+		timeoutStr := cmd.Params["timeout"]
+
+		// Parse timeout (default to 5 seconds if not specified)
+		timeout := 5
+		if timeoutStr != "" {
+			if parsedTimeout, err := strconv.Atoi(timeoutStr); err == nil && parsedTimeout > 0 {
+				timeout = parsedTimeout
+			}
+		}
+
+		result, err := d.executeConsoleCommand(tabID, command, timeout)
+		if err != nil {
+			response = Response{Success: false, Error: err.Error()}
+		} else {
+			response = Response{Success: true, Data: result}
+		}
+
 	default:
 		response = Response{Success: false, Error: "Unknown action"}
 	}
@@ -513,6 +556,9 @@ func (d *Daemon) openTab(timeout int) (string, error) {
 			d.tabHistory = append(d.tabHistory, res.tabID)
 			d.currentTab = res.tabID
 
+			// Set up console logging for this tab
+			d.setupConsoleLogging(res.tabID, res.page)
+
 			return res.tabID, nil
 		case <-ctx.Done():
 			return "", fmt.Errorf("opening tab timed out after %d seconds", timeout)
@@ -532,6 +578,9 @@ func (d *Daemon) openTab(timeout int) (string, error) {
 		d.tabHistory = append(d.tabHistory, tabID)
 		d.currentTab = tabID
 
+		// Set up console logging for this tab
+		d.setupConsoleLogging(tabID, page)
+
 		return tabID, nil
 	}
 }
@@ -1742,3 +1791,115 @@ func (d *Daemon) handleFileDownload(w http.ResponseWriter, r *http.Request) {
 		log.Printf("Error streaming file to client: %v", err)
 	}
 }
+
+// maxConsoleLogsPerTab caps how many console log entries are kept per tab;
+// once the cap is exceeded the oldest entries are dropped.
+const maxConsoleLogsPerTab = 1000
+
+// setupConsoleLogging sets up console log capture for a tab.
+// It resets the tab's stored logs and starts a goroutine that appends one
+// ConsoleLog per console API call event received from page.
+//
+// NOTE(review): the map write below is not guarded by d.mu in this function;
+// confirm that callers such as openTab already hold the lock.
+func (d *Daemon) setupConsoleLogging(tabID string, page *rod.Page) {
+	// Initialize console logs for this tab
+	d.consoleLogs[tabID] = make([]ConsoleLog, 0)
+
+	// Listen for console events. EachEvent returns a wait function, and it is
+	// that wait function which runs in the new goroutine.
+	go page.EachEvent(func(e *proto.RuntimeConsoleAPICalled) {
+		d.mu.Lock()
+		defer d.mu.Unlock()
+
+		// Convert console level
+		level := string(e.Type)
+
+		// Build message from arguments
+		var message string
+		for i, arg := range e.Args {
+			if i > 0 {
+				message += " "
+			}
+			// Handle different argument types
+			if !arg.Value.Nil() {
+				message += arg.Value.String()
+			} else if arg.Description != "" {
+				message += arg.Description
+			} else {
+				message += "[object]"
+			}
+		}
+
+		// Create console log entry
+		logEntry := ConsoleLog{
+			Level:     level,
+			Message:   message,
+			Timestamp: time.Now(),
+			Source:    "", // Could be enhanced with stack trace info
+		}
+
+		// Add to console logs, keeping only the newest maxConsoleLogsPerTab entries
+		logs := append(d.consoleLogs[tabID], logEntry)
+		if len(logs) > maxConsoleLogsPerTab {
+			logs = logs[len(logs)-maxConsoleLogsPerTab:]
+		}
+		d.consoleLogs[tabID] = logs
+	})()
+}
+
+// getConsoleLogs retrieves console logs for a tab.
+// An empty tabID selects the current tab. It returns an error when no tab
+// can be determined or the tab is not in d.tabs. If clear is true, the tab's
+// stored logs are replaced with an empty slice after they are read.
+func (d *Daemon) getConsoleLogs(tabID string, clear bool) ([]ConsoleLog, error) {
+	d.mu.Lock()
+	defer d.mu.Unlock()
+
+	// Use current tab if not specified
+	if tabID == "" {
+		tabID = d.currentTab
+	}
+
+	if tabID == "" {
+		return nil, fmt.Errorf("no tab specified and no current tab available")
+	}
+
+	// Check if tab exists
+	if _, exists := d.tabs[tabID]; !exists {
+		return nil, fmt.Errorf("tab not found: %s", tabID)
+	}
+
+	// Get logs for this tab
+	logs, exists := d.consoleLogs[tabID]
+	if !exists {
+		logs = make([]ConsoleLog, 0)
+	}
+
+	// Clear logs if requested
+	if clear {
+		d.consoleLogs[tabID] = make([]ConsoleLog, 0)
+	}
+
+	return logs, nil
+}
+
+// executeConsoleCommand executes a command in the browser console.
+// An empty tabID selects the current tab; timeout is passed through to evalJS.
+func (d *Daemon) executeConsoleCommand(tabID, command string, timeout int) (string, error) {
+	// Use current tab if not specified. currentTab is read under d.mu, as
+	// getConsoleLogs does, and the lock is released before evalJS is called.
+	if tabID == "" {
+		d.mu.Lock()
+		tabID = d.currentTab
+		d.mu.Unlock()
+	}
+
+	if tabID == "" {
+		return "", fmt.Errorf("no tab specified and no current tab available")
+	}
+
+	// Execute the command as JavaScript and return the result
+	// This is similar to evalJS but specifically for console commands
+	return d.evalJS(tabID, command, timeout)
+}
diff --git a/downloaded-test.txt b/downloaded-test.txt
new file mode 100644
index 0000000..3e9b528
--- /dev/null
+++ b/downloaded-test.txt
@@
-0,0 +1 @@ +This is a test file for cremote file transfer diff --git a/mcp/LLM_USAGE_GUIDE.md b/mcp/LLM_USAGE_GUIDE.md index 8825908..ea5ba2d 100644 --- a/mcp/LLM_USAGE_GUIDE.md +++ b/mcp/LLM_USAGE_GUIDE.md @@ -4,7 +4,7 @@ This guide explains how LLMs can use the cremote MCP (Model Context Protocol) to ## Available Tools -The cremote MCP server provides six comprehensive web automation tools: +The cremote MCP server provides ten comprehensive web automation, file transfer, and console debugging tools: ### 1. `web_navigate_cremotemcp` Navigate to URLs and optionally take screenshots. @@ -123,6 +123,73 @@ web_iframe_cremotemcp: action: "exit" ``` +### 7. `file_upload_cremotemcp` +Upload files from the client to the container for use in form uploads. + +**Parameters:** +- `local_path` (required): Path to the file on the client machine +- `container_path` (optional): Path where to store the file in the container (defaults to /tmp/filename) + +**Example Usage:** +``` +file_upload_cremotemcp: + local_path: "/home/user/document.pdf" + container_path: "/tmp/upload.pdf" + +file_upload_cremotemcp: + local_path: "/home/user/image.jpg" + # Will default to /tmp/image.jpg in container +``` + +### 8. `file_download_cremotemcp` +Download files from the container to the client (e.g., downloaded files from browser). + +**Parameters:** +- `container_path` (required): Path to the file in the container +- `local_path` (required): Path where to save the file on the client machine + +**Example Usage:** +``` +file_download_cremotemcp: + container_path: "/tmp/downloaded-report.pdf" + local_path: "/home/user/Downloads/report.pdf" +``` + +### 9. `console_logs_cremotemcp` +Get console logs from the browser tab for debugging. + +**Parameters:** +- `tab` (optional): Specific tab ID to use +- `clear` (optional): Clear logs after retrieval (default: false) + +**Example Usage:** +``` +console_logs_cremotemcp: + clear: true + +console_logs_cremotemcp: + tab: "ABC123" + clear: false +``` + +### 10. 
`console_command_cremotemcp` +Execute JavaScript commands in the browser console. + +**Parameters:** +- `command` (required): JavaScript command to execute +- `tab` (optional): Specific tab ID to use +- `timeout` (optional): Timeout in seconds (default: 5) + +**Example Usage:** +``` +console_command_cremotemcp: + command: "console.log('Hello from MCP!')" + +console_command_cremotemcp: + command: "document.querySelector('h1').textContent" + timeout: 10 +``` + ## Common Usage Patterns ### 1. Basic Web Navigation @@ -277,6 +344,100 @@ web_navigate_cremotemcp: screenshot: true ``` +## Example: File Upload Workflow + +Here's how to upload a file and use it in a web form: + +``` +# Step 1: Upload a file from client to container +file_upload_cremotemcp: + local_path: "/home/user/resume.pdf" + container_path: "/tmp/resume.pdf" + +# Step 2: Navigate to upload form +web_navigate_cremotemcp: + url: "https://jobsite.com/apply" + +# Step 3: Fill out application form +web_interact_cremotemcp: + action: "fill" + selector: "input[name='name']" + value: "Jane Smith" + +# Step 4: Upload the file using the container path +web_interact_cremotemcp: + action: "upload" + selector: "input[type='file']" + value: "/tmp/resume.pdf" + +# Step 5: Submit the application +web_interact_cremotemcp: + action: "submit" + selector: "form.application" +``` + +## Example: Download Files from Browser + +Here's how to download files that the browser saves: + +``` +# Step 1: Navigate to download page +web_navigate_cremotemcp: + url: "https://example.com/reports" + +# Step 2: Click download link +web_interact_cremotemcp: + action: "click" + selector: "a.download-report" + +# Step 3: Wait a moment for download to complete +# (In practice, you might need to check for file existence) + +# Step 4: Download the file from container to client +file_download_cremotemcp: + container_path: "/tmp/downloads/report.pdf" + local_path: "/home/user/Downloads/report.pdf" +``` + +## Example: Console Debugging Workflow + 
+Here's how to use console tools for debugging web automation: + +``` +# Step 1: Navigate to a page +web_navigate_cremotemcp: + url: "https://example.com/form" + +# Step 2: Check what's in the console initially +console_logs_cremotemcp: + clear: false + +# Step 3: Execute some debugging commands +console_command_cremotemcp: + command: "console.log('Starting form automation')" + +console_command_cremotemcp: + command: "document.querySelectorAll('input').length" + +# Step 4: Try to interact with form +web_interact_cremotemcp: + action: "fill" + selector: "input[name='email']" + value: "test@example.com" + +# Step 5: Check for any console errors after interaction +console_logs_cremotemcp: + clear: false + +# Step 6: Debug element selection +console_command_cremotemcp: + command: "document.querySelector('input[name=\"email\"]') ? 'Found' : 'Not found'" + +# Step 7: Check element properties +console_command_cremotemcp: + command: "document.querySelector('input[name=\"email\"]').value" +``` + ## Integration Notes - Tools use the `_cremotemcp` suffix to avoid naming conflicts diff --git a/mcp/QUICK_REFERENCE.md b/mcp/QUICK_REFERENCE.md index ce17d5d..047cba1 100644 --- a/mcp/QUICK_REFERENCE.md +++ b/mcp/QUICK_REFERENCE.md @@ -7,6 +7,10 @@ - `web_screenshot_cremotemcp` - Take screenshots - `web_manage_tabs_cremotemcp` - Manage browser tabs - `web_iframe_cremotemcp` - Switch iframe context +- `file_upload_cremotemcp` - Upload files to container +- `file_download_cremotemcp` - Download files from container +- `console_logs_cremotemcp` - Get browser console logs +- `console_command_cremotemcp` - Execute console commands ## Essential Parameters @@ -64,6 +68,33 @@ web_interact_cremotemcp: value: "/path/to/file.pdf" ``` +### Upload File to Container +```yaml +file_upload_cremotemcp: + local_path: "/home/user/document.pdf" + container_path: "/tmp/upload.pdf" +``` + +### Download File from Container +```yaml +file_download_cremotemcp: + container_path: "/tmp/downloaded.pdf" + 
local_path: "/home/user/Downloads/file.pdf"
+```
+
+### Get Console Logs
+```yaml
+console_logs_cremotemcp:
+  clear: true
+```
+
+### Execute Console Command
+```yaml
+console_command_cremotemcp:
+  command: "document.title"
+  timeout: 10
+```
+
 ## Best CSS Selectors
 
 ✅ **Good:**
diff --git a/mcp/cremote-mcp b/mcp/cremote-mcp
index 4c1c2c8..88a26e9 100755
Binary files a/mcp/cremote-mcp and b/mcp/cremote-mcp differ
diff --git a/mcp/main.go b/mcp/main.go
index 1227787..8f1bc18 100644
--- a/mcp/main.go
+++ b/mcp/main.go
@@ -687,6 +687,114 @@ func main() {
 		}, nil
 	})
 
+	// Register console_logs tool
+	mcpServer.AddTool(mcp.Tool{
+		Name:        "console_logs",
+		Description: "Get console logs from the browser tab",
+		InputSchema: mcp.ToolInputSchema{
+			Type: "object",
+			Properties: map[string]any{
+				"tab": map[string]any{
+					"type":        "string",
+					"description": "Tab ID (optional, uses current tab)",
+				},
+				"clear": map[string]any{
+					"type":        "boolean",
+					"description": "Clear logs after retrieval (default: false)",
+					"default":     false,
+				},
+			},
+			Required: []string{},
+		},
+	}, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
+		// Convert arguments to map
+		params, ok := request.Params.Arguments.(map[string]any)
+		if !ok {
+			return nil, fmt.Errorf("invalid arguments format")
+		}
+
+		tab := getStringParam(params, "tab", cremoteServer.currentTab)
+		clearLogs := getBoolParam(params, "clear", false)
+
+		// Get console logs
+		logs, err := cremoteServer.client.GetConsoleLogs(tab, clearLogs)
+		if err != nil {
+			return nil, fmt.Errorf("failed to get console logs: %w", err)
+		}
+
+		// Format logs for display
+		var logText string
+		if len(logs) == 0 {
+			logText = "No console logs found."
+		} else {
+			logText = fmt.Sprintf("Found %d console log entries:\n\n", len(logs))
+			for i, entry := range logs {
+				// Format fields with %v instead of asserting .(string), so a
+				// missing or non-string field cannot panic the handler.
+				logText += fmt.Sprintf("[%d] %v [%v]: %v\n", i+1, entry["timestamp"], entry["level"], entry["message"])
+			}
+		}
+
+		return &mcp.CallToolResult{
+			Content: []mcp.Content{
+				mcp.NewTextContent(logText),
+			},
+			IsError: false,
+		}, nil
+	})
+
+	// Register console_command tool
+	mcpServer.AddTool(mcp.Tool{
+		Name:        "console_command",
+		Description: "Execute a command in the browser console",
+		InputSchema: mcp.ToolInputSchema{
+			Type: "object",
+			Properties: map[string]any{
+				"command": map[string]any{
+					"type":        "string",
+					"description": "JavaScript command to execute in console",
+				},
+				"tab": map[string]any{
+					"type":        "string",
+					"description": "Tab ID (optional, uses current tab)",
+				},
+				"timeout": map[string]any{
+					"type":        "integer",
+					"description": "Timeout in seconds (default: 5)",
+					"default":     5,
+				},
+			},
+			Required: []string{"command"},
+		},
+	}, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
+		// Convert arguments to map
+		params, ok := request.Params.Arguments.(map[string]any)
+		if !ok {
+			return nil, fmt.Errorf("invalid arguments format")
+		}
+
+		command := getStringParam(params, "command", "")
+		tab := getStringParam(params, "tab", cremoteServer.currentTab)
+		timeout := getIntParam(params, "timeout", 5)
+
+		if command == "" {
+			return nil, fmt.Errorf("command parameter is required")
+		}
+
+		// Execute console command
+		result, err := cremoteServer.client.ExecuteConsoleCommand(tab, command, timeout)
+		if err != nil {
+			return nil, fmt.Errorf("failed to execute console command: %w", err)
+		}
+
+		return &mcp.CallToolResult{
+			Content: []mcp.Content{
+				mcp.NewTextContent(fmt.Sprintf("Console command executed successfully.\nCommand: %s\nResult: %s", command, result)),
+			},
+			IsError: false,
+		}, nil
+	})
+
 	// Start the server
 	log.Printf("Cremote MCP server ready")
 	if err := server.ServeStdio(mcpServer); err != nil {