This commit is contained in:
Josh at WLTechBlog 2025-08-15 07:41:30 -05:00
parent dc6b288aa4
commit efab3cc11e
7 changed files with 533 additions and 4 deletions

View File

@ -726,3 +726,81 @@ func (c *Client) DownloadFileFromContainer(containerPath, localPath string) erro
return nil
}
// GetConsoleLogs fetches the console log entries captured for a tab.
//
// An empty tabID leaves the "tab" parameter out of the request, in which case
// the daemon falls back to its current tab. When clear is true the daemon is
// asked to discard the tab's stored entries after returning them.
//
// Each element of the result is one decoded log entry. An error is returned
// when the command cannot be sent, the daemon reports failure, or the payload
// is not a list of objects.
func (c *Client) GetConsoleLogs(tabID string, clear bool) ([]map[string]interface{}, error) {
	// Both parameters are optional, so start empty and add only what was requested.
	params := make(map[string]string, 2)
	if tabID != "" {
		params["tab"] = tabID
	}
	if clear {
		params["clear"] = "true"
	}

	resp, err := c.SendCommand("console-logs", params)
	switch {
	case err != nil:
		return nil, err
	case !resp.Success:
		return nil, fmt.Errorf("failed to get console logs: %s", resp.Error)
	}

	// The payload arrives as generic decoded data; narrow it step by step.
	rawEntries, isList := resp.Data.([]interface{})
	if !isList {
		return nil, fmt.Errorf("unexpected response data type")
	}

	entries := make([]map[string]interface{}, 0, len(rawEntries))
	for _, raw := range rawEntries {
		entry, isObject := raw.(map[string]interface{})
		if !isObject {
			return nil, fmt.Errorf("unexpected log entry format")
		}
		entries = append(entries, entry)
	}
	return entries, nil
}
// ExecuteConsoleCommand runs a JavaScript command in a tab's console context
// and returns the string result reported by the daemon.
//
// An empty tabID leaves the "tab" parameter out of the request, in which case
// the daemon uses its current tab.
//
// timeout is in seconds. A value <= 0 is not sent at all; the daemon then
// applies its own default (5 seconds) — it does NOT mean "no timeout".
//
// An error is returned when the command cannot be sent, the daemon reports
// failure, or the response payload is not a string.
func (c *Client) ExecuteConsoleCommand(tabID, command string, timeout int) (string, error) {
	params := map[string]string{
		"command": command,
	}
	// Only include tab ID if it's provided
	if tabID != "" {
		params["tab"] = tabID
	}
	// Only positive timeouts are forwarded; otherwise the daemon default applies.
	if timeout > 0 {
		params["timeout"] = strconv.Itoa(timeout)
	}

	resp, err := c.SendCommand("console-command", params)
	if err != nil {
		return "", err
	}
	if !resp.Success {
		return "", fmt.Errorf("failed to execute console command: %s", resp.Error)
	}

	result, ok := resp.Data.(string)
	if !ok {
		return "", fmt.Errorf("unexpected response data type")
	}
	return result, nil
}

View File

@ -22,13 +22,22 @@ import (
type Daemon struct {
browser *rod.Browser
// tabs is indexed by tab ID (getConsoleLogs uses it to check that a tab exists)
tabs map[string]*rod.Page
iframePages map[string]*rod.Page // Maps tab ID to iframe page context
currentTab string // ID of the current/last used tab
tabHistory []string // Stack of tab IDs in order of activation (LIFO)
iframePages map[string]*rod.Page // Maps tab ID to iframe page context
currentTab string // ID of the current/last used tab
tabHistory []string // Stack of tab IDs in order of activation (LIFO)
consoleLogs map[string][]ConsoleLog // Maps tab ID to console logs
// mu is held by getConsoleLogs while it reads tabs, currentTab and consoleLogs,
// and by the console event callback while it appends to consoleLogs.
// NOTE(review): presumably it guards the remaining fields as well — confirm.
mu sync.Mutex
server *http.Server
}
// ConsoleLog represents a single console API call captured from a tab.
// Entries are created by the per-tab console event listener and are
// serialized to JSON using the field names in the struct tags.
type ConsoleLog struct {
Level string `json:"level"` // Console call type copied verbatim from the event (e.g. log, error, info, debug). NOTE(review): CDP appears to report warnings as "warning", not "warn" — confirm
Message string `json:"message"` // The call's arguments rendered as text and joined with single spaces
Timestamp time.Time `json:"timestamp"` // When the daemon handled the event (time.Now()), not a browser-side timestamp
Source string `json:"source"` // Source location if available; currently always left empty
}
// Command represents a command sent from the client to the daemon
type Command struct {
Action string `json:"action"`
@ -90,6 +99,7 @@ func NewDaemon(host string, port int) (*Daemon, error) {
tabs: make(map[string]*rod.Page),
iframePages: make(map[string]*rod.Page),
tabHistory: make([]string, 0),
consoleLogs: make(map[string][]ConsoleLog),
}
// Create HTTP server
@ -462,6 +472,43 @@ func (d *Daemon) handleCommand(w http.ResponseWriter, r *http.Request) {
response = Response{Success: true}
}
case "console-logs":
	// The flag travels as a string; only the exact value "true" requests
	// that the stored logs be discarded after they are returned.
	shouldClear := cmd.Params["clear"] == "true"

	if logs, err := d.getConsoleLogs(cmd.Params["tab"], shouldClear); err != nil {
		response = Response{Success: false, Error: err.Error()}
	} else {
		response = Response{Success: true, Data: logs}
	}
case "console-command":
	// Fall back to 5 seconds unless the request carries a positive integer;
	// a missing, malformed or non-positive value keeps the default.
	timeout := 5
	if raw := cmd.Params["timeout"]; raw != "" {
		if seconds, convErr := strconv.Atoi(raw); convErr == nil && seconds > 0 {
			timeout = seconds
		}
	}

	if result, err := d.executeConsoleCommand(cmd.Params["tab"], cmd.Params["command"], timeout); err != nil {
		response = Response{Success: false, Error: err.Error()}
	} else {
		response = Response{Success: true, Data: result}
	}
default:
response = Response{Success: false, Error: "Unknown action"}
}
@ -513,6 +560,9 @@ func (d *Daemon) openTab(timeout int) (string, error) {
d.tabHistory = append(d.tabHistory, res.tabID)
d.currentTab = res.tabID
// Set up console logging for this tab
d.setupConsoleLogging(res.tabID, res.page)
return res.tabID, nil
case <-ctx.Done():
return "", fmt.Errorf("opening tab timed out after %d seconds", timeout)
@ -532,6 +582,9 @@ func (d *Daemon) openTab(timeout int) (string, error) {
d.tabHistory = append(d.tabHistory, tabID)
d.currentTab = tabID
// Set up console logging for this tab
d.setupConsoleLogging(tabID, page)
return tabID, nil
}
}
@ -1742,3 +1795,99 @@ func (d *Daemon) handleFileDownload(w http.ResponseWriter, r *http.Request) {
log.Printf("Error streaming file to client: %v", err)
}
}
// setupConsoleLogging starts capturing console API calls made in page and
// stores them under tabID in d.consoleLogs, keeping at most the newest 1000
// entries per tab.
//
// NOTE(review): the initial map write below happens without taking d.mu;
// presumably the caller already holds it — confirm against openTab.
func (d *Daemon) setupConsoleLogging(tabID string, page *rod.Page) {
	// Start from an empty (non-nil) buffer for this tab.
	d.consoleLogs[tabID] = make([]ConsoleLog, 0)

	// EachEvent is evaluated here, in the calling goroutine, so the handler is
	// registered before this function returns; only the blocking wait function
	// it returns runs in the new goroutine.
	waitForEvents := page.EachEvent(func(e *proto.RuntimeConsoleAPICalled) {
		d.mu.Lock()
		defer d.mu.Unlock()

		// Render every argument as text, separated by single spaces.
		var text []byte
		for idx, arg := range e.Args {
			if idx > 0 {
				text = append(text, ' ')
			}
			switch {
			case !arg.Value.Nil():
				// The argument carries a concrete value: use its string form.
				text = append(text, arg.Value.String()...)
			case arg.Description != "":
				// No value, but the browser supplied a description.
				text = append(text, arg.Description...)
			default:
				text = append(text, "[object]"...)
			}
		}

		// Append the new entry, then drop the oldest one once the cap is exceeded.
		buffered := append(d.consoleLogs[tabID], ConsoleLog{
			Level:     string(e.Type),
			Message:   string(text),
			Timestamp: time.Now(),
			Source:    "", // Could be enhanced with stack trace info
		})
		if len(buffered) > 1000 {
			buffered = buffered[1:]
		}
		d.consoleLogs[tabID] = buffered
	})
	go waitForEvents()
}
// getConsoleLogs returns the console log entries stored for a tab.
//
// An empty tabID selects the daemon's current tab; it is an error if there is
// no current tab either, or if the resolved tab is not in d.tabs. A tab with
// no recorded entries yields an empty, non-nil slice. When clear is true the
// tab's stored entries are replaced with an empty slice after being read, so
// the caller still receives everything captured so far.
func (d *Daemon) getConsoleLogs(tabID string, clear bool) ([]ConsoleLog, error) {
	d.mu.Lock()
	defer d.mu.Unlock()

	// Resolve which tab the request is about.
	target := tabID
	if target == "" {
		target = d.currentTab
	}
	if target == "" {
		return nil, fmt.Errorf("no tab specified and no current tab available")
	}
	if _, known := d.tabs[target]; !known {
		return nil, fmt.Errorf("tab not found: %s", target)
	}

	captured, recorded := d.consoleLogs[target]
	if !recorded {
		captured = []ConsoleLog{}
	}

	// Resetting the stored slice does not affect what was just read.
	if clear {
		d.consoleLogs[target] = []ConsoleLog{}
	}
	return captured, nil
}
// executeConsoleCommand runs command as JavaScript in the given tab and
// returns the result produced by evalJS.
//
// An empty tabID selects the daemon's current tab; an error is returned when
// there is no current tab either. timeout is passed through to evalJS unchanged.
func (d *Daemon) executeConsoleCommand(tabID, command string, timeout int) (string, error) {
	// Use current tab if not specified. currentTab is shared state that
	// getConsoleLogs reads under d.mu, so read it under the same lock here.
	// The lock is released before calling evalJS, which may lock on its own.
	if tabID == "" {
		d.mu.Lock()
		tabID = d.currentTab
		d.mu.Unlock()
	}
	if tabID == "" {
		return "", fmt.Errorf("no tab specified and no current tab available")
	}

	// Execute the command as JavaScript and return the result
	// This is similar to evalJS but specifically for console commands
	return d.evalJS(tabID, command, timeout)
}

1
downloaded-test.txt Normal file
View File

@ -0,0 +1 @@
This is a test file for cremote file transfer

View File

@ -4,7 +4,7 @@ This guide explains how LLMs can use the cremote MCP (Model Context Protocol) to
## Available Tools
The cremote MCP server provides six comprehensive web automation tools:
The cremote MCP server provides ten comprehensive web automation, file transfer, and console debugging tools:
### 1. `web_navigate_cremotemcp`
Navigate to URLs and optionally take screenshots.
@ -123,6 +123,73 @@ web_iframe_cremotemcp:
action: "exit"
```
### 7. `file_upload_cremotemcp`
Upload files from the client to the container for use in form uploads.
**Parameters:**
- `local_path` (required): Path to the file on the client machine
- `container_path` (optional): Path where the file is stored in the container (defaults to `/tmp/<filename>`, using the name of the local file)
**Example Usage:**
```
file_upload_cremotemcp:
local_path: "/home/user/document.pdf"
container_path: "/tmp/upload.pdf"
file_upload_cremotemcp:
local_path: "/home/user/image.jpg"
# Will default to /tmp/image.jpg in container
```
### 8. `file_download_cremotemcp`
Download files from the container to the client (e.g., files that the browser has downloaded).
**Parameters:**
- `container_path` (required): Path to the file in the container
- `local_path` (required): Path where to save the file on the client machine
**Example Usage:**
```
file_download_cremotemcp:
container_path: "/tmp/downloaded-report.pdf"
local_path: "/home/user/Downloads/report.pdf"
```
### 9. `console_logs_cremotemcp`
Get console logs from the browser tab for debugging.
**Parameters:**
- `tab` (optional): Specific tab ID to use
- `clear` (optional): Clear logs after retrieval (default: false)
**Example Usage:**
```
console_logs_cremotemcp:
clear: true
console_logs_cremotemcp:
tab: "ABC123"
clear: false
```
### 10. `console_command_cremotemcp`
Execute JavaScript commands in the browser console.
**Parameters:**
- `command` (required): JavaScript command to execute
- `tab` (optional): Specific tab ID to use
- `timeout` (optional): Timeout in seconds (default: 5)
**Example Usage:**
```
console_command_cremotemcp:
command: "console.log('Hello from MCP!')"
console_command_cremotemcp:
command: "document.querySelector('h1').textContent"
timeout: 10
```
## Common Usage Patterns
### 1. Basic Web Navigation
@ -277,6 +344,100 @@ web_navigate_cremotemcp:
screenshot: true
```
## Example: File Upload Workflow
Here's how to upload a file and use it in a web form:
```
# Step 1: Upload a file from client to container
file_upload_cremotemcp:
local_path: "/home/user/resume.pdf"
container_path: "/tmp/resume.pdf"
# Step 2: Navigate to upload form
web_navigate_cremotemcp:
url: "https://jobsite.com/apply"
# Step 3: Fill out application form
web_interact_cremotemcp:
action: "fill"
selector: "input[name='name']"
value: "Jane Smith"
# Step 4: Upload the file using the container path
web_interact_cremotemcp:
action: "upload"
selector: "input[type='file']"
value: "/tmp/resume.pdf"
# Step 5: Submit the application
web_interact_cremotemcp:
action: "submit"
selector: "form.application"
```
## Example: Download Files from Browser
Here's how to download files that the browser saves:
```
# Step 1: Navigate to download page
web_navigate_cremotemcp:
url: "https://example.com/reports"
# Step 2: Click download link
web_interact_cremotemcp:
action: "click"
selector: "a.download-report"
# Step 3: Wait a moment for the download to complete
# (In practice, you might need to check for file existence)
# Step 4: Download the file from container to client
file_download_cremotemcp:
container_path: "/tmp/downloads/report.pdf"
local_path: "/home/user/Downloads/report.pdf"
```
## Example: Console Debugging Workflow
Here's how to use console tools for debugging web automation:
```
# Step 1: Navigate to a page
web_navigate_cremotemcp:
url: "https://example.com/form"
# Step 2: Check what's in the console initially
console_logs_cremotemcp:
clear: false
# Step 3: Execute some debugging commands
console_command_cremotemcp:
command: "console.log('Starting form automation')"
console_command_cremotemcp:
command: "document.querySelectorAll('input').length"
# Step 4: Try to interact with form
web_interact_cremotemcp:
action: "fill"
selector: "input[name='email']"
value: "test@example.com"
# Step 5: Check for any console errors after interaction
console_logs_cremotemcp:
clear: false
# Step 6: Debug element selection
console_command_cremotemcp:
command: "document.querySelector('input[name=\"email\"]') ? 'Found' : 'Not found'"
# Step 7: Check element properties
console_command_cremotemcp:
command: "document.querySelector('input[name=\"email\"]').value"
```
## Integration Notes
- Tools use the `_cremotemcp` suffix to avoid naming conflicts

View File

@ -7,6 +7,10 @@
- `web_screenshot_cremotemcp` - Take screenshots
- `web_manage_tabs_cremotemcp` - Manage browser tabs
- `web_iframe_cremotemcp` - Switch iframe context
- `file_upload_cremotemcp` - Upload files to container
- `file_download_cremotemcp` - Download files from container
- `console_logs_cremotemcp` - Get browser console logs
- `console_command_cremotemcp` - Execute console commands
## Essential Parameters
@ -64,6 +68,33 @@ web_interact_cremotemcp:
value: "/path/to/file.pdf"
```
### Upload File to Container
```yaml
file_upload_cremotemcp:
local_path: "/home/user/document.pdf"
container_path: "/tmp/upload.pdf"
```
### Download File from Container
```yaml
file_download_cremotemcp:
container_path: "/tmp/downloaded.pdf"
local_path: "/home/user/Downloads/file.pdf"
```
### Get Console Logs
```yaml
console_logs_cremotemcp:
clear: true
```
### Execute Console Command
```yaml
console_command_cremotemcp:
command: "document.title"
timeout: 10
```
## Best CSS Selectors
✅ **Good:**

Binary file not shown.

View File

@ -687,6 +687,115 @@ func main() {
}, nil
})
// Register console_logs tool: returns the console entries captured for a tab
// as a numbered, human-readable text listing.
mcpServer.AddTool(mcp.Tool{
	Name:        "console_logs",
	Description: "Get console logs from the browser tab",
	InputSchema: mcp.ToolInputSchema{
		Type: "object",
		Properties: map[string]any{
			"tab": map[string]any{
				"type":        "string",
				"description": "Tab ID (optional, uses current tab)",
			},
			"clear": map[string]any{
				"type":        "boolean",
				"description": "Clear logs after retrieval (default: false)",
				"default":     false,
			},
		},
		Required: []string{},
	},
}, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
	// Convert arguments to map
	params, ok := request.Params.Arguments.(map[string]any)
	if !ok {
		return nil, fmt.Errorf("invalid arguments format")
	}

	tab := getStringParam(params, "tab", cremoteServer.currentTab)
	clearLogs := getBoolParam(params, "clear", false)

	// Get console logs
	logs, err := cremoteServer.client.GetConsoleLogs(tab, clearLogs)
	if err != nil {
		return nil, fmt.Errorf("failed to get console logs: %w", err)
	}

	// textField reads one field of a decoded log entry as text. Entries are
	// generic decoded data, so a field may be absent or not a string; a bare
	// type assertion would panic in that case. Missing fields become "" and
	// non-string values are rendered with fmt.Sprint.
	textField := func(entry map[string]interface{}, key string) string {
		switch v := entry[key].(type) {
		case nil:
			return ""
		case string:
			return v
		default:
			return fmt.Sprint(v)
		}
	}

	// Format logs for display
	var logText string
	if len(logs) == 0 {
		logText = "No console logs found."
	} else {
		logText = fmt.Sprintf("Found %d console log entries:\n\n", len(logs))
		// The loop variable is deliberately not named "log" so it does not
		// shadow the log package inside this handler.
		for i, entry := range logs {
			logText += fmt.Sprintf("[%d] %s [%s]: %s\n",
				i+1,
				textField(entry, "timestamp"),
				textField(entry, "level"),
				textField(entry, "message"),
			)
		}
	}

	return &mcp.CallToolResult{
		Content: []mcp.Content{
			mcp.NewTextContent(logText),
		},
		IsError: false,
	}, nil
})
// Register console_command tool: evaluates a JavaScript command in a tab and
// reports the command together with its result as text.
mcpServer.AddTool(mcp.Tool{
	Name:        "console_command",
	Description: "Execute a command in the browser console",
	InputSchema: mcp.ToolInputSchema{
		Type:     "object",
		Required: []string{"command"},
		Properties: map[string]any{
			"command": map[string]any{
				"type":        "string",
				"description": "JavaScript command to execute in console",
			},
			"tab": map[string]any{
				"type":        "string",
				"description": "Tab ID (optional, uses current tab)",
			},
			"timeout": map[string]any{
				"type":        "integer",
				"description": "Timeout in seconds (default: 5)",
				"default":     5,
			},
		},
	},
}, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
	// The arguments must arrive as a generic JSON object.
	args, isObject := request.Params.Arguments.(map[string]any)
	if !isObject {
		return nil, fmt.Errorf("invalid arguments format")
	}

	// Read every parameter first, applying the documented defaults.
	script := getStringParam(args, "command", "")
	tabID := getStringParam(args, "tab", cremoteServer.currentTab)
	timeoutSeconds := getIntParam(args, "timeout", 5)

	// The schema marks command as required; reject an empty one explicitly.
	if script == "" {
		return nil, fmt.Errorf("command parameter is required")
	}

	output, execErr := cremoteServer.client.ExecuteConsoleCommand(tabID, script, timeoutSeconds)
	if execErr != nil {
		return nil, fmt.Errorf("failed to execute console command: %w", execErr)
	}

	summary := fmt.Sprintf("Console command executed successfully.\nCommand: %s\nResult: %s", script, output)
	return &mcp.CallToolResult{
		Content: []mcp.Content{mcp.NewTextContent(summary)},
		IsError: false,
	}, nil
})
// Start the server
log.Printf("Cremote MCP server ready")
if err := server.ServeStdio(mcpServer); err != nil {