This commit is contained in:
Josh at WLTechBlog 2025-08-14 18:55:38 -05:00
parent d6209cd34f
commit dc6b288aa4
19 changed files with 4127 additions and 0 deletions

View File

@ -15,6 +15,17 @@ This architecture provides several benefits:
- No need to reconnect for each command
- Better performance
## MCP Server
Cremote includes a Model Context Protocol (MCP) server that provides a structured API for LLMs and AI agents. Instead of using CLI commands, the MCP server offers:
- **State Management**: Automatic tracking of tabs, history, and iframe context
- **Intelligent Abstractions**: High-level tools that combine multiple operations
- **Better Error Handling**: Rich error context for debugging
- **Automatic Screenshots**: Built-in screenshot capture for documentation
See the [MCP Server Documentation](mcp/README.md) for setup and usage instructions.
## Prerequisites
- Go 1.16 or higher

View File

@ -5,7 +5,9 @@ import (
"encoding/json"
"fmt"
"io"
"mime/multipart"
"net/http"
"os"
"strconv"
)
@ -602,3 +604,125 @@ func (c *Client) ClickElement(tabID, selector string, selectionTimeout, actionTi
return nil
}
// UploadFileToContainer sends a file from the client machine to the container
// by POSTing it to the daemon's /upload endpoint as a multipart form.
//
// localPath is the file to read on the client machine. containerPath is the
// destination inside the container; when it is empty the "path" form field is
// omitted and the server picks the location. On success the "target_path"
// value reported by the server is returned.
func (c *Client) UploadFileToContainer(localPath, containerPath string) (string, error) {
	src, err := os.Open(localPath)
	if err != nil {
		return "", fmt.Errorf("failed to open local file: %w", err)
	}
	defer src.Close()

	// Stat is only needed for the base name sent as the form file name.
	info, err := src.Stat()
	if err != nil {
		return "", fmt.Errorf("failed to get file info: %w", err)
	}

	// The complete multipart payload is assembled in memory before sending.
	payload := new(bytes.Buffer)
	form := multipart.NewWriter(payload)

	part, err := form.CreateFormFile("file", info.Name())
	if err != nil {
		return "", fmt.Errorf("failed to create form file: %w", err)
	}
	if _, err = io.Copy(part, src); err != nil {
		return "", fmt.Errorf("failed to copy file data: %w", err)
	}

	// The destination is optional; send it only when the caller supplied one.
	if containerPath != "" {
		if err = form.WriteField("path", containerPath); err != nil {
			return "", fmt.Errorf("failed to write path field: %w", err)
		}
	}

	// Closing the writer finishes the form; it must happen before sending.
	if err = form.Close(); err != nil {
		return "", fmt.Errorf("failed to close multipart writer: %w", err)
	}

	req, err := http.NewRequest(http.MethodPost, c.serverURL+"/upload", payload)
	if err != nil {
		return "", fmt.Errorf("failed to create request: %w", err)
	}
	req.Header.Set("Content-Type", form.FormDataContentType())

	resp, err := (&http.Client{}).Do(req)
	if err != nil {
		return "", fmt.Errorf("failed to send request: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: the body is only used to enrich the error message.
		raw, _ := io.ReadAll(resp.Body)
		return "", fmt.Errorf("upload failed with status %d: %s", resp.StatusCode, raw)
	}

	var result Response
	if err = json.NewDecoder(resp.Body).Decode(&result); err != nil {
		return "", fmt.Errorf("failed to decode response: %w", err)
	}
	if !result.Success {
		return "", fmt.Errorf("upload failed: %s", result.Error)
	}

	// The server reports where it stored the file under "target_path".
	fields, isMap := result.Data.(map[string]interface{})
	if !isMap {
		return "", fmt.Errorf("invalid response data format")
	}
	target, isString := fields["target_path"].(string)
	if !isString {
		return "", fmt.Errorf("target_path not found in response")
	}
	return target, nil
}
// DownloadFileFromContainer fetches a file from the container through the
// daemon's /download endpoint and writes it to the client machine.
//
// containerPath is the path of the file inside the container; it is sent as
// the "path" query parameter and is URL-encoded, so paths containing spaces,
// '&', '#', '+' or '%' are transmitted intact. localPath is where the file is
// written on the client machine (created or truncated).
//
// An error is returned when the request cannot be built or sent, when the
// server answers with a status other than 200, or when the local file cannot
// be created, written or closed.
func (c *Client) DownloadFileFromContainer(containerPath, localPath string) error {
	req, err := http.NewRequest(http.MethodGet, c.serverURL+"/download", nil)
	if err != nil {
		return fmt.Errorf("failed to create download request: %w", err)
	}

	// Build the query through url.Values so the path is escaped correctly
	// instead of being interpolated raw into the URL.
	query := req.URL.Query()
	query.Set("path", containerPath)
	req.URL.RawQuery = query.Encode()

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return fmt.Errorf("failed to send download request: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: the body is only used to enrich the error message.
		body, _ := io.ReadAll(resp.Body)
		return fmt.Errorf("download failed with status %d: %s", resp.StatusCode, body)
	}

	// Create the local file only after the server confirmed the download, so
	// a failed request does not leave an empty file behind.
	localFile, err := os.Create(localPath)
	if err != nil {
		return fmt.Errorf("failed to create local file: %w", err)
	}

	if _, err := io.Copy(localFile, resp.Body); err != nil {
		localFile.Close()
		return fmt.Errorf("failed to save file: %w", err)
	}

	// Close explicitly: a failed close can mean the data never reached disk.
	if err := localFile.Close(); err != nil {
		return fmt.Errorf("failed to save file: %w", err)
	}
	return nil
}

View File

@ -4,6 +4,7 @@ import (
"context"
"encoding/json"
"fmt"
"io"
"log"
"net"
"net/http"
@ -95,6 +96,8 @@ func NewDaemon(host string, port int) (*Daemon, error) {
mux := http.NewServeMux()
mux.HandleFunc("/command", daemon.handleCommand)
mux.HandleFunc("/status", daemon.handleStatus)
mux.HandleFunc("/upload", daemon.handleFileUpload)
mux.HandleFunc("/download", daemon.handleFileDownload)
daemon.server = &http.Server{
Addr: fmt.Sprintf("%s:%d", host, port),
@ -1637,3 +1640,105 @@ func (d *Daemon) switchToMain(tabID string) error {
return nil
}
// handleFileUpload handles file upload requests from clients.
//
// It accepts only POST requests carrying a multipart form with a "file" part
// and an optional "path" field naming the destination. When "path" is empty
// the file is stored as /tmp/<uploaded filename>. On success it replies with a
// JSON Response whose Data holds "filename", "size" and "target_path".
//
// Status codes: 405 for a non-POST method, 400 when the form or the file part
// cannot be read, 500 when the target file cannot be created or written.
func (d *Daemon) handleFileUpload(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodPost {
		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
		return
	}

	// Parse multipart form (32MB max memory)
	if err := r.ParseMultipartForm(32 << 20); err != nil {
		http.Error(w, "Failed to parse multipart form", http.StatusBadRequest)
		return
	}

	// Get the uploaded file
	file, header, err := r.FormFile("file")
	if err != nil {
		http.Error(w, "Failed to get uploaded file", http.StatusBadRequest)
		return
	}
	defer file.Close()

	// Get the target path (optional, defaults to /tmp/<filename>)
	targetPath := r.FormValue("path")
	if targetPath == "" {
		targetPath = "/tmp/" + header.Filename
	}

	// Create the target file
	targetFile, err := os.Create(targetPath)
	if err != nil {
		http.Error(w, fmt.Sprintf("Failed to create target file: %v", err), http.StatusInternalServerError)
		return
	}

	// Copy the upload, then close explicitly: a failed close can mean the
	// data was not fully written, which must not be reported as success.
	_, saveErr := io.Copy(targetFile, file)
	if closeErr := targetFile.Close(); saveErr == nil {
		saveErr = closeErr
	}
	if saveErr != nil {
		http.Error(w, fmt.Sprintf("Failed to save file: %v", saveErr), http.StatusInternalServerError)
		return
	}

	// Return success response
	response := Response{
		Success: true,
		Data: map[string]interface{}{
			"filename":    header.Filename,
			"size":        header.Size,
			"target_path": targetPath,
		},
	}
	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(response); err != nil {
		// The status line is already sent, so the failure can only be logged.
		log.Printf("Error writing upload response: %v", err)
	}
}
// handleFileDownload handles file download requests from clients.
//
// It accepts only GET requests and reads the file named by the "path" query
// parameter, streaming it back as an application/octet-stream attachment with
// Content-Disposition and Content-Length headers.
//
// Status codes: 405 for a non-GET method, 400 when "path" is missing or names
// a directory, 404 when the file does not exist, 500 when it cannot be opened
// or inspected.
func (d *Daemon) handleFileDownload(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodGet {
		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
		return
	}

	// Get the file path from query parameter
	filePath := r.URL.Query().Get("path")
	if filePath == "" {
		http.Error(w, "File path is required", http.StatusBadRequest)
		return
	}

	// Open first and stat the open handle, so the metadata describes exactly
	// the file that is streamed (no window between a path stat and the open).
	file, err := os.Open(filePath)
	if err != nil {
		if os.IsNotExist(err) {
			http.Error(w, "File not found", http.StatusNotFound)
		} else {
			http.Error(w, fmt.Sprintf("Failed to open file: %v", err), http.StatusInternalServerError)
		}
		return
	}
	defer file.Close()

	fileInfo, err := file.Stat()
	if err != nil {
		http.Error(w, fmt.Sprintf("Failed to access file: %v", err), http.StatusInternalServerError)
		return
	}

	// A directory can be opened but not streamed; reject it before any
	// success headers are written.
	if fileInfo.IsDir() {
		http.Error(w, "Path is a directory, not a file", http.StatusBadRequest)
		return
	}

	// Set headers for file download. %q quotes the name and escapes embedded
	// quotes and backslashes so the header parameter stays well-formed.
	w.Header().Set("Content-Type", "application/octet-stream")
	w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%q", fileInfo.Name()))
	w.Header().Set("Content-Length", strconv.FormatInt(fileInfo.Size(), 10))

	// Stream the file to the client. Headers are already sent at this point,
	// so a failure can only be logged.
	if _, err := io.Copy(w, file); err != nil {
		log.Printf("Error streaming file to client: %v", err)
	}
}

642
mcp/LLM_MCP_GUIDE.md Normal file
View File

@ -0,0 +1,642 @@
# LLM Agent Guide: Using Cremote MCP Server for Web Automation
This document provides comprehensive guidance for LLM agents on how to use the **Cremote MCP Server** for intelligent web automation. The MCP server provides a structured, stateful interface that's optimized for AI-driven web testing and automation workflows.
## What is the Cremote MCP Server?
The **Cremote MCP Server** is a Model Context Protocol implementation that wraps cremote's web automation capabilities in a structured API designed specifically for LLMs. Unlike CLI commands, the MCP server provides:
- **Automatic State Management**: Tracks current tab, tab history, and iframe context
- **Intelligent Abstractions**: High-level tools that combine multiple operations
- **Rich Error Context**: Detailed error information for better debugging
- **Automatic Screenshots**: Built-in screenshot capture for documentation
- **Structured Responses**: Consistent, parseable JSON responses
## Prerequisites
Before using the MCP server, ensure the cremote infrastructure is running:
1. **Check if everything is already running:**
```bash
cremote status
```
2. **Start Chromium with remote debugging (if needed):**
```bash
chromium --remote-debugging-port=9222 --user-data-dir=/tmp/chromium-debug &
```
3. **Start cremote daemon (if needed):**
```bash
cremotedaemon &
```
4. **The MCP server should be configured in your MCP client** (e.g., Claude Desktop)
## Available MCP Tools
### 1. `web_navigate` - Smart Navigation
Navigate to URLs with automatic tab management and optional screenshot capture.
**Parameters:**
- `url` (required): URL to navigate to
- `tab` (optional): Specific tab ID (uses current tab if not specified)
- `screenshot` (optional): Take screenshot after navigation (default: false)
- `timeout` (optional): Timeout in seconds (default: 5)
**Example:**
```json
{
"name": "web_navigate",
"arguments": {
"url": "https://example.com/login",
"screenshot": true,
"timeout": 10
}
}
```
**Smart Behavior:**
- Automatically opens a new tab if none exists
- Updates current tab tracking
- Adds tab to history for easy switching
### 2. `web_interact` - Element Interactions
Interact with web elements through a unified interface.
**Parameters:**
- `action` (required): "click", "fill", "submit", or "upload"
- `selector` (required): CSS selector for the target element
- `value` (optional): Value for fill/upload actions
- `tab` (optional): Tab ID (uses current tab if not specified)
- `timeout` (optional): Timeout in seconds (default: 5)
**Examples:**
```json
// Fill a form field
{
"name": "web_interact",
"arguments": {
"action": "fill",
"selector": "#username",
"value": "testuser"
}
}
// Click a button
{
"name": "web_interact",
"arguments": {
"action": "click",
"selector": "#login-button"
}
}
// Submit a form
{
"name": "web_interact",
"arguments": {
"action": "submit",
"selector": "form#login-form"
}
}
// Upload a file
{
"name": "web_interact",
"arguments": {
"action": "upload",
"selector": "input[type=file]",
"value": "/path/to/file.pdf"
}
}
```
### 3. `web_extract` - Data Extraction
Extract information from web pages through multiple methods.
**Parameters:**
- `type` (required): "source", "element", or "javascript"
- `selector` (optional): CSS selector (required for "element" type)
- `code` (optional): JavaScript code (required for "javascript" type)
- `tab` (optional): Tab ID (uses current tab if not specified)
- `timeout` (optional): Timeout in seconds (default: 5)
**Examples:**
```json
// Get page source
{
"name": "web_extract",
"arguments": {
"type": "source"
}
}
// Get specific element HTML
{
"name": "web_extract",
"arguments": {
"type": "element",
"selector": ".error-message"
}
}
// Execute JavaScript and get result
{
"name": "web_extract",
"arguments": {
"type": "javascript",
"code": "document.title"
}
}
// Check form validation
{
"name": "web_extract",
"arguments": {
"type": "javascript",
"code": "document.getElementById('email').validity.valid"
}
}
```
### 4. `web_screenshot` - Screenshot Capture
Take screenshots for documentation and debugging.
**Parameters:**
- `output` (required): File path for the screenshot
- `full_page` (optional): Capture full page vs viewport (default: false)
- `tab` (optional): Tab ID (uses current tab if not specified)
- `timeout` (optional): Timeout in seconds (default: 5)
**Examples:**
```json
// Viewport screenshot
{
"name": "web_screenshot",
"arguments": {
"output": "/tmp/login-page.png"
}
}
// Full page screenshot
{
"name": "web_screenshot",
"arguments": {
"output": "/tmp/full-page.png",
"full_page": true
}
}
```
### 5. `web_manage_tabs` - Tab Management
Manage browser tabs with automatic state tracking.
**Parameters:**
- `action` (required): "open", "close", "list", or "switch"
- `tab` (optional): Tab ID (required for "close" and "switch" actions)
- `timeout` (optional): Timeout in seconds (default: 5)
**Examples:**
```json
// Open new tab
{
"name": "web_manage_tabs",
"arguments": {
"action": "open"
}
}
// List all tabs
{
"name": "web_manage_tabs",
"arguments": {
"action": "list"
}
}
// Switch to specific tab
{
"name": "web_manage_tabs",
"arguments": {
"action": "switch",
"tab": "tab-id-123"
}
}
// Close current tab
{
"name": "web_manage_tabs",
"arguments": {
"action": "close"
}
}
```
### 6. `web_iframe` - Iframe Context Management
Switch between main page and iframe contexts for testing embedded content.
**Parameters:**
- `action` (required): "enter" or "exit"
- `selector` (optional): Iframe CSS selector (required for "enter" action)
- `tab` (optional): Tab ID (uses current tab if not specified)
**Examples:**
```json
// Enter iframe context
{
"name": "web_iframe",
"arguments": {
"action": "enter",
"selector": "iframe#payment-form"
}
}
// Exit iframe context (return to main page)
{
"name": "web_iframe",
"arguments": {
"action": "exit"
}
}
```
## Response Format
All MCP tools return a consistent response structure:
```json
{
"success": true,
"data": "...", // Tool-specific response data
"screenshot": "/tmp/shot.png", // Screenshot path (if captured)
"current_tab": "tab-id-123", // Current active tab
"tab_history": ["tab-id-123"], // Tab history stack
"iframe_mode": false, // Whether in iframe context
"error": null, // Error message (if failed)
"metadata": {} // Additional context information
}
```
## Common Automation Patterns
### 1. Login Flow Testing
```json
// 1. Navigate to login page with screenshot
{
"name": "web_navigate",
"arguments": {
"url": "https://myapp.com/login",
"screenshot": true
}
}
// 2. Fill credentials
{
"name": "web_interact",
"arguments": {
"action": "fill",
"selector": "#email",
"value": "user@example.com"
}
}
{
"name": "web_interact",
"arguments": {
"action": "fill",
"selector": "#password",
"value": "password123"
}
}
// 3. Submit login
{
"name": "web_interact",
"arguments": {
"action": "click",
"selector": "#login-button"
}
}
// 4. Verify success
{
"name": "web_extract",
"arguments": {
"type": "javascript",
"code": "document.querySelector('.welcome-message')?.textContent"
}
}
// 5. Document result
{
"name": "web_screenshot",
"arguments": {
"output": "/tmp/login-success.png"
}
}
```
### 2. Form Validation Testing
```json
// 1. Navigate to form
{
"name": "web_navigate",
"arguments": {
"url": "https://myapp.com/register"
}
}
// 2. Test empty form submission
{
"name": "web_interact",
"arguments": {
"action": "click",
"selector": "#submit-button"
}
}
// 3. Check for validation errors
{
"name": "web_extract",
"arguments": {
"type": "element",
"selector": ".error-message"
}
}
// 4. Test invalid email
{
"name": "web_interact",
"arguments": {
"action": "fill",
"selector": "#email",
"value": "invalid-email"
}
}
// 5. Verify JavaScript validation
{
"name": "web_extract",
"arguments": {
"type": "javascript",
"code": "document.getElementById('email').validity.valid"
}
}
```
### 3. Multi-Tab Workflow
```json
// 1. Open multiple tabs for comparison
{
"name": "web_manage_tabs",
"arguments": {
"action": "open"
}
}
{
"name": "web_navigate",
"arguments": {
"url": "https://app.com/admin"
}
}
{
"name": "web_manage_tabs",
"arguments": {
"action": "open"
}
}
{
"name": "web_navigate",
"arguments": {
"url": "https://app.com/user"
}
}
// 2. List tabs to see current state
{
"name": "web_manage_tabs",
"arguments": {
"action": "list"
}
}
// 3. Switch between tabs as needed
{
"name": "web_manage_tabs",
"arguments": {
"action": "switch",
"tab": "first-tab-id"
}
}
```
### 4. Iframe Testing (Payment Forms, Widgets)
```json
// 1. Navigate to page with iframe
{
"name": "web_navigate",
"arguments": {
"url": "https://shop.com/checkout"
}
}
// 2. Enter iframe context
{
"name": "web_iframe",
"arguments": {
"action": "enter",
"selector": "iframe.payment-frame"
}
}
// 3. Interact with iframe content
{
"name": "web_interact",
"arguments": {
"action": "fill",
"selector": "#card-number",
"value": "4111111111111111"
}
}
{
"name": "web_interact",
"arguments": {
"action": "fill",
"selector": "#expiry",
"value": "12/25"
}
}
// 4. Exit iframe context
{
"name": "web_iframe",
"arguments": {
"action": "exit"
}
}
// 5. Continue with main page
{
"name": "web_interact",
"arguments": {
"action": "click",
"selector": "#complete-order"
}
}
```
## Best Practices for LLMs
### 1. State Awareness
- The MCP server automatically tracks state, but always check the response for current context
- Use the `current_tab` and `iframe_mode` fields to understand your current position
- The `tab_history` helps you understand available tabs
### 2. Error Handling
- Always check the `success` field in responses
- Use the `error` field for detailed error information
- Take screenshots when errors occur for debugging: `"screenshot": true`
### 3. Timeout Management
- Use longer timeouts for slow-loading pages or complex interactions
- Default 5-second timeouts work for most scenarios
- Increase timeouts for file uploads or heavy JavaScript applications
### 4. Screenshot Strategy
- Take screenshots at key points for documentation
- Use `full_page: true` for comprehensive page captures
- Screenshot before and after critical actions for debugging
### 5. Verification Patterns
- Always verify actions completed successfully
- Use JavaScript extraction to check application state
- Combine element extraction with JavaScript validation
## Debugging Failed Tests
### 1. Capture Current State
```json
// Get page source for analysis
{
"name": "web_extract",
"arguments": {
"type": "source"
}
}
// Take screenshot to see visual state
{
"name": "web_screenshot",
"arguments": {
"output": "/tmp/debug-state.png",
"full_page": true
}
}
// Inspect console.error (note: this returns the function's source text, not previously logged console errors)
{
"name": "web_extract",
"arguments": {
"type": "javascript",
"code": "console.error.toString()"
}
}
```
### 2. Element Debugging
```json
// Check if element exists
{
"name": "web_extract",
"arguments": {
"type": "javascript",
"code": "document.querySelector('#my-element') !== null"
}
}
// Get element properties
{
"name": "web_extract",
"arguments": {
"type": "javascript",
"code": "JSON.stringify({visible: document.querySelector('#my-element')?.offsetParent !== null, text: document.querySelector('#my-element')?.textContent})"
}
}
```
### 3. Network and Loading Issues
```json
// Check if page is still loading
{
"name": "web_extract",
"arguments": {
"type": "javascript",
"code": "document.readyState"
}
}
// Check whether a global error handler is installed (window.onerror being set does not by itself mean an error occurred)
{
"name": "web_extract",
"arguments": {
"type": "javascript",
"code": "window.onerror ? 'Errors detected' : 'No errors'"
}
}
```
## Advantages Over CLI Commands
### 1. **Automatic State Management**
- No need to manually track tab IDs
- Automatic current tab resolution
- Persistent iframe context tracking
### 2. **Rich Error Context**
- Detailed error messages with context
- Automatic screenshot capture on failures
- Structured error responses for better debugging
### 3. **Intelligent Abstractions**
- Combined operations in single tools
- Smart parameter defaults and validation
- Automatic resource management
### 4. **Better Performance**
- Direct library integration (no subprocess overhead)
- Persistent connections to cremote daemon
- Efficient state tracking
### 5. **Structured Responses**
- Consistent JSON format for all responses
- Rich metadata for decision making
- Easy parsing and error handling
## Key Differences from CLI Usage
| Aspect | CLI Commands | MCP Server |
|--------|-------------|------------|
| **State Tracking** | Manual tab ID management | Automatic state management |
| **Error Handling** | Text parsing required | Structured error objects |
| **Screenshots** | Manual command execution | Automatic capture options |
| **Performance** | Subprocess overhead | Direct library calls |
| **Response Format** | Text output | Structured JSON |
| **Context Management** | Manual iframe tracking | Automatic context switching |
| **Resource Cleanup** | Manual tab management | Automatic resource tracking |
The Cremote MCP Server transforms web automation from a series of CLI commands into an intelligent, stateful API that's optimized for AI-driven testing and automation workflows.

379
mcp/LLM_USAGE_GUIDE.md Normal file
View File

@ -0,0 +1,379 @@
# Cremote MCP Tools - LLM Usage Guide
This guide explains how LLMs can use the cremote MCP (Model Context Protocol) tools for web automation tasks.
## Available Tools
The cremote MCP server provides six comprehensive web automation tools:
### 1. `web_navigate_cremotemcp`
Navigate to URLs and optionally take screenshots.
**Parameters:**
- `url` (required): The URL to navigate to
- `screenshot` (optional): Boolean, take a screenshot after navigation
- `tab` (optional): Specific tab ID to use
- `timeout` (optional): Timeout in seconds (default: 5)
**Example Usage:**
```
web_navigate_cremotemcp:
url: "https://example.com"
screenshot: true
```
### 2. `web_interact_cremotemcp`
Interact with web elements through various actions.
**Parameters:**
- `action` (required): One of "click", "fill", "submit", "upload"
- `selector` (required): CSS selector for the target element
- `value` (optional): Value for fill/upload actions
- `tab` (optional): Specific tab ID to use
- `timeout` (optional): Timeout in seconds (default: 5)
**Example Usage:**
```
web_interact_cremotemcp:
action: "click"
selector: "button.submit"
web_interact_cremotemcp:
action: "fill"
selector: "input[name='email']"
value: "user@example.com"
```
### 3. `web_extract_cremotemcp`
Extract data from web pages (HTML source, element content, or JavaScript execution).
**Parameters:**
- `type` (required): One of "source", "element", "javascript"
- `selector` (optional): CSS selector (required for "element" type)
- `code` (optional): JavaScript code (required for "javascript" type)
- `tab` (optional): Specific tab ID to use
- `timeout` (optional): Timeout in seconds (default: 5)
**Example Usage:**
```
web_extract_cremotemcp:
type: "source"
web_extract_cremotemcp:
type: "element"
selector: "div.content"
web_extract_cremotemcp:
type: "javascript"
code: "document.title"
```
### 4. `web_screenshot_cremotemcp`
Take screenshots of web pages.
**Parameters:**
- `output` (required): File path where screenshot will be saved
- `full_page` (optional): Capture full page (default: false)
- `tab` (optional): Specific tab ID to use
- `timeout` (optional): Timeout in seconds (default: 5)
**Example Usage:**
```
web_screenshot_cremotemcp:
output: "/tmp/page-screenshot.png"
full_page: true
```
### 5. `web_manage_tabs_cremotemcp`
Manage browser tabs (open, close, list, switch).
**Parameters:**
- `action` (required): One of "open", "close", "list", "switch"
- `tab` (optional): Tab ID (required for "close" and "switch" actions)
- `timeout` (optional): Timeout in seconds (default: 5)
**Example Usage:**
```
web_manage_tabs_cremotemcp:
action: "open"
web_manage_tabs_cremotemcp:
action: "list"
web_manage_tabs_cremotemcp:
action: "switch"
tab: "ABC123"
```
### 6. `web_iframe_cremotemcp`
Switch iframe context for subsequent operations.
**Parameters:**
- `action` (required): One of "enter", "exit"
- `selector` (optional): Iframe CSS selector (required for "enter" action)
- `tab` (optional): Specific tab ID to use
**Example Usage:**
```
web_iframe_cremotemcp:
action: "enter"
selector: "iframe#payment-form"
web_iframe_cremotemcp:
action: "exit"
```
## Common Usage Patterns
### 1. Basic Web Navigation
```
# Navigate to a website
web_navigate_cremotemcp:
url: "https://example.com"
screenshot: true
```
### 2. Form Interaction Sequence
```
# 1. Navigate to the page
web_navigate_cremotemcp:
url: "https://example.com/login"
# 2. Fill username field
web_interact_cremotemcp:
action: "fill"
selector: "input[name='username']"
value: "myusername"
# 3. Fill password field
web_interact_cremotemcp:
action: "fill"
selector: "input[name='password']"
value: "mypassword"
# 4. Submit the form
web_interact_cremotemcp:
action: "submit"
selector: "form"
```
### 3. Clicking Elements
```
# Click a button
web_interact_cremotemcp:
action: "click"
selector: "button#submit-btn"
# Click a link
web_interact_cremotemcp:
action: "click"
selector: "a[href='/dashboard']"
```
## Best Practices for LLMs
### 1. Always Start with Navigation
Before interacting with elements, navigate to the target page:
```
web_navigate_cremotemcp:
url: "https://target-website.com"
```
### 2. Use Specific CSS Selectors
Be as specific as possible with selectors to avoid ambiguity:
- Good: `input[name='email']`, `button.primary-submit`
- Avoid: `input`, `button`
### 3. Take Screenshots for Debugging
When troubleshooting or documenting, use screenshots:
```
web_navigate_cremotemcp:
url: "https://example.com"
screenshot: true
```
### 4. Handle Timeouts Appropriately
For slow-loading pages, increase timeout:
```
web_navigate_cremotemcp:
url: "https://slow-website.com"
timeout: 10
```
### 5. Sequential Operations
Perform operations in logical sequence:
1. Navigate to page
2. Fill required fields
3. Submit forms
4. Navigate to next page if needed
## Error Handling
### Common Error Scenarios:
1. **Element not found**: Selector doesn't match any elements
2. **Timeout**: Page takes too long to load or element to appear
3. **Navigation failed**: URL is invalid or unreachable
### Troubleshooting Tips:
1. Verify the URL is correct and accessible
2. Check CSS selectors using browser developer tools
3. Increase timeout for slow-loading content
4. Take screenshots to see current page state
## Tab Management
The tools automatically manage browser tabs:
- If no tab is specified, a new tab is created automatically
- Tab IDs are returned in responses for reference
- Multiple tabs can be managed by specifying tab IDs
## Security Considerations
### Safe Practices:
- Only navigate to trusted websites
- Be cautious with form submissions
- Avoid entering sensitive information in examples
- Use screenshots sparingly to avoid exposing sensitive data
### Limitations:
- Cannot bypass CAPTCHA or other anti-automation measures
- Subject to same-origin policy restrictions
- May not work with heavily JavaScript-dependent sites
## Example: Complete Web Automation Task
Here's a complete example of automating a web form:
```
# Step 1: Navigate to the form page
web_navigate_cremotemcp:
url: "https://example.com/contact"
screenshot: true
# Step 2: Fill out the contact form
web_interact_cremotemcp:
action: "fill"
selector: "input[name='name']"
value: "John Doe"
web_interact_cremotemcp:
action: "fill"
selector: "input[name='email']"
value: "john@example.com"
web_interact_cremotemcp:
action: "fill"
selector: "textarea[name='message']"
value: "Hello, this is a test message."
# Step 3: Submit the form
web_interact_cremotemcp:
action: "submit"
selector: "form#contact-form"
# Step 4: Take a screenshot of the result
web_navigate_cremotemcp:
url: "current" # Stay on current page
screenshot: true
```
## Integration Notes
- Tools use the `_cremotemcp` suffix to avoid naming conflicts
- Responses include success status and descriptive messages
- Screenshots are saved to `/tmp/` directory with timestamps
- The underlying cremote daemon handles browser management
## Advanced Usage Examples
### Testing Web Applications
```
# Navigate to application
web_navigate_cremotemcp:
url: "https://myapp.com/login"
screenshot: true
# Test login functionality
web_interact_cremotemcp:
action: "fill"
selector: "#username"
value: "testuser"
web_interact_cremotemcp:
action: "fill"
selector: "#password"
value: "testpass"
web_interact_cremotemcp:
action: "click"
selector: "button[type='submit']"
# Verify successful login
web_navigate_cremotemcp:
url: "current"
screenshot: true
```
### Data Extraction Workflows
```
# Navigate to data source
web_navigate_cremotemcp:
url: "https://data-site.com/table"
# Click through pagination or filters
web_interact_cremotemcp:
action: "click"
selector: ".filter-button"
# Take screenshot to document current state
web_navigate_cremotemcp:
url: "current"
screenshot: true
```
### File Upload Testing
```
# Navigate to upload form
web_navigate_cremotemcp:
url: "https://example.com/upload"
# Upload a file
web_interact_cremotemcp:
action: "upload"
selector: "input[type='file']"
value: "/path/to/test-file.pdf"
# Submit the upload form
web_interact_cremotemcp:
action: "click"
selector: "button.upload-submit"
```
## Tool Response Format
All tools return structured responses:
**Success Response:**
```
"Successfully navigated to https://example.com in tab ABC123 (screenshot saved to /tmp/navigate-1234567890.png)"
```
**Error Response:**
```
"failed to load URL: context deadline exceeded"
```
## CSS Selector Best Practices
### Recommended Selector Types:
1. **ID selectors**: `#unique-id` (most reliable)
2. **Name attributes**: `input[name='fieldname']`
3. **Class combinations**: `.primary.button`
4. **Attribute selectors**: `button[data-action='submit']`
### Avoid These Selectors:
1. **Generic tags**: `div`, `span`, `input` (too broad)
2. **Position-based**: `:nth-child()` (fragile)
3. **Text-based**: `:contains()` (not standard CSS)
This documentation should help LLMs effectively use the cremote MCP tools for web automation tasks.

96
mcp/QUICK_REFERENCE.md Normal file
View File

@ -0,0 +1,96 @@
# Cremote MCP Tools - Quick Reference
## Tool Names
- `web_navigate_cremotemcp` - Navigate to URLs
- `web_interact_cremotemcp` - Interact with elements
- `web_extract_cremotemcp` - Extract page data
- `web_screenshot_cremotemcp` - Take screenshots
- `web_manage_tabs_cremotemcp` - Manage browser tabs
- `web_iframe_cremotemcp` - Switch iframe context
## Essential Parameters
### web_navigate_cremotemcp
```yaml
url: "https://example.com" # Required
screenshot: true # Optional, default false
timeout: 10 # Optional, default 5 seconds
```
### web_interact_cremotemcp
```yaml
action: "click" # Required: click|fill|submit|upload
selector: "button.submit" # Required: CSS selector
value: "text to fill" # Required for fill/upload actions
timeout: 10 # Optional, default 5 seconds
```
## Common Patterns
### Navigate + Screenshot
```yaml
web_navigate_cremotemcp:
url: "https://example.com"
screenshot: true
```
### Fill Form Field
```yaml
web_interact_cremotemcp:
action: "fill"
selector: "input[name='email']"
value: "user@example.com"
```
### Click Button
```yaml
web_interact_cremotemcp:
action: "click"
selector: "button#submit"
```
### Submit Form
```yaml
web_interact_cremotemcp:
action: "submit"
selector: "form"
```
### Upload File
```yaml
web_interact_cremotemcp:
action: "upload"
selector: "input[type='file']"
value: "/path/to/file.pdf"
```
## Best CSS Selectors
✅ **Good:**
- `#unique-id`
- `input[name='fieldname']`
- `button.primary-submit`
- `form#login-form`
❌ **Avoid:**
- `div` (too generic)
- `input` (too broad)
- `:nth-child(3)` (fragile)
## Typical Workflow
1. **Navigate** to target page
2. **Fill** required form fields
3. **Click** submit buttons
4. **Take screenshots** for verification
5. **Navigate** to next page if needed
## Error Handling
- **Element not found**: Check CSS selector
- **Timeout**: Increase timeout parameter
- **Navigation failed**: Verify URL accessibility
## Screenshots
Screenshots are automatically saved to `/tmp/navigate-{timestamp}.png` when requested.

252
mcp/README.md Normal file
View File

@ -0,0 +1,252 @@
# Cremote MCP Server
This is a Model Context Protocol (MCP) server that exposes cremote's web automation capabilities to LLMs and AI agents. Rather than issuing one CLI command per step, clients call a structured API that keeps state between calls (current tab, tab history, iframe context) and offers higher-level tools.
## Features
- **State Management**: Automatically tracks current tab, tab history, and iframe context
- **Intelligent Abstractions**: High-level tools that combine multiple cremote operations
- **Automatic Screenshots**: Optional screenshot capture for debugging and documentation
- **Error Recovery**: Better error handling and context for LLMs
- **Resource Management**: Automatic cleanup and connection management
## Quick Start for LLMs
**For LLM agents**: See the comprehensive [LLM MCP Guide](LLM_MCP_GUIDE.md) for detailed usage instructions, examples, and best practices.
## Available Tools
### 1. `web_navigate`
Navigate to URLs with optional screenshot capture.
```json
{
"name": "web_navigate",
"arguments": {
"url": "https://example.com",
"screenshot": true,
"timeout": 10
}
}
```
### 2. `web_interact`
Interact with web elements (click, fill, submit, upload).
```json
{
"name": "web_interact",
"arguments": {
"action": "fill",
"selector": "#username",
"value": "testuser",
"timeout": 5
}
}
```
### 3. `web_extract`
Extract data from pages (source, element HTML, JavaScript execution).
```json
{
"name": "web_extract",
"arguments": {
"type": "javascript",
"code": "document.title",
"timeout": 5
}
}
```
### 4. `web_screenshot`
Take screenshots of the current page.
```json
{
"name": "web_screenshot",
"arguments": {
"output": "/tmp/page.png",
"full_page": true,
"timeout": 5
}
}
```
### 5. `web_manage_tabs`
Manage browser tabs (open, close, list, switch).
```json
{
"name": "web_manage_tabs",
"arguments": {
"action": "open",
"timeout": 5
}
}
```
### 6. `web_iframe`
Switch iframe context for subsequent operations.
```json
{
"name": "web_iframe",
"arguments": {
"action": "enter",
"selector": "iframe#payment-form"
}
}
```
## Installation & Usage
### Prerequisites
1. **Cremote daemon must be running**:
```bash
cremotedaemon
```
2. **Chrome/Chromium with remote debugging**:
```bash
chromium --remote-debugging-port=9222 --user-data-dir=/tmp/chromium-debug
```
### Build the MCP Server
```bash
cd mcp/
go build -o cremote-mcp .
```
### Configuration
Set environment variables to configure the cremote connection:
```bash
export CREMOTE_HOST=localhost
export CREMOTE_PORT=8989
```
### Running with Claude Desktop
Add to your Claude Desktop configuration (`~/Library/Application Support/Claude/claude_desktop_config.json` on macOS):
```json
{
"mcpServers": {
"cremote": {
"command": "/path/to/cremote-mcp",
"env": {
"CREMOTE_HOST": "localhost",
"CREMOTE_PORT": "8989"
}
}
}
}
```
### Running with Other MCP Clients
The server communicates via JSON-RPC over stdio, so it can be used with any MCP-compatible client:
```bash
echo '{"method":"tools/list","params":{},"id":1}' | ./cremote-mcp
```
## Response Format
All tool responses include:
```json
{
"success": true,
"data": "...",
"screenshot": "/tmp/screenshot.png",
"current_tab": "tab-id-123",
"tab_history": ["tab-id-123", "tab-id-456"],
"iframe_mode": false,
"error": null,
"metadata": {}
}
```
## Example Workflow
```json
// 1. Navigate to a page
{
"name": "web_navigate",
"arguments": {
"url": "https://example.com/login",
"screenshot": true
}
}
// 2. Fill login form
{
"name": "web_interact",
"arguments": {
"action": "fill",
"selector": "#username",
"value": "testuser"
}
}
{
"name": "web_interact",
"arguments": {
"action": "fill",
"selector": "#password",
"value": "password123"
}
}
// 3. Submit form
{
"name": "web_interact",
"arguments": {
"action": "click",
"selector": "#login-button"
}
}
// 4. Extract result
{
"name": "web_extract",
"arguments": {
"type": "javascript",
"code": "document.querySelector('.welcome-message')?.textContent"
}
}
// 5. Take final screenshot
{
"name": "web_screenshot",
"arguments": {
"output": "/tmp/login-success.png",
"full_page": true
}
}
```
## Benefits Over CLI
- **State Management**: No need to manually track tab IDs
- **Better Error Context**: Rich error information for debugging
- **Automatic Screenshots**: Built-in screenshot capture for documentation
- **Intelligent Defaults**: Smart parameter handling and fallbacks
- **Resource Cleanup**: Automatic management of tabs and files
- **Structured Responses**: Consistent, parseable response format
## Development
To extend the MCP server with new tools:
1. Add the tool definition to `handleToolsList()`
2. Add a case in `handleToolCall()`
3. Implement the handler function following the pattern of existing handlers
4. Update this documentation
The server is designed to be easily extensible while maintaining consistency with the cremote client library.

BIN
mcp/backup/cremote-mcp Executable file

Binary file not shown.

BIN
mcp/backup/cremote-mcp-http Executable file

Binary file not shown.

BIN
mcp/backup/cremote-mcp-stdio Executable file

Binary file not shown.

923
mcp/backup/server-stdio.go Normal file
View File

@ -0,0 +1,923 @@
package main
import (
"bufio"
"encoding/json"
"fmt"
"io"
"log"
"os"
"os/signal"
"path/filepath"
"strconv"
"strings"
"syscall"
"time"
"git.teamworkapps.com/shortcut/cremote/client"
)
// DebugLogger writes debug output to a log file on disk.
// Its Log, LogJSON and Close methods check the receiver for nil, so a nil
// *DebugLogger can be used and simply discards everything.
type DebugLogger struct {
// file is the open log file; Close closes it.
file *os.File
// logger formats messages and writes them to file.
logger *log.Logger
}
// NewDebugLogger creates the log directory /tmp/cremote-mcp-logs if needed,
// opens a log file named mcp-stdio-<unix-seconds>.log inside it and returns a
// DebugLogger that writes to that file.
//
// The log captures raw request and response payloads, which can include
// values typed into forms, so the directory and the file are created
// accessible to the owning user only. Permissions of a directory that already
// exists are left untouched.
//
// An error is returned when the directory cannot be created or the file
// cannot be opened.
func NewDebugLogger() (*DebugLogger, error) {
	const logDir = "/tmp/cremote-mcp-logs"
	if err := os.MkdirAll(logDir, 0700); err != nil {
		return nil, fmt.Errorf("failed to create log directory: %w", err)
	}

	logFile := filepath.Join(logDir, fmt.Sprintf("mcp-stdio-%d.log", time.Now().Unix()))
	file, err := os.OpenFile(logFile, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0600)
	if err != nil {
		return nil, fmt.Errorf("failed to open log file: %w", err)
	}

	logger := log.New(file, "", log.LstdFlags|log.Lmicroseconds|log.Lshortfile)
	logger.Printf("=== MCP STDIO Server Debug Log Started ===")

	return &DebugLogger{
		file:   file,
		logger: logger,
	}, nil
}
// Log formats a message Printf-style and writes it to the debug log.
// It is a no-op when the receiver or its logger is nil.
func (d *DebugLogger) Log(format string, args ...interface{}) {
	if d == nil || d.logger == nil {
		return
	}
	d.logger.Printf(format, args...)
}
// LogJSON writes obj to the debug log as indented JSON, preceded by label.
// If obj cannot be marshaled, the marshal error is logged under the same
// label instead. It is a no-op when the receiver or its logger is nil.
func (d *DebugLogger) LogJSON(label string, obj interface{}) {
	if d == nil || d.logger == nil {
		return
	}

	encoded, marshalErr := json.MarshalIndent(obj, "", " ")
	if marshalErr != nil {
		d.logger.Printf("%s: JSON marshal error: %v", label, marshalErr)
		return
	}
	d.logger.Printf("%s:\n%s", label, string(encoded))
}
// Close writes a closing marker to the log and closes the log file.
// It is a no-op when the receiver or its file is nil. The marker is skipped
// when no logger is attached, matching the nil checks in Log and LogJSON.
func (d *DebugLogger) Close() {
	if d == nil || d.file == nil {
		return
	}
	if d.logger != nil {
		d.logger.Printf("=== MCP STDIO Server Debug Log Ended ===")
	}
	// A close error at shutdown cannot be acted upon, so it is discarded on
	// purpose.
	_ = d.file.Close()
}
// debugLogger is the process-wide debug logger. main assigns it from
// NewDebugLogger; it is nil when that call fails, which the nil-checking
// DebugLogger methods tolerate.
var debugLogger *DebugLogger
// MCPServer wraps the cremote client with MCP protocol handling and holds the
// browser state that is shared between tool calls.
type MCPServer struct {
// client is the cremote client created by NewMCPServer.
client *client.Client
// currentTab is the tab ID used when a tool call names no tab; empty when no tab is active.
currentTab string
// tabHistory lists the tab IDs the server has opened or selected; new selections are appended.
tabHistory []string
// iframeMode is set by the web_iframe tool: true for "enter", false for "exit".
iframeMode bool
// screenshots collects the file paths of screenshots taken through this server.
screenshots []string
}
// MCPRequest represents an incoming MCP request as decoded from JSON.
type MCPRequest struct {
// JSONRPC is the value of the message's "jsonrpc" member, if any.
JSONRPC string `json:"jsonrpc,omitempty"`
// Method selects the handler: "initialize", "tools/list" or "tools/call".
Method string `json:"method"`
// Params holds the decoded "params" object.
Params map[string]interface{} `json:"params"`
// ID is copied into the response so the caller can match it to this request.
ID interface{} `json:"id"`
}
// MCPResponse represents an MCP response. Handlers fill either Result or
// Error; both are omitted from the JSON when unset.
type MCPResponse struct {
// JSONRPC is set to "2.0" by main before the response is written.
JSONRPC string `json:"jsonrpc,omitempty"`
// Result is the payload of a successful call.
Result interface{} `json:"result,omitempty"`
// Error describes a failed call.
Error *MCPError `json:"error,omitempty"`
// ID is the ID of the request being answered.
ID interface{} `json:"id"`
}
// MCPError represents an MCP error.
type MCPError struct {
// Code is a numeric error code; this file uses -32601 (unknown method or
// tool), -32602 (missing tool name) and -32603 (tool execution failed).
Code int `json:"code"`
// Message is the human-readable error text.
Message string `json:"message"`
}
// ToolResult represents the result of a tool execution.
type ToolResult struct {
// Success is set to true by every handler that completes without error.
Success bool `json:"success"`
// Data is the tool-specific payload (a map of details, a string, or a tab list).
Data interface{} `json:"data,omitempty"`
// Screenshot is the path of a screenshot taken during the call, if any.
Screenshot string `json:"screenshot,omitempty"`
// CurrentTab, TabHistory and IframeMode are filled by handleToolCall from
// the server state after the tool has run.
CurrentTab string `json:"current_tab,omitempty"`
TabHistory []string `json:"tab_history,omitempty"`
IframeMode bool `json:"iframe_mode"`
// NOTE(review): no handler in this file assigns Error; failures are
// reported through MCPResponse.Error instead — confirm whether it is needed.
Error string `json:"error,omitempty"`
// Metadata carries extra string details, e.g. "screenshot_error" from handleNavigate.
Metadata map[string]string `json:"metadata,omitempty"`
}
// NewMCPServer returns an MCPServer whose cremote client targets host:port.
// The tab history and screenshot list start out empty (non-nil) and no tab is
// current.
func NewMCPServer(host string, port int) *MCPServer {
	server := &MCPServer{
		tabHistory:  make([]string, 0),
		screenshots: make([]string, 0),
	}
	server.client = client.NewClient(host, port)
	return server
}
// HandleRequest routes an MCP request to the handler for its method and
// returns that handler's response. Unknown methods yield a -32601 error that
// names the method. The request and the response are written to the debug
// log.
func (s *MCPServer) HandleRequest(req MCPRequest) MCPResponse {
	method := req.Method
	debugLogger.Log("HandleRequest called with method: %s, ID: %v", method, req.ID)
	debugLogger.LogJSON("Incoming Request", req)

	// Pick the handler first, then invoke it once below.
	var handle func(MCPRequest) MCPResponse
	switch method {
	case "initialize":
		debugLogger.Log("Handling initialize request")
		handle = s.handleInitialize
	case "tools/list":
		debugLogger.Log("Handling tools/list request")
		handle = s.handleToolsList
	case "tools/call":
		debugLogger.Log("Handling tools/call request")
		handle = s.handleToolCall
	default:
		debugLogger.Log("Unknown method: %s", method)
		handle = func(r MCPRequest) MCPResponse {
			notFound := &MCPError{
				Code:    -32601,
				Message: fmt.Sprintf("Method not found: %s", r.Method),
			}
			return MCPResponse{Error: notFound, ID: r.ID}
		}
	}

	resp := handle(req)
	debugLogger.LogJSON("Response", resp)
	debugLogger.Log("HandleRequest completed for method: %s", method)
	return resp
}
// handleInitialize answers the MCP initialize request with the protocol
// version, the tools capability and the server name and version. The request
// parameters are only logged.
func (s *MCPServer) handleInitialize(req MCPRequest) MCPResponse {
	debugLogger.Log("handleInitialize: Processing initialize request")
	debugLogger.LogJSON("Initialize request params", req.Params)

	capabilities := map[string]interface{}{
		"tools": map[string]interface{}{
			"listChanged": true,
		},
	}
	serverInfo := map[string]interface{}{
		"name":    "cremote-mcp",
		"version": "1.0.0",
	}
	payload := map[string]interface{}{
		"protocolVersion": "2024-11-05",
		"capabilities":    capabilities,
		"serverInfo":      serverInfo,
	}

	debugLogger.LogJSON("Initialize response result", payload)
	return MCPResponse{ID: req.ID, Result: payload}
}
// handleToolsList returns the list of available tools together with the JSON
// schema of each tool's arguments.
//
// The schemas mirror what the handlers accept: the web_interact description
// lists all four actions of its enum, and web_screenshot does not mark
// "output" as required because handleScreenshot falls back to a generated
// path when it is missing.
func (s *MCPServer) handleToolsList(req MCPRequest) MCPResponse {
	type schema = map[string]interface{}

	// Small builders for the property shapes that repeat across tools.
	str := func(description string) schema {
		return schema{"type": "string", "description": description}
	}
	boolean := func(description string) schema {
		return schema{"type": "boolean", "description": description}
	}
	enum := func(values ...string) schema {
		return schema{"type": "string", "enum": values}
	}
	timeout := func() schema {
		return schema{"type": "integer", "description": "Timeout in seconds", "default": 5}
	}
	// tool assembles one tool entry; "required" is emitted only when at
	// least one argument is mandatory.
	tool := func(name, description string, properties schema, required ...string) schema {
		input := schema{"type": "object", "properties": properties}
		if len(required) > 0 {
			input["required"] = required
		}
		return schema{"name": name, "description": description, "inputSchema": input}
	}

	fullPage := boolean("Capture full page")
	fullPage["default"] = false

	tools := []schema{
		tool("web_navigate", "Navigate to a URL and optionally take a screenshot", schema{
			"url":        str("URL to navigate to"),
			"tab":        str("Tab ID (optional, uses current tab)"),
			"screenshot": boolean("Take screenshot after navigation"),
			"timeout":    timeout(),
		}, "url"),
		tool("web_interact", "Interact with web elements (click, fill, submit, upload)", schema{
			"action":   enum("click", "fill", "submit", "upload"),
			"selector": str("CSS selector for the element"),
			"value":    str("Value to fill (for fill/upload actions)"),
			"tab":      str("Tab ID (optional)"),
			"timeout":  timeout(),
		}, "action", "selector"),
		tool("web_extract", "Extract data from the page (source, element HTML, or execute JavaScript)", schema{
			"type":     enum("source", "element", "javascript"),
			"selector": str("CSS selector (for element type)"),
			"code":     str("JavaScript code (for javascript type)"),
			"tab":      str("Tab ID (optional)"),
			"timeout":  timeout(),
		}, "type"),
		tool("web_screenshot", "Take a screenshot of the current page", schema{
			"output":    str("Output file path (optional, defaults to a generated path under /tmp)"),
			"full_page": fullPage,
			"tab":       str("Tab ID (optional)"),
			"timeout":   timeout(),
		}),
		tool("web_manage_tabs", "Manage browser tabs (open, close, list, switch)", schema{
			"action":  enum("open", "close", "list", "switch"),
			"tab":     str("Tab ID (for close/switch actions)"),
			"timeout": timeout(),
		}, "action"),
		tool("web_iframe", "Switch iframe context for subsequent operations", schema{
			"action":   enum("enter", "exit"),
			"selector": str("Iframe CSS selector (for enter action)"),
			"tab":      str("Tab ID (optional)"),
		}, "action"),
	}

	return MCPResponse{
		Result: schema{
			"tools": tools,
		},
		ID: req.ID,
	}
}
// handleToolCall executes the tool named in the request parameters and wraps
// its result in a response.
//
// A missing or empty "name" yields error -32602, an unknown tool -32601 and a
// failing tool -32603 carrying the tool's error text. On success the current
// tab, tab history and iframe mode are copied into the result.
func (s *MCPServer) handleToolCall(req MCPRequest) MCPResponse {
	debugLogger.Log("handleToolCall: Processing tool call request")
	debugLogger.LogJSON("Tool call request params", req.Params)

	fail := func(code int, message string) MCPResponse {
		return MCPResponse{
			Error: &MCPError{Code: code, Message: message},
			ID:    req.ID,
		}
	}

	toolName, isString := req.Params["name"].(string)
	if !isString || toolName == "" {
		debugLogger.Log("handleToolCall: Tool name missing or invalid")
		return fail(-32602, "Missing tool name")
	}
	debugLogger.Log("handleToolCall: Tool name extracted: %s", toolName)

	// A missing or non-object "arguments" value is treated as no arguments.
	arguments, _ := req.Params["arguments"].(map[string]interface{})
	if arguments != nil {
		debugLogger.LogJSON("Tool arguments", arguments)
	} else {
		debugLogger.Log("handleToolCall: No arguments provided, using empty map")
		arguments = make(map[string]interface{})
	}

	handlers := map[string]func(map[string]interface{}) (ToolResult, error){
		"web_navigate":    s.handleNavigate,
		"web_interact":    s.handleInteract,
		"web_extract":     s.handleExtract,
		"web_screenshot":  s.handleScreenshot,
		"web_manage_tabs": s.handleManageTabs,
		"web_iframe":      s.handleIframe,
	}

	debugLogger.Log("handleToolCall: Dispatching to tool handler: %s", toolName)
	run, known := handlers[toolName]
	if !known {
		debugLogger.Log("handleToolCall: Unknown tool: %s", toolName)
		return fail(-32601, fmt.Sprintf("Unknown tool: %s", toolName))
	}

	result, err := run(arguments)
	debugLogger.LogJSON("Tool execution result", result)
	if err != nil {
		debugLogger.Log("handleToolCall: Tool execution error: %v", err)
		return fail(-32603, err.Error())
	}

	// Always include current state in response
	result.CurrentTab = s.currentTab
	result.TabHistory = s.tabHistory
	result.IframeMode = s.iframeMode

	response := MCPResponse{Result: result, ID: req.ID}
	debugLogger.LogJSON("Final tool call response", response)
	debugLogger.Log("handleToolCall: Completed successfully for tool: %s", toolName)
	return response
}
// Helper functions for parameter extraction

// getStringParam returns params[key] when it holds a string (including the
// empty string) and defaultValue when the key is absent or holds another type.
func getStringParam(params map[string]interface{}, key, defaultValue string) string {
	text, isString := params[key].(string)
	if !isString {
		return defaultValue
	}
	return text
}
// getIntParam returns params[key] converted to int when it holds a float64
// (the type encoding/json produces for numbers decoded into interface{}); the
// fractional part is dropped. Any other type, or a missing key, yields
// defaultValue.
func getIntParam(params map[string]interface{}, key string, defaultValue int) int {
	switch number := params[key].(type) {
	case float64:
		return int(number)
	default:
		return defaultValue
	}
}
// getBoolParam returns params[key] when it holds a bool and defaultValue when
// the key is absent or holds another type.
func getBoolParam(params map[string]interface{}, key string, defaultValue bool) bool {
	flag, isBool := params[key].(bool)
	if !isBool {
		return defaultValue
	}
	return flag
}
// resolveTabID picks the tab a tool call should act on: tabID when it is
// non-empty, otherwise the server's current tab (which may itself be empty).
func (s *MCPServer) resolveTabID(tabID string) string {
	if tabID == "" {
		return s.currentTab
	}
	return tabID
}
// handleNavigate loads a URL in a tab and optionally takes a screenshot.
//
// Parameters read from params:
//   - "url" (required): the address to load.
//   - "tab": tab to use. When empty the current tab is used, and a new tab is
//     opened if there is no current tab. When given, that tab becomes the
//     current tab and moves to the end of the tab history.
//   - "screenshot": when true, a screenshot is saved to
//     /tmp/navigate-<unix-seconds>.png after the page has loaded.
//   - "timeout": seconds, default 5.
//
// An error is returned when "url" is missing, a new tab cannot be opened or
// the navigation fails. A failed screenshot does not fail the call; its error
// text is reported under Metadata["screenshot_error"].
func (s *MCPServer) handleNavigate(params map[string]interface{}) (ToolResult, error) {
	url := getStringParam(params, "url", "")
	if url == "" {
		return ToolResult{}, fmt.Errorf("url parameter is required")
	}

	tab := getStringParam(params, "tab", "")
	screenshot := getBoolParam(params, "screenshot", false)
	timeout := getIntParam(params, "timeout", 5)

	switch {
	case tab != "":
		// Reorder the history before selecting the tab:
		// removeTabFromHistory replaces currentTab when it equals the tab
		// being removed, so assigning currentTab first would be undone.
		s.removeTabFromHistory(tab)
		s.tabHistory = append(s.tabHistory, tab)
		s.currentTab = tab
	case s.currentTab != "":
		tab = s.currentTab
	default:
		// No tab specified and no current tab: open a new one.
		newTab, err := s.client.OpenTab(timeout)
		if err != nil {
			return ToolResult{}, fmt.Errorf("failed to open new tab: %w", err)
		}
		s.currentTab = newTab
		s.tabHistory = append(s.tabHistory, newTab)
		tab = newTab
	}

	if err := s.client.LoadURL(tab, url, timeout); err != nil {
		return ToolResult{}, fmt.Errorf("failed to navigate to %s: %w", url, err)
	}

	result := ToolResult{
		Success: true,
		Data: map[string]string{
			"url": url,
			"tab": tab,
		},
	}
	if !screenshot {
		return result, nil
	}

	screenshotPath := fmt.Sprintf("/tmp/navigate-%d.png", time.Now().Unix())
	if err := s.client.TakeScreenshot(tab, screenshotPath, false, timeout); err != nil {
		// Don't fail the whole operation for screenshot errors.
		result.Metadata = map[string]string{
			"screenshot_error": err.Error(),
		}
		return result, nil
	}
	result.Screenshot = screenshotPath
	s.screenshots = append(s.screenshots, screenshotPath)
	return result, nil
}
// handleInteract performs one action on the element matched by a CSS
// selector: "click", "fill", "submit" or "upload".
//
// "action" and "selector" are required; "value" is required for fill and
// upload. The tab defaults to the current tab and "timeout" (seconds, default
// 5) is passed to the client for both of its timeout arguments. An error is
// returned for a missing parameter, an unknown action, the absence of any tab
// or a failing client call.
func (s *MCPServer) handleInteract(params map[string]interface{}) (ToolResult, error) {
	action := getStringParam(params, "action", "")
	selector := getStringParam(params, "selector", "")
	value := getStringParam(params, "value", "")
	targetTab := s.resolveTabID(getStringParam(params, "tab", ""))
	seconds := getIntParam(params, "timeout", 5)

	switch {
	case action == "":
		return ToolResult{}, fmt.Errorf("action parameter is required")
	case selector == "":
		return ToolResult{}, fmt.Errorf("selector parameter is required")
	case targetTab == "":
		return ToolResult{}, fmt.Errorf("no active tab available")
	}

	var (
		opErr   error
		summary map[string]string
	)
	switch action {
	case "click":
		opErr = s.client.ClickElement(targetTab, selector, seconds, seconds)
		summary = map[string]string{"action": "clicked", "selector": selector}
	case "fill":
		if value == "" {
			return ToolResult{}, fmt.Errorf("value parameter is required for fill action")
		}
		opErr = s.client.FillFormField(targetTab, selector, value, seconds, seconds)
		summary = map[string]string{"action": "filled", "selector": selector, "value": value}
	case "submit":
		opErr = s.client.SubmitForm(targetTab, selector, seconds, seconds)
		summary = map[string]string{"action": "submitted", "selector": selector}
	case "upload":
		if value == "" {
			return ToolResult{}, fmt.Errorf("value parameter is required for upload action")
		}
		opErr = s.client.UploadFile(targetTab, selector, value, seconds, seconds)
		summary = map[string]string{"action": "uploaded", "selector": selector, "file": value}
	default:
		return ToolResult{}, fmt.Errorf("unknown action: %s", action)
	}

	if opErr != nil {
		return ToolResult{}, fmt.Errorf("failed to %s element: %w", action, opErr)
	}
	return ToolResult{Success: true, Data: summary}, nil
}
// handleExtract returns data from a tab as a string. "type" selects what is
// fetched: "source" (page source), "element" (HTML of the element matched by
// "selector") or "javascript" (result of evaluating "code").
//
// The tab defaults to the current tab; "timeout" is in seconds, default 5. An
// error is returned for a missing or unknown type, a missing selector/code
// for the chosen type, the absence of any tab or a failing client call.
func (s *MCPServer) handleExtract(params map[string]interface{}) (ToolResult, error) {
	kind := getStringParam(params, "type", "")
	selector := getStringParam(params, "selector", "")
	code := getStringParam(params, "code", "")
	targetTab := s.resolveTabID(getStringParam(params, "tab", ""))
	seconds := getIntParam(params, "timeout", 5)

	if kind == "" {
		return ToolResult{}, fmt.Errorf("type parameter is required")
	}
	if targetTab == "" {
		return ToolResult{}, fmt.Errorf("no active tab available")
	}

	// Validate the type-specific argument, then defer the client call.
	var fetch func() (string, error)
	switch kind {
	case "source":
		fetch = func() (string, error) {
			return s.client.GetPageSource(targetTab, seconds)
		}
	case "element":
		if selector == "" {
			return ToolResult{}, fmt.Errorf("selector parameter is required for element type")
		}
		fetch = func() (string, error) {
			return s.client.GetElementHTML(targetTab, selector, seconds)
		}
	case "javascript":
		if code == "" {
			return ToolResult{}, fmt.Errorf("code parameter is required for javascript type")
		}
		fetch = func() (string, error) {
			return s.client.EvalJS(targetTab, code, seconds)
		}
	default:
		return ToolResult{}, fmt.Errorf("unknown extract type: %s", kind)
	}

	extracted, err := fetch()
	if err != nil {
		return ToolResult{}, fmt.Errorf("failed to extract %s: %w", kind, err)
	}
	return ToolResult{Success: true, Data: extracted}, nil
}
// handleScreenshot saves a screenshot of a tab to a file.
//
// "output" is the destination path; when empty,
// /tmp/screenshot-<unix-seconds>.png is used. "full_page" (default false) is
// passed through to the client, the tab defaults to the current tab and
// "timeout" is in seconds, default 5. The path is recorded in the server's
// screenshot list. An error is returned when no tab is available or the
// client call fails.
func (s *MCPServer) handleScreenshot(params map[string]interface{}) (ToolResult, error) {
	path := getStringParam(params, "output", "")
	if path == "" {
		path = fmt.Sprintf("/tmp/screenshot-%d.png", time.Now().Unix())
	}
	targetTab := s.resolveTabID(getStringParam(params, "tab", ""))
	wholePage := getBoolParam(params, "full_page", false)
	seconds := getIntParam(params, "timeout", 5)

	if targetTab == "" {
		return ToolResult{}, fmt.Errorf("no active tab available")
	}
	if err := s.client.TakeScreenshot(targetTab, path, wholePage, seconds); err != nil {
		return ToolResult{}, fmt.Errorf("failed to take screenshot: %w", err)
	}
	s.screenshots = append(s.screenshots, path)

	details := map[string]interface{}{
		"output":    path,
		"full_page": wholePage,
		"tab":       targetTab,
	}
	return ToolResult{Success: true, Screenshot: path, Data: details}, nil
}
// handleManageTabs opens, closes, lists or switches browser tabs according
// to the "action" parameter.
//
//   - "open": opens a tab, makes it current and appends it to the history.
//   - "close": closes "tab" (default: the current tab) and removes it from
//     the history; if it was current, the last remaining history entry
//     becomes current.
//   - "list": returns the client's tab list as Data.
//   - "switch": makes "tab" (required) current and moves it to the end of
//     the history. No client call is made.
//
// "timeout" is in seconds, default 5. An error is returned for a missing or
// unknown action, a missing tab where one is needed, or a failing client call.
func (s *MCPServer) handleManageTabs(params map[string]interface{}) (ToolResult, error) {
	action := getStringParam(params, "action", "")
	tab := getStringParam(params, "tab", "")
	timeout := getIntParam(params, "timeout", 5)

	if action == "" {
		return ToolResult{}, fmt.Errorf("action parameter is required")
	}

	var data interface{}
	switch action {
	case "open":
		newTab, err := s.client.OpenTab(timeout)
		if err != nil {
			return ToolResult{}, fmt.Errorf("failed to open tab: %w", err)
		}
		s.currentTab = newTab
		s.tabHistory = append(s.tabHistory, newTab)
		data = map[string]string{"tab": newTab, "action": "opened"}

	case "close":
		targetTab := s.resolveTabID(tab)
		if targetTab == "" {
			return ToolResult{}, fmt.Errorf("no tab to close")
		}
		if err := s.client.CloseTab(targetTab, timeout); err != nil {
			return ToolResult{}, fmt.Errorf("failed to close tab: %w", err)
		}
		s.removeTabFromHistory(targetTab)
		data = map[string]string{"tab": targetTab, "action": "closed"}

	case "list":
		tabs, err := s.client.ListTabs()
		if err != nil {
			return ToolResult{}, fmt.Errorf("failed to list tabs: %w", err)
		}
		data = tabs

	case "switch":
		if tab == "" {
			return ToolResult{}, fmt.Errorf("tab parameter is required for switch action")
		}
		// Reorder the history before selecting the tab:
		// removeTabFromHistory replaces currentTab when it equals the tab
		// being removed, so assigning currentTab first would be undone.
		s.removeTabFromHistory(tab)
		s.tabHistory = append(s.tabHistory, tab)
		s.currentTab = tab
		data = map[string]string{"tab": tab, "action": "switched"}

	default:
		return ToolResult{}, fmt.Errorf("unknown tab action: %s", action)
	}

	return ToolResult{
		Success: true,
		Data:    data,
	}, nil
}
// handleIframe switches a tab's context into an iframe ("enter", requires
// "selector") or back to the main page ("exit"). The tab defaults to the
// current tab.
//
// The server's iframeMode flag is updated only after the client call has
// succeeded, so a failed switch leaves the reported state unchanged. An error
// is returned for a missing or unknown action, a missing selector on enter,
// the absence of any tab or a failing client call.
func (s *MCPServer) handleIframe(params map[string]interface{}) (ToolResult, error) {
	action := getStringParam(params, "action", "")
	selector := getStringParam(params, "selector", "")
	tab := s.resolveTabID(getStringParam(params, "tab", ""))

	if action == "" {
		return ToolResult{}, fmt.Errorf("action parameter is required")
	}
	if tab == "" {
		return ToolResult{}, fmt.Errorf("no active tab available")
	}

	var data map[string]string
	switch action {
	case "enter":
		if selector == "" {
			return ToolResult{}, fmt.Errorf("selector parameter is required for enter action")
		}
		if err := s.client.SwitchToIframe(tab, selector); err != nil {
			return ToolResult{}, fmt.Errorf("failed to %s iframe: %w", action, err)
		}
		s.iframeMode = true
		data = map[string]string{"action": "entered", "selector": selector}
	case "exit":
		if err := s.client.SwitchToMain(tab); err != nil {
			return ToolResult{}, fmt.Errorf("failed to %s iframe: %w", action, err)
		}
		s.iframeMode = false
		data = map[string]string{"action": "exited"}
	default:
		return ToolResult{}, fmt.Errorf("unknown iframe action: %s", action)
	}

	return ToolResult{
		Success: true,
		Data:    data,
	}, nil
}
// removeTabFromHistory deletes the first occurrence of tabID from the tab
// history. If tabID is also the current tab, the current tab is replaced by
// the last remaining history entry, or cleared when the history is empty.
func (s *MCPServer) removeTabFromHistory(tabID string) {
	position := -1
	for i := range s.tabHistory {
		if s.tabHistory[i] == tabID {
			position = i
			break
		}
	}
	if position >= 0 {
		s.tabHistory = append(s.tabHistory[:position], s.tabHistory[position+1:]...)
	}

	if s.currentTab != tabID {
		return
	}
	if remaining := len(s.tabHistory); remaining > 0 {
		s.currentTab = s.tabHistory[remaining-1]
		return
	}
	s.currentTab = ""
}
// main runs the MCP server over stdin/stdout.
//
// The cremote daemon address comes from CREMOTE_HOST (default "localhost")
// and CREMOTE_PORT (default 8989; an unparsable value is reported and the
// default kept). Messages are framed with a Content-Length header block
// followed by a blank line and the JSON body. Requests are handled one at a
// time on a reader goroutine; main returns when stdin ends, a framing or
// write error occurs, or SIGINT/SIGTERM arrives.
func main() {
	// Initialize debug logger
	var err error
	debugLogger, err = NewDebugLogger()
	if err != nil {
		log.Printf("Warning: Failed to initialize debug logger: %v", err)
		// Continue without debug logging
	} else {
		defer debugLogger.Close()
		debugLogger.Log("MCP STDIO Server starting up")
	}

	// Set up signal handling for graceful shutdown
	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)

	host := os.Getenv("CREMOTE_HOST")
	if host == "" {
		host = "localhost"
	}

	port := 8989
	if portStr := os.Getenv("CREMOTE_PORT"); portStr != "" {
		p, convErr := strconv.Atoi(portStr)
		if convErr != nil {
			log.Printf("Warning: ignoring invalid CREMOTE_PORT %q, using %d", portStr, port)
		} else {
			port = p
		}
	}

	debugLogger.Log("Connecting to cremote daemon at %s:%d", host, port)
	log.Printf("Starting MCP stdio server, connecting to cremote daemon at %s:%d", host, port)
	server := NewMCPServer(host, port)

	// Create a buffered reader for better EOF handling
	reader := bufio.NewReader(os.Stdin)

	log.Printf("MCP stdio server ready, waiting for requests...")

	// Channel to signal when to stop reading
	done := make(chan bool)

	// Goroutine to handle stdin reading
	go func() {
		defer close(done)
		for {
			// Read headers for Content-Length framing (use the same reader for headers and body)
			contentLength := -1
			for {
				line, err := reader.ReadString('\n')
				if err != nil {
					if err == io.EOF {
						log.Printf("Input stream closed, shutting down MCP server")
						return
					}
					log.Printf("Error reading header: %v", err)
					return
				}
				line = strings.TrimRight(line, "\r\n")
				if line == "" {
					break // end of headers
				}

				// Header names are matched case-insensitively and the
				// whitespace around the value is optional.
				colon := strings.IndexByte(line, ':')
				if colon < 0 || !strings.EqualFold(strings.TrimSpace(line[:colon]), "content-length") {
					continue
				}
				contentLength, err = strconv.Atoi(strings.TrimSpace(line[colon+1:]))
				if err != nil {
					log.Printf("Invalid Content-Length: %v", err)
					return
				}
			}

			if contentLength < 0 {
				log.Printf("Missing Content-Length header")
				return
			}

			// Read body of specified length
			debugLogger.Log("Reading request body of length: %d", contentLength)
			body := make([]byte, contentLength)
			if _, err := io.ReadFull(reader, body); err != nil {
				debugLogger.Log("Error reading body: %v", err)
				log.Printf("Error reading body: %v", err)
				return
			}

			debugLogger.Log("Raw request body: %s", string(body))

			var req MCPRequest
			if err := json.Unmarshal(body, &req); err != nil {
				debugLogger.Log("Error decoding request body: %v", err)
				log.Printf("Error decoding request body: %v", err)
				continue
			}

			debugLogger.Log("Successfully parsed request: %s (ID: %v)", req.Method, req.ID)
			log.Printf("Processing request: %s (ID: %v)", req.Method, req.ID)

			resp := server.HandleRequest(req)
			resp.JSONRPC = "2.0"

			// A message without an id is a JSON-RPC notification; the
			// sender does not expect, and must not receive, a response.
			if req.ID == nil {
				debugLogger.Log("No response sent for notification: %s", req.Method)
				log.Printf("Completed notification: %s", req.Method)
				continue
			}

			// Write Content-Length framed response
			responseBytes, err := json.Marshal(resp)
			if err != nil {
				debugLogger.Log("Error marshaling response: %v", err)
				log.Printf("Error marshaling response: %v", err)
				continue
			}

			debugLogger.Log("Sending response with Content-Length: %d", len(responseBytes))
			debugLogger.Log("Raw response: %s", string(responseBytes))

			// A failed write means the peer is gone; stop serving.
			if _, err := fmt.Fprintf(os.Stdout, "Content-Length: %d\r\n\r\n", len(responseBytes)); err != nil {
				debugLogger.Log("Error writing response header: %v", err)
				log.Printf("Error writing response header: %v", err)
				return
			}
			if _, err := os.Stdout.Write(responseBytes); err != nil {
				debugLogger.Log("Error writing response body: %v", err)
				log.Printf("Error writing response body: %v", err)
				return
			}

			debugLogger.Log("Response sent successfully for request: %s", req.Method)
			log.Printf("Completed request: %s", req.Method)
		}
	}()

	// Wait for either completion or signal
	select {
	case <-done:
		log.Printf("Input processing completed")
	case sig := <-sigChan:
		log.Printf("Received signal %v, shutting down", sig)
	}

	log.Printf("MCP stdio server shutdown complete")
}

822
mcp/backup/server.go Normal file
View File

@ -0,0 +1,822 @@
//go:build mcp_http
package main
import (
"encoding/json"
"fmt"
"log"
"net/http"
"os"
"path/filepath"
"strconv"
"time"
"git.teamworkapps.com/shortcut/cremote/client"
)
// DebugLogger writes debug output to a log file on disk.
// Its Log, LogJSON and Close methods check the receiver for nil, so a nil
// *DebugLogger can be used and simply discards everything.
type DebugLogger struct {
// file is the open log file; Close closes it.
file *os.File
// logger formats messages and writes them to file.
logger *log.Logger
}
// NewDebugLogger creates the log directory /tmp/cremote-mcp-logs if needed,
// opens a log file named mcp-http-<unix-seconds>.log inside it and returns a
// DebugLogger that writes to that file.
//
// The log captures full request and response payloads, which can include
// values typed into forms, so the directory and the file are created
// accessible to the owning user only. Permissions of a directory that already
// exists are left untouched.
//
// An error is returned when the directory cannot be created or the file
// cannot be opened.
func NewDebugLogger() (*DebugLogger, error) {
	const logDir = "/tmp/cremote-mcp-logs"
	if err := os.MkdirAll(logDir, 0700); err != nil {
		return nil, fmt.Errorf("failed to create log directory: %w", err)
	}

	logFile := filepath.Join(logDir, fmt.Sprintf("mcp-http-%d.log", time.Now().Unix()))
	file, err := os.OpenFile(logFile, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0600)
	if err != nil {
		return nil, fmt.Errorf("failed to open log file: %w", err)
	}

	logger := log.New(file, "", log.LstdFlags|log.Lmicroseconds|log.Lshortfile)
	logger.Printf("=== MCP HTTP Server Debug Log Started ===")

	return &DebugLogger{
		file:   file,
		logger: logger,
	}, nil
}
// Log writes a Printf-style formatted message to the debug log. Calls on a
// nil receiver, or on a receiver without a logger, are ignored.
func (d *DebugLogger) Log(format string, args ...interface{}) {
	if d == nil {
		return
	}
	if out := d.logger; out != nil {
		out.Printf(format, args...)
	}
}
// LogJSON marshals obj as indented JSON and logs it under label. A marshal
// failure is logged under the same label in place of the JSON. Calls on a nil
// receiver, or on a receiver without a logger, are ignored.
func (d *DebugLogger) LogJSON(label string, obj interface{}) {
	if d == nil || d.logger == nil {
		return
	}

	pretty, marshalErr := json.MarshalIndent(obj, "", " ")
	switch {
	case marshalErr != nil:
		d.logger.Printf("%s: JSON marshal error: %v", label, marshalErr)
	default:
		d.logger.Printf("%s:\n%s", label, string(pretty))
	}
}
// Close writes a closing marker to the log and closes the log file.
// It is a no-op when the receiver or its file is nil. The marker is skipped
// when no logger is attached, matching the nil checks in Log and LogJSON.
func (d *DebugLogger) Close() {
	if d == nil || d.file == nil {
		return
	}
	if d.logger != nil {
		d.logger.Printf("=== MCP HTTP Server Debug Log Ended ===")
	}
	// A close error at shutdown cannot be acted upon, so it is discarded on
	// purpose.
	_ = d.file.Close()
}
// debugLogger is the package-level debug logger used by the request
// handlers. The DebugLogger methods check for a nil receiver, so it may be
// left nil.
var debugLogger *DebugLogger
// MCPServer wraps the cremote client with MCP protocol handling and holds the
// state kept between requests.
type MCPServer struct {
// client is the cremote client created by NewMCPServer.
client *client.Client
// NOTE(review): the handlers that use the fields below are outside this
// view. Presumably currentTab is the default tab ID, tabHistory the list of
// known tab IDs, iframeMode whether an iframe context is active, lastError
// the most recent error text and screenshots the paths of screenshots
// taken — confirm against the handlers.
currentTab string
tabHistory []string
iframeMode bool
lastError string
screenshots []string
}
// MCPRequest represents an incoming MCP request as decoded from JSON.
type MCPRequest struct {
// Method selects the handler: "initialize", "tools/list" or "tools/call".
Method string `json:"method"`
// Params holds the decoded "params" object.
Params map[string]interface{} `json:"params"`
// ID is copied into the response so the caller can match it to this request.
ID interface{} `json:"id"`
}
// MCPResponse represents an MCP response. Handlers fill either Result or
// Error; both are omitted from the JSON when unset.
type MCPResponse struct {
// Result is the payload of a successful call.
Result interface{} `json:"result,omitempty"`
// Error describes a failed call.
Error *MCPError `json:"error,omitempty"`
// ID is the ID of the request being answered.
ID interface{} `json:"id"`
}
// MCPError represents an MCP error.
type MCPError struct {
// Code is a numeric error code; HandleRequest uses -32601 for an unknown method.
Code int `json:"code"`
// Message is the human-readable error text.
Message string `json:"message"`
}
// ToolResult represents the result of a tool execution. Except for Success
// and IframeMode, empty fields are omitted from the JSON.
type ToolResult struct {
// Success reports whether the tool completed.
Success bool `json:"success"`
// Data is the tool-specific payload.
Data interface{} `json:"data,omitempty"`
// Screenshot is the path of a screenshot taken during the call, if any.
Screenshot string `json:"screenshot,omitempty"`
// NOTE(review): CurrentTab, TabHistory and IframeMode presumably mirror the
// server state after the call — the code that fills them is outside this
// view; confirm.
CurrentTab string `json:"current_tab,omitempty"`
TabHistory []string `json:"tab_history,omitempty"`
IframeMode bool `json:"iframe_mode"`
// Error is an error message for the call, if any.
Error string `json:"error,omitempty"`
// Metadata carries additional string key/value details.
Metadata map[string]string `json:"metadata,omitempty"`
}
// NewMCPServer returns an MCPServer whose cremote client targets host:port.
// The tab history and screenshot list start out empty (non-nil); all other
// state is zero-valued.
func NewMCPServer(host string, port int) *MCPServer {
	cremote := client.NewClient(host, port)

	server := new(MCPServer)
	server.client = cremote
	server.tabHistory = make([]string, 0)
	server.screenshots = make([]string, 0)
	return server
}
// HandleRequest routes an MCP request to the handler for its method and
// returns that handler's response. Unknown methods yield a -32601 "Method not
// found" error. The request and the response are written to the debug log.
func (s *MCPServer) HandleRequest(req MCPRequest) MCPResponse {
	method := req.Method
	debugLogger.Log("HandleRequest called with method: %s, ID: %v", method, req.ID)
	debugLogger.LogJSON("Incoming Request", req)

	// Pick the handler first, then invoke it once below.
	var handle func(MCPRequest) MCPResponse
	switch method {
	case "initialize":
		debugLogger.Log("Handling initialize request")
		handle = s.handleInitialize
	case "tools/list":
		debugLogger.Log("Handling tools/list request")
		handle = s.handleToolsList
	case "tools/call":
		debugLogger.Log("Handling tools/call request")
		handle = s.handleToolCall
	default:
		debugLogger.Log("Unknown method: %s", method)
		handle = func(r MCPRequest) MCPResponse {
			notFound := &MCPError{Code: -32601, Message: "Method not found"}
			return MCPResponse{Error: notFound, ID: r.ID}
		}
	}

	resp := handle(req)
	debugLogger.LogJSON("Response", resp)
	debugLogger.Log("HandleRequest completed for method: %s", method)
	return resp
}
// handleInitialize answers the MCP "initialize" request with the protocol
// version, the advertised capabilities and the server name/version. The
// response carries the ID of the request.
func (s *MCPServer) handleInitialize(req MCPRequest) MCPResponse {
	capabilities := map[string]interface{}{
		"tools": map[string]interface{}{"listChanged": true},
	}
	serverInfo := map[string]interface{}{
		"name":    "cremote-mcp",
		"version": "1.0.0",
	}

	var resp MCPResponse
	resp.ID = req.ID
	resp.Result = map[string]interface{}{
		"protocolVersion": "2024-11-05",
		"capabilities":    capabilities,
		"serverInfo":      serverInfo,
	}
	return resp
}
// handleToolsList answers the MCP "tools/list" request. The result is a map
// with a single "tools" entry listing every tool's name, description and
// JSON-schema style input description.
func (s *MCPServer) handleToolsList(req MCPRequest) MCPResponse {
	type object = map[string]interface{}

	// Builders for the schema fragments that repeat across tools.
	text := func(description string) object {
		return object{"type": "string", "description": description}
	}
	oneOf := func(options ...string) object {
		return object{"type": "string", "enum": options}
	}
	timeout := func() object {
		return object{"type": "integer", "description": "Timeout in seconds", "default": 5}
	}
	tool := func(name, description string, required []string, properties object) object {
		return object{
			"name":        name,
			"description": description,
			"inputSchema": object{
				"type":       "object",
				"properties": properties,
				"required":   required,
			},
		}
	}

	tools := []map[string]interface{}{
		tool("web_navigate", "Navigate to a URL and optionally take a screenshot",
			[]string{"url"},
			object{
				"url":        text("URL to navigate to"),
				"tab":        text("Tab ID (optional, uses current tab)"),
				"screenshot": object{"type": "boolean", "description": "Take screenshot after navigation"},
				"timeout":    timeout(),
			}),
		tool("web_interact", "Interact with web elements (click, fill, submit)",
			[]string{"action", "selector"},
			object{
				"action":   oneOf("click", "fill", "submit", "upload"),
				"selector": text("CSS selector for the element"),
				"value":    text("Value to fill (for fill/upload actions)"),
				"tab":      text("Tab ID (optional)"),
				"timeout":  timeout(),
			}),
		tool("web_extract", "Extract data from the page (source, element HTML, or execute JavaScript)",
			[]string{"type"},
			object{
				"type":     oneOf("source", "element", "javascript"),
				"selector": text("CSS selector (for element type)"),
				"code":     text("JavaScript code (for javascript type)"),
				"tab":      text("Tab ID (optional)"),
				"timeout":  timeout(),
			}),
		tool("web_screenshot", "Take a screenshot of the current page",
			[]string{"output"},
			object{
				"output":    text("Output file path"),
				"full_page": object{"type": "boolean", "description": "Capture full page", "default": false},
				"tab":       text("Tab ID (optional)"),
				"timeout":   timeout(),
			}),
		tool("web_manage_tabs", "Manage browser tabs (open, close, list, switch)",
			[]string{"action"},
			object{
				"action":  oneOf("open", "close", "list", "switch"),
				"tab":     text("Tab ID (for close/switch actions)"),
				"timeout": timeout(),
			}),
		tool("web_iframe", "Switch iframe context for subsequent operations",
			[]string{"action"},
			object{
				"action":   oneOf("enter", "exit"),
				"selector": text("Iframe CSS selector (for enter action)"),
				"tab":      text("Tab ID (optional)"),
			}),
	}

	return MCPResponse{
		Result: map[string]interface{}{"tools": tools},
		ID:     req.ID,
	}
}
// handleToolCall runs the tool named in req.Params["name"] with the argument
// map in req.Params["arguments"].
//
// A missing or mistyped "arguments"/"name" produces a -32602 error response
// and an unknown tool name a -32601 error response. Otherwise the tool's
// ToolResult is returned as the response result; a tool error is reported
// inside that result (Success=false, Error set) and remembered in lastError.
// The current tab, tab history and iframe flag are always attached.
func (s *MCPServer) handleToolCall(req MCPRequest) MCPResponse {
	reject := func(code int, message string) MCPResponse {
		return MCPResponse{
			Error: &MCPError{Code: code, Message: message},
			ID:    req.ID,
		}
	}

	args, isMap := req.Params["arguments"].(map[string]interface{})
	if !isMap {
		return reject(-32602, "Invalid parameters")
	}
	name, isString := req.Params["name"].(string)
	if !isString {
		return reject(-32602, "Tool name required")
	}

	// Resolve the tool implementation before running anything.
	var run func(map[string]interface{}) (ToolResult, error)
	switch name {
	case "web_navigate":
		run = s.handleNavigate
	case "web_interact":
		run = s.handleInteract
	case "web_extract":
		run = s.handleExtract
	case "web_screenshot":
		run = s.handleScreenshot
	case "web_manage_tabs":
		run = s.handleManageTabs
	case "web_iframe":
		run = s.handleIframe
	default:
		return reject(-32601, "Unknown tool: "+name)
	}

	outcome, runErr := run(args)
	if runErr != nil {
		failure := runErr.Error()
		outcome.Success = false
		outcome.Error = failure
		s.lastError = failure
	}

	// Always include current state in response
	outcome.CurrentTab = s.currentTab
	outcome.TabHistory = s.tabHistory
	outcome.IframeMode = s.iframeMode

	return MCPResponse{Result: outcome, ID: req.ID}
}
// getStringParam returns params[key] when it holds a string (including the
// empty string). For a missing key or a value of any other type it returns
// defaultValue.
func getStringParam(params map[string]interface{}, key, defaultValue string) string {
	text, isString := params[key].(string)
	if !isString {
		return defaultValue
	}
	return text
}
// getIntParam returns params[key] as an int. A float64 value (what
// encoding/json produces for numbers decoded into interface{}) is converted
// with int(), which drops the fractional part; an int value is returned
// unchanged. Any other type, or a missing key, yields defaultValue.
func getIntParam(params map[string]interface{}, key string, defaultValue int) int {
	switch number := params[key].(type) {
	case float64:
		return int(number)
	case int:
		return number
	default:
		return defaultValue
	}
}
// getBoolParam returns params[key] when it holds a bool. For a missing key or
// a value of any other type it returns defaultValue.
func getBoolParam(params map[string]interface{}, key string, defaultValue bool) bool {
	flag, isBool := params[key].(bool)
	if !isBool {
		return defaultValue
	}
	return flag
}
// resolveTabID picks the tab a tool should act on: the explicitly requested
// tabParam when it is non-empty, otherwise the server's current tab (which
// may itself be empty).
func (s *MCPServer) resolveTabID(tabParam string) string {
	if tabParam == "" {
		return s.currentTab
	}
	return tabParam
}
// handleNavigate implements the web_navigate tool.
//
// Parameters read from params:
//   - "url" (required): address to load.
//   - "tab": tab to use; defaults to the current tab. When neither is set a
//     new tab is opened, made current and appended to the tab history.
//   - "timeout": seconds passed to every client call (default 5).
//   - "screenshot": when true, a screenshot is attempted after the page loads.
//
// On success the result's Data holds the url and tab. The screenshot is
// best-effort: a failure does not fail the navigation, but it is reported in
// Metadata["screenshot_error"] so the caller can tell why no screenshot path
// was returned.
func (s *MCPServer) handleNavigate(params map[string]interface{}) (ToolResult, error) {
	url := getStringParam(params, "url", "")
	if url == "" {
		return ToolResult{}, fmt.Errorf("url parameter is required")
	}

	tab := getStringParam(params, "tab", "")
	timeout := getIntParam(params, "timeout", 5)
	takeScreenshot := getBoolParam(params, "screenshot", false)

	if tab == "" {
		// If no tab specified and we don't have a current tab, open one
		if s.currentTab == "" {
			newTab, err := s.client.OpenTab(timeout)
			if err != nil {
				return ToolResult{}, fmt.Errorf("failed to open new tab: %w", err)
			}
			s.currentTab = newTab
			s.tabHistory = append(s.tabHistory, newTab)
		}
		tab = s.currentTab
	}

	// Load the URL
	if err := s.client.LoadURL(tab, url, timeout); err != nil {
		return ToolResult{}, fmt.Errorf("failed to load URL: %w", err)
	}

	result := ToolResult{
		Success: true,
		Data:    map[string]string{"url": url, "tab": tab},
	}

	// Take screenshot if requested
	if takeScreenshot {
		screenshotPath := fmt.Sprintf("/tmp/navigate-%d.png", time.Now().Unix())
		if err := s.client.TakeScreenshot(tab, screenshotPath, false, timeout); err != nil {
			// Navigation already succeeded, so surface the problem without
			// turning the whole call into an error.
			result.Metadata = map[string]string{"screenshot_error": err.Error()}
		} else {
			result.Screenshot = screenshotPath
			s.screenshots = append(s.screenshots, screenshotPath)
		}
	}

	return result, nil
}
// handleInteract implements the web_interact tool: it clicks, fills, submits
// or uploads to the element matched by "selector" in the resolved tab.
//
// "action" and "selector" are required, "value" is required for fill and
// upload, and "timeout" (default 5) is used for both timeouts of the client
// call. On success Data describes what was done; any validation or client
// failure is returned as an error with an empty ToolResult.
func (s *MCPServer) handleInteract(params map[string]interface{}) (ToolResult, error) {
	var (
		action   = getStringParam(params, "action", "")
		selector = getStringParam(params, "selector", "")
		value    = getStringParam(params, "value", "")
		tabID    = s.resolveTabID(getStringParam(params, "tab", ""))
		seconds  = getIntParam(params, "timeout", 5)
	)

	switch {
	case action == "" || selector == "":
		return ToolResult{}, fmt.Errorf("action and selector parameters are required")
	case tabID == "":
		return ToolResult{}, fmt.Errorf("no active tab available")
	}

	var (
		summary map[string]string
		callErr error
	)
	switch action {
	case "click":
		summary = map[string]string{"action": "clicked", "selector": selector}
		callErr = s.client.ClickElement(tabID, selector, seconds, seconds)
	case "fill":
		if value == "" {
			return ToolResult{}, fmt.Errorf("value parameter is required for fill action")
		}
		summary = map[string]string{"action": "filled", "selector": selector, "value": value}
		callErr = s.client.FillFormField(tabID, selector, value, seconds, seconds)
	case "submit":
		summary = map[string]string{"action": "submitted", "selector": selector}
		callErr = s.client.SubmitForm(tabID, selector, seconds, seconds)
	case "upload":
		if value == "" {
			return ToolResult{}, fmt.Errorf("value parameter (file path) is required for upload action")
		}
		summary = map[string]string{"action": "uploaded", "selector": selector, "file": value}
		callErr = s.client.UploadFile(tabID, selector, value, seconds, seconds)
	default:
		return ToolResult{}, fmt.Errorf("unknown action: %s", action)
	}

	if callErr != nil {
		return ToolResult{}, fmt.Errorf("failed to %s element: %w", action, callErr)
	}
	return ToolResult{Success: true, Data: summary}, nil
}
// handleExtract implements the web_extract tool. Depending on "type" it
// returns the page source, the HTML of the element matched by "selector", or
// the result of evaluating "code" in the resolved tab.
//
// "type" is required; "selector" is required for element extraction and
// "code" for javascript extraction. On success Data holds whatever the client
// returned and Metadata records the type and selector used.
func (s *MCPServer) handleExtract(params map[string]interface{}) (ToolResult, error) {
	var (
		kind     = getStringParam(params, "type", "")
		selector = getStringParam(params, "selector", "")
		script   = getStringParam(params, "code", "")
		tabID    = s.resolveTabID(getStringParam(params, "tab", ""))
		seconds  = getIntParam(params, "timeout", 5)
	)

	switch {
	case kind == "":
		return ToolResult{}, fmt.Errorf("type parameter is required")
	case tabID == "":
		return ToolResult{}, fmt.Errorf("no active tab available")
	}

	var (
		payload interface{}
		callErr error
	)
	switch kind {
	case "source":
		payload, callErr = s.client.GetPageSource(tabID, seconds)
	case "element":
		if selector == "" {
			return ToolResult{}, fmt.Errorf("selector parameter is required for element extraction")
		}
		payload, callErr = s.client.GetElementHTML(tabID, selector, seconds)
	case "javascript":
		if script == "" {
			return ToolResult{}, fmt.Errorf("code parameter is required for javascript extraction")
		}
		payload, callErr = s.client.EvalJS(tabID, script, seconds)
	default:
		return ToolResult{}, fmt.Errorf("unknown extraction type: %s", kind)
	}

	if callErr != nil {
		return ToolResult{}, fmt.Errorf("failed to extract %s: %w", kind, callErr)
	}

	extracted := ToolResult{Success: true, Data: payload}
	extracted.Metadata = map[string]string{
		"type":     kind,
		"selector": selector,
	}
	return extracted, nil
}
// handleScreenshot implements the web_screenshot tool. It captures the
// resolved tab into the file named by "output", falling back to a
// timestamped path under /tmp when "output" is empty. "full_page" (default
// false) and "timeout" (default 5) are passed through to the client.
//
// On success the path is appended to the server's screenshot list and
// returned both as Screenshot and inside Data.
func (s *MCPServer) handleScreenshot(params map[string]interface{}) (ToolResult, error) {
	target := getStringParam(params, "output", "")
	if target == "" {
		target = fmt.Sprintf("/tmp/screenshot-%d.png", time.Now().Unix())
	}

	tabID := s.resolveTabID(getStringParam(params, "tab", ""))
	if tabID == "" {
		return ToolResult{}, fmt.Errorf("no active tab available")
	}

	wholePage := getBoolParam(params, "full_page", false)
	seconds := getIntParam(params, "timeout", 5)

	if captureErr := s.client.TakeScreenshot(tabID, target, wholePage, seconds); captureErr != nil {
		return ToolResult{}, fmt.Errorf("failed to take screenshot: %w", captureErr)
	}
	s.screenshots = append(s.screenshots, target)

	details := map[string]interface{}{
		"output":    target,
		"full_page": wholePage,
		"tab":       tabID,
	}
	return ToolResult{Success: true, Screenshot: target, Data: details}, nil
}
// handleManageTabs implements the web_manage_tabs tool.
//
// Supported values of "action":
//   - "open": opens a new tab, makes it current and appends it to the history.
//   - "close": closes "tab" (or the current tab) and removes it from the
//     history; if it was current, the most recent remaining tab takes over.
//   - "list": returns whatever the client reports as the open tabs.
//   - "switch": makes "tab" (required) the current tab and moves it to the
//     end of the history.
//
// "timeout" (default 5) is passed to the open and close client calls. Any
// validation or client failure is returned as an error with an empty
// ToolResult.
func (s *MCPServer) handleManageTabs(params map[string]interface{}) (ToolResult, error) {
	action := getStringParam(params, "action", "")
	tab := getStringParam(params, "tab", "")
	timeout := getIntParam(params, "timeout", 5)

	if action == "" {
		return ToolResult{}, fmt.Errorf("action parameter is required")
	}

	var data interface{}

	switch action {
	case "open":
		newTab, err := s.client.OpenTab(timeout)
		if err != nil {
			return ToolResult{}, fmt.Errorf("failed to open tab: %w", err)
		}
		s.currentTab = newTab
		s.tabHistory = append(s.tabHistory, newTab)
		data = map[string]string{"tab": newTab, "action": "opened"}

	case "close":
		targetTab := s.resolveTabID(tab)
		if targetTab == "" {
			return ToolResult{}, fmt.Errorf("no tab to close")
		}
		if err := s.client.CloseTab(targetTab, timeout); err != nil {
			return ToolResult{}, fmt.Errorf("failed to close tab: %w", err)
		}
		// Remove from history and update current tab
		s.removeTabFromHistory(targetTab)
		data = map[string]string{"tab": targetTab, "action": "closed"}

	case "list":
		tabs, err := s.client.ListTabs()
		if err != nil {
			return ToolResult{}, fmt.Errorf("failed to list tabs: %w", err)
		}
		data = tabs

	case "switch":
		if tab == "" {
			return ToolResult{}, fmt.Errorf("tab parameter is required for switch action")
		}
		// Move to end of history if it exists, otherwise add it.
		// currentTab must be assigned AFTER removeTabFromHistory: that helper
		// resets currentTab whenever the removed ID is the current one, which
		// would otherwise undo the switch.
		s.removeTabFromHistory(tab)
		s.tabHistory = append(s.tabHistory, tab)
		s.currentTab = tab
		data = map[string]string{"tab": tab, "action": "switched"}

	default:
		return ToolResult{}, fmt.Errorf("unknown tab action: %s", action)
	}

	return ToolResult{
		Success: true,
		Data:    data,
	}, nil
}
// handleIframe implements the web_iframe tool.
//
// "action" is required and must be "enter" or "exit"; "enter" additionally
// requires "selector". The call targets the resolved tab. The server's
// iframeMode flag is updated only after the client call succeeds, so a failed
// switch leaves the previously reported state untouched.
func (s *MCPServer) handleIframe(params map[string]interface{}) (ToolResult, error) {
	action := getStringParam(params, "action", "")
	selector := getStringParam(params, "selector", "")
	tab := s.resolveTabID(getStringParam(params, "tab", ""))

	if action == "" {
		return ToolResult{}, fmt.Errorf("action parameter is required")
	}
	if tab == "" {
		return ToolResult{}, fmt.Errorf("no active tab available")
	}

	var data map[string]string

	switch action {
	case "enter":
		if selector == "" {
			return ToolResult{}, fmt.Errorf("selector parameter is required for enter action")
		}
		if err := s.client.SwitchToIframe(tab, selector); err != nil {
			return ToolResult{}, fmt.Errorf("failed to %s iframe: %w", action, err)
		}
		s.iframeMode = true
		data = map[string]string{"action": "entered", "selector": selector}

	case "exit":
		if err := s.client.SwitchToMain(tab); err != nil {
			return ToolResult{}, fmt.Errorf("failed to %s iframe: %w", action, err)
		}
		s.iframeMode = false
		data = map[string]string{"action": "exited"}

	default:
		return ToolResult{}, fmt.Errorf("unknown iframe action: %s", action)
	}

	return ToolResult{
		Success: true,
		Data:    data,
	}, nil
}
// removeTabFromHistory drops the first occurrence of tabID from the tab
// history. If tabID is also the current tab, the current tab becomes the last
// remaining history entry, or empty when the history is exhausted. This
// happens even when tabID was not present in the history.
func (s *MCPServer) removeTabFromHistory(tabID string) {
	// Locate the first matching entry, if any.
	pos := -1
	for idx := range s.tabHistory {
		if s.tabHistory[idx] == tabID {
			pos = idx
			break
		}
	}
	if pos >= 0 {
		// Shift the tail left by one slot and shorten the slice in place.
		copy(s.tabHistory[pos:], s.tabHistory[pos+1:])
		s.tabHistory = s.tabHistory[:len(s.tabHistory)-1]
	}

	if s.currentTab != tabID {
		return
	}
	// If we removed the current tab, set current to the last in history
	if remaining := len(s.tabHistory); remaining > 0 {
		s.currentTab = s.tabHistory[remaining-1]
		return
	}
	s.currentTab = ""
}
// ServeHTTP exposes the MCP server over HTTP. It answers OPTIONS preflight
// requests with 200, rejects every method other than POST with 405, decodes
// the POST body as an MCPRequest (400 on invalid JSON), runs it through
// HandleRequest and writes the MCPResponse back as JSON. Permissive CORS
// headers are set on every response.
func (s *MCPServer) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	debugLogger.Log("ServeHTTP: Received %s request from %s", r.Method, r.RemoteAddr)
	debugLogger.Log("ServeHTTP: Request URL: %s", r.URL.String())
	debugLogger.Log("ServeHTTP: Request headers: %v", r.Header)

	// Set CORS headers for browser compatibility
	out := w.Header()
	out.Set("Access-Control-Allow-Origin", "*")
	out.Set("Access-Control-Allow-Methods", "POST, OPTIONS")
	out.Set("Access-Control-Allow-Headers", "Content-Type")
	out.Set("Content-Type", "application/json")

	switch r.Method {
	case "OPTIONS":
		// Preflight: headers only, no body.
		debugLogger.Log("ServeHTTP: Handling OPTIONS preflight request")
		w.WriteHeader(http.StatusOK)
		return
	case "POST":
		// Handled below.
	default:
		debugLogger.Log("ServeHTTP: Method not allowed: %s", r.Method)
		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
		return
	}

	// Read and decode the request
	var req MCPRequest
	if decodeErr := json.NewDecoder(r.Body).Decode(&req); decodeErr != nil {
		debugLogger.Log("ServeHTTP: Error decoding request: %v", decodeErr)
		log.Printf("Error decoding request: %v", decodeErr)
		http.Error(w, "Invalid JSON", http.StatusBadRequest)
		return
	}
	debugLogger.LogJSON("HTTP Request", req)
	log.Printf("Processing HTTP request: %s (ID: %v)", req.Method, req.ID)

	// Handle the request
	resp := s.HandleRequest(req)
	debugLogger.LogJSON("HTTP Response", resp)

	// Encode and send the response
	if encodeErr := json.NewEncoder(w).Encode(resp); encodeErr != nil {
		debugLogger.Log("ServeHTTP: Error encoding response: %v", encodeErr)
		log.Printf("Error encoding response: %v", encodeErr)
		http.Error(w, "Internal server error", http.StatusInternalServerError)
		return
	}

	debugLogger.Log("ServeHTTP: Response sent successfully for request: %s", req.Method)
	log.Printf("Completed HTTP request: %s", req.Method)
}
// main starts the MCP HTTP server.
//
// Configuration comes from the environment:
//   - CREMOTE_HOST / CREMOTE_PORT: cremote daemon address (default localhost:8989).
//   - MCP_HOST / MCP_PORT: listen address of this server (default localhost:8990).
//
// A port value that is not a valid integer is reported and the default is
// kept. Three routes are registered on the default mux: /mcp (the MCP
// endpoint), /health and / (basic service information).
func main() {
	// Initialize debug logger
	var err error
	debugLogger, err = NewDebugLogger()
	if err != nil {
		log.Printf("Warning: Failed to initialize debug logger: %v", err)
		// Continue without debug logging
	} else {
		defer debugLogger.Close()
		debugLogger.Log("MCP HTTP Server starting up")
	}

	// Get cremote daemon connection settings
	cremoteHost := os.Getenv("CREMOTE_HOST")
	if cremoteHost == "" {
		cremoteHost = "localhost"
	}

	cremotePort := 8989
	if raw := os.Getenv("CREMOTE_PORT"); raw != "" {
		if p, err := strconv.Atoi(raw); err == nil {
			cremotePort = p
		} else {
			log.Printf("Warning: ignoring invalid CREMOTE_PORT %q, using %d", raw, cremotePort)
		}
	}

	// Get HTTP server settings
	httpHost := os.Getenv("MCP_HOST")
	if httpHost == "" {
		httpHost = "localhost"
	}

	httpPort := 8990
	if raw := os.Getenv("MCP_PORT"); raw != "" {
		if p, err := strconv.Atoi(raw); err == nil {
			httpPort = p
		} else {
			log.Printf("Warning: ignoring invalid MCP_PORT %q, using %d", raw, httpPort)
		}
	}

	debugLogger.Log("HTTP server will listen on %s:%d", httpHost, httpPort)
	debugLogger.Log("Connecting to cremote daemon at %s:%d", cremoteHost, cremotePort)
	log.Printf("Starting MCP HTTP server on %s:%d", httpHost, httpPort)
	log.Printf("Connecting to cremote daemon at %s:%d", cremoteHost, cremotePort)

	// Create the MCP server
	mcpServer := NewMCPServer(cremoteHost, cremotePort)

	// Set up HTTP routes
	http.Handle("/mcp", mcpServer)

	// Health check endpoint
	http.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		if err := json.NewEncoder(w).Encode(map[string]string{
			"status":       "healthy",
			"cremote_host": cremoteHost,
			"cremote_port": strconv.Itoa(cremotePort),
		}); err != nil {
			log.Printf("Error encoding health response: %v", err)
		}
	})

	// Root endpoint with basic info
	http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		if err := json.NewEncoder(w).Encode(map[string]interface{}{
			"service": "Cremote MCP Server",
			"version": "1.0.0",
			"endpoints": map[string]string{
				"mcp":    "/mcp",
				"health": "/health",
			},
			"cremote_daemon": fmt.Sprintf("%s:%d", cremoteHost, cremotePort),
		}); err != nil {
			log.Printf("Error encoding info response: %v", err)
		}
	})

	// Start the HTTP server
	addr := fmt.Sprintf("%s:%d", httpHost, httpPort)
	log.Printf("MCP HTTP server listening on http://%s", addr)
	log.Printf("MCP endpoint: http://%s/mcp", addr)
	log.Printf("Health check: http://%s/health", addr)

	if err := http.ListenAndServe(addr, nil); err != nil {
		// log.Fatalf exits via os.Exit, which skips deferred calls, so the
		// debug log is closed explicitly before exiting. Close is a no-op on
		// a nil logger.
		log.Printf("HTTP server failed: %v", err)
		debugLogger.Close()
		os.Exit(1)
	}
}

View File

@ -0,0 +1,11 @@
{
"mcpServers": {
"cremote": {
"command": "/path/to/cremote/mcp/cremote-mcp",
"env": {
"CREMOTE_HOST": "localhost",
"CREMOTE_PORT": "8989"
}
}
}
}

BIN
mcp/cremote-mcp Executable file

Binary file not shown.

22
mcp/go.mod Normal file
View File

@ -0,0 +1,22 @@
module git.teamworkapps.com/shortcut/cremote/mcp
go 1.24.1
replace git.teamworkapps.com/shortcut/cremote => ../
require (
git.teamworkapps.com/shortcut/cremote v0.0.0-00010101000000-000000000000
github.com/mark3labs/mcp-go v0.37.0
)
require (
github.com/bahlo/generic-list-go v0.2.0 // indirect
github.com/buger/jsonparser v1.1.1 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/invopop/jsonschema v0.13.0 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/spf13/cast v1.7.1 // indirect
github.com/wk8/go-ordered-map/v2 v2.1.8 // indirect
github.com/yosida95/uritemplate/v3 v3.0.2 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

39
mcp/go.sum Normal file
View File

@ -0,0 +1,39 @@
github.com/bahlo/generic-list-go v0.2.0 h1:5sz/EEAK+ls5wF+NeqDpk5+iNdMDXrh3z3nPnH1Wvgk=
github.com/bahlo/generic-list-go v0.2.0/go.mod h1:2KvAjgMlE5NNynlg/5iLrrCCZ2+5xWbdbCW3pNTGyYg=
github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs=
github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/invopop/jsonschema v0.13.0 h1:KvpoAJWEjR3uD9Kbm2HWJmqsEaHt8lBUpd0qHcIi21E=
github.com/invopop/jsonschema v0.13.0/go.mod h1:ffZ5Km5SWWRAIN6wbDXItl95euhFz2uON45H2qjYt+0=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/mark3labs/mcp-go v0.37.0 h1:BywvZLPRT6Zx6mMG/MJfxLSZQkTGIcJSEGKsvr4DsoQ=
github.com/mark3labs/mcp-go v0.37.0/go.mod h1:T7tUa2jO6MavG+3P25Oy/jR7iCeJPHImCZHRymCn39g=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
github.com/spf13/cast v1.7.1 h1:cuNEagBQEHWN1FnbGEjCXL2szYEXqfJPbP2HNUaca9Y=
github.com/spf13/cast v1.7.1/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/wk8/go-ordered-map/v2 v2.1.8 h1:5h/BUHu93oj4gIdvHHHGsScSTMijfx5PeYkE/fJgbpc=
github.com/wk8/go-ordered-map/v2 v2.1.8/go.mod h1:5nJHM5DyteebpVlHnWMV0rPz6Zp7+xBAnxjb1X5vnTw=
github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4=
github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

695
mcp/main.go Normal file
View File

@ -0,0 +1,695 @@
package main
import (
"context"
"fmt"
"log"
"os"
"strconv"
"time"
"git.teamworkapps.com/shortcut/cremote/client"
"github.com/mark3labs/mcp-go/mcp"
"github.com/mark3labs/mcp-go/server"
)
// CremoteServer wraps the cremote client for MCP.
// It carries the browsing state shared by the tool handlers registered in main.
type CremoteServer struct {
// client performs the browser operations against the cremote daemon.
client *client.Client
// currentTab is the default tab ID used when a tool call omits "tab".
currentTab string
// tabHistory records the IDs of tabs opened by the tools.
tabHistory []string
// iframeMode presumably tracks whether an iframe context is active — not
// used in the visible part of this file; confirm against the iframe tool.
iframeMode bool
// screenshots collects the file paths of screenshots taken so far.
screenshots []string
}
// NewCremoteServer builds a CremoteServer whose client targets the cremote
// daemon at host:port. The tab history and screenshot list start out empty
// (non-nil), and no tab is current.
func NewCremoteServer(host string, port int) *CremoteServer {
	srv := new(CremoteServer)
	srv.client = client.NewClient(host, port)
	srv.tabHistory = []string{}
	srv.screenshots = []string{}
	return srv
}
// Helper functions for parameter extraction

// getStringParam returns params[key] when it holds a string (including the
// empty string). For a missing key or a value of any other type it returns
// defaultValue.
func getStringParam(params map[string]any, key, defaultValue string) string {
	text, isString := params[key].(string)
	if !isString {
		return defaultValue
	}
	return text
}
// getBoolParam returns params[key] when it holds a bool. For a missing key or
// a value of any other type it returns defaultValue.
func getBoolParam(params map[string]any, key string, defaultValue bool) bool {
	flag, isBool := params[key].(bool)
	if !isBool {
		return defaultValue
	}
	return flag
}
// getIntParam returns params[key] as an int. A float64 value (what
// encoding/json produces for numbers decoded into any) is converted with
// int(), which drops the fractional part; an int value is returned unchanged.
// Any other type, or a missing key, yields defaultValue.
func getIntParam(params map[string]any, key string, defaultValue int) int {
	switch number := params[key].(type) {
	case float64:
		return int(number)
	case int:
		return number
	default:
		return defaultValue
	}
}
func main() {
// Get cremote daemon connection settings
cremoteHost := os.Getenv("CREMOTE_HOST")
if cremoteHost == "" {
cremoteHost = "localhost"
}
cremotePortStr := os.Getenv("CREMOTE_PORT")
cremotePort := 8989
if cremotePortStr != "" {
if p, err := strconv.Atoi(cremotePortStr); err == nil {
cremotePort = p
}
}
log.Printf("Starting cremote MCP server, connecting to cremote daemon at %s:%d", cremoteHost, cremotePort)
// Create the cremote server
cremoteServer := NewCremoteServer(cremoteHost, cremotePort)
// Create MCP server
mcpServer := server.NewMCPServer("cremote-mcp", "1.0.0")
// Register web_navigate tool
mcpServer.AddTool(mcp.Tool{
Name: "web_navigate",
Description: "Navigate to a URL and optionally take a screenshot",
InputSchema: mcp.ToolInputSchema{
Type: "object",
Properties: map[string]any{
"url": map[string]any{
"type": "string",
"description": "URL to navigate to",
},
"tab": map[string]any{
"type": "string",
"description": "Tab ID (optional, uses current tab)",
},
"timeout": map[string]any{
"type": "integer",
"description": "Timeout in seconds",
"default": 5,
},
"screenshot": map[string]any{
"type": "boolean",
"description": "Take screenshot after navigation",
},
},
Required: []string{"url"},
},
}, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
// Convert arguments to map
params, ok := request.Params.Arguments.(map[string]any)
if !ok {
return nil, fmt.Errorf("invalid arguments format")
}
url := getStringParam(params, "url", "")
if url == "" {
return nil, fmt.Errorf("url parameter is required")
}
tab := getStringParam(params, "tab", cremoteServer.currentTab)
timeout := getIntParam(params, "timeout", 5)
takeScreenshot := getBoolParam(params, "screenshot", false)
// If no tab specified and no current tab, create a new one
if tab == "" {
newTab, err := cremoteServer.client.OpenTab(timeout)
if err != nil {
return nil, fmt.Errorf("failed to create new tab: %w", err)
}
tab = newTab
cremoteServer.currentTab = tab
cremoteServer.tabHistory = append(cremoteServer.tabHistory, tab)
}
// Load the URL
err := cremoteServer.client.LoadURL(tab, url, timeout)
if err != nil {
return nil, fmt.Errorf("failed to load URL: %w", err)
}
message := fmt.Sprintf("Successfully navigated to %s in tab %s", url, tab)
// Take screenshot if requested
if takeScreenshot {
screenshotPath := fmt.Sprintf("/tmp/navigate-%d.png", time.Now().Unix())
err = cremoteServer.client.TakeScreenshot(tab, screenshotPath, false, timeout)
if err == nil {
cremoteServer.screenshots = append(cremoteServer.screenshots, screenshotPath)
message += fmt.Sprintf(" (screenshot saved to %s)", screenshotPath)
}
}
return &mcp.CallToolResult{
Content: []mcp.Content{
mcp.NewTextContent(message),
},
IsError: false,
}, nil
})
// Register web_interact tool
mcpServer.AddTool(mcp.Tool{
Name: "web_interact",
Description: "Interact with web elements (click, fill, submit)",
InputSchema: mcp.ToolInputSchema{
Type: "object",
Properties: map[string]any{
"action": map[string]any{
"type": "string",
"description": "Action to perform",
"enum": []any{"click", "fill", "submit", "upload"},
},
"selector": map[string]any{
"type": "string",
"description": "CSS selector for the element",
},
"value": map[string]any{
"type": "string",
"description": "Value to fill (for fill/upload actions)",
},
"tab": map[string]any{
"type": "string",
"description": "Tab ID (optional)",
},
"timeout": map[string]any{
"type": "integer",
"description": "Timeout in seconds",
"default": 5,
},
},
Required: []string{"action", "selector"},
},
}, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
// Convert arguments to map
params, ok := request.Params.Arguments.(map[string]any)
if !ok {
return nil, fmt.Errorf("invalid arguments format")
}
action := getStringParam(params, "action", "")
selector := getStringParam(params, "selector", "")
value := getStringParam(params, "value", "")
tab := getStringParam(params, "tab", cremoteServer.currentTab)
timeout := getIntParam(params, "timeout", 5)
if action == "" {
return nil, fmt.Errorf("action parameter is required")
}
if selector == "" {
return nil, fmt.Errorf("selector parameter is required")
}
if tab == "" {
return nil, fmt.Errorf("no tab available - navigate to a page first")
}
var err error
var message string
switch action {
case "click":
err = cremoteServer.client.ClickElement(tab, selector, timeout, timeout)
message = fmt.Sprintf("Clicked element %s", selector)
case "fill":
if value == "" {
return nil, fmt.Errorf("value parameter is required for fill action")
}
err = cremoteServer.client.FillFormField(tab, selector, value, timeout, timeout)
message = fmt.Sprintf("Filled element %s with value", selector)
case "submit":
err = cremoteServer.client.SubmitForm(tab, selector, timeout, timeout)
message = fmt.Sprintf("Submitted form %s", selector)
case "upload":
if value == "" {
return nil, fmt.Errorf("value parameter (file path) is required for upload action")
}
err = cremoteServer.client.UploadFile(tab, selector, value, timeout, timeout)
message = fmt.Sprintf("Uploaded file %s to element %s", value, selector)
default:
return nil, fmt.Errorf("unknown action: %s", action)
}
if err != nil {
return nil, fmt.Errorf("failed to %s element: %w", action, err)
}
return &mcp.CallToolResult{
Content: []mcp.Content{
mcp.NewTextContent(message),
},
IsError: false,
}, nil
})
// Register the web_extract tool. Depending on "type" it returns the page
// source, the HTML of one element, or the result of running JavaScript in the
// target tab (the server's current tab unless "tab" is supplied).
mcpServer.AddTool(mcp.Tool{
	Name:        "web_extract",
	Description: "Extract data from the page (source, element HTML, or execute JavaScript)",
	InputSchema: mcp.ToolInputSchema{
		Type: "object",
		Properties: map[string]any{
			"type": map[string]any{
				"type":        "string",
				"description": "Type of extraction",
				"enum":        []any{"source", "element", "javascript"},
			},
			"selector": map[string]any{
				"type":        "string",
				"description": "CSS selector (for element type)",
			},
			"code": map[string]any{
				"type":        "string",
				"description": "JavaScript code (for javascript type)",
			},
			"tab": map[string]any{
				"type":        "string",
				"description": "Tab ID (optional)",
			},
			"timeout": map[string]any{
				"type":        "integer",
				"description": "Timeout in seconds",
				"default":     5,
			},
		},
		Required: []string{"type"},
	},
}, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
	// The arguments must arrive as a generic JSON object.
	args, isMap := request.Params.Arguments.(map[string]any)
	if !isMap {
		return nil, fmt.Errorf("invalid arguments format")
	}

	var (
		kind       = getStringParam(args, "type", "")
		sel        = getStringParam(args, "selector", "")
		script     = getStringParam(args, "code", "")
		tabID      = getStringParam(args, "tab", cremoteServer.currentTab)
		timeoutSec = getIntParam(args, "timeout", 5)
	)

	// Validate the inputs every extraction kind needs before dispatching.
	if kind == "" {
		return nil, fmt.Errorf("type parameter is required")
	}
	if tabID == "" {
		return nil, fmt.Errorf("no tab available - navigate to a page first")
	}

	var (
		extracted string
		err       error
	)
	switch kind {
	case "source":
		extracted, err = cremoteServer.client.GetPageSource(tabID, timeoutSec)
	case "element":
		if sel == "" {
			return nil, fmt.Errorf("selector parameter is required for element extraction")
		}
		extracted, err = cremoteServer.client.GetElementHTML(tabID, sel, timeoutSec)
	case "javascript":
		if script == "" {
			return nil, fmt.Errorf("code parameter is required for javascript extraction")
		}
		extracted, err = cremoteServer.client.EvalJS(tabID, script, timeoutSec)
	default:
		return nil, fmt.Errorf("unknown extraction type: %s", kind)
	}
	if err != nil {
		return nil, fmt.Errorf("failed to extract %s: %w", kind, err)
	}

	text := fmt.Sprintf("Extracted %s data: %s", kind, extracted)
	return &mcp.CallToolResult{
		Content: []mcp.Content{mcp.NewTextContent(text)},
		IsError: false,
	}, nil
})
// Register web_screenshot tool
mcpServer.AddTool(mcp.Tool{
Name: "web_screenshot",
Description: "Take a screenshot of the current page",
InputSchema: mcp.ToolInputSchema{
Type: "object",
Properties: map[string]any{
"output": map[string]any{
"type": "string",
"description": "Output file path",
},
"full_page": map[string]any{
"type": "boolean",
"description": "Capture full page",
"default": false,
},
"tab": map[string]any{
"type": "string",
"description": "Tab ID (optional)",
},
"timeout": map[string]any{
"type": "integer",
"description": "Timeout in seconds",
"default": 5,
},
},
Required: []string{"output"},
},
}, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
// Convert arguments to map
params, ok := request.Params.Arguments.(map[string]any)
if !ok {
return nil, fmt.Errorf("invalid arguments format")
}
output := getStringParam(params, "output", "")
fullPage := getBoolParam(params, "full_page", false)
tab := getStringParam(params, "tab", cremoteServer.currentTab)
timeout := getIntParam(params, "timeout", 5)
if output == "" {
return nil, fmt.Errorf("output parameter is required")
}
if tab == "" {
return nil, fmt.Errorf("no tab available - navigate to a page first")
}
err := cremoteServer.client.TakeScreenshot(tab, output, fullPage, timeout)
if err != nil {
return nil, fmt.Errorf("failed to take screenshot: %w", err)
}
cremoteServer.screenshots = append(cremoteServer.screenshots, output)
return &mcp.CallToolResult{
Content: []mcp.Content{
mcp.NewTextContent(fmt.Sprintf("Screenshot saved to %s", output)),
},
IsError: false,
}, nil
})
// Register the web_manage_tabs tool. It opens, closes, lists, or switches
// browser tabs and keeps cremoteServer.currentTab / cremoteServer.tabHistory
// in step with those operations.
mcpServer.AddTool(mcp.Tool{
	Name:        "web_manage_tabs",
	Description: "Manage browser tabs (open, close, list, switch)",
	InputSchema: mcp.ToolInputSchema{
		Type: "object",
		Properties: map[string]any{
			"action": map[string]any{
				"type":        "string",
				"description": "Action to perform",
				"enum":        []any{"open", "close", "list", "switch"},
			},
			"tab": map[string]any{
				"type":        "string",
				"description": "Tab ID (for close/switch actions)",
			},
			"timeout": map[string]any{
				"type":        "integer",
				"description": "Timeout in seconds",
				"default":     5,
			},
		},
		Required: []string{"action"},
	},
}, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
	// The arguments must arrive as a generic JSON object.
	args, isMap := request.Params.Arguments.(map[string]any)
	if !isMap {
		return nil, fmt.Errorf("invalid arguments format")
	}

	var (
		action     = getStringParam(args, "action", "")
		tab        = getStringParam(args, "tab", "")
		timeoutSec = getIntParam(args, "timeout", 5)
	)
	if action == "" {
		return nil, fmt.Errorf("action parameter is required")
	}

	var message string
	switch action {
	case "open":
		opened, err := cremoteServer.client.OpenTab(timeoutSec)
		if err != nil {
			return nil, fmt.Errorf("failed to open new tab: %w", err)
		}
		// A freshly opened tab becomes the current one.
		cremoteServer.currentTab = opened
		cremoteServer.tabHistory = append(cremoteServer.tabHistory, opened)
		message = fmt.Sprintf("Opened new tab: %s", opened)

	case "close":
		if tab == "" {
			return nil, fmt.Errorf("tab parameter is required for close action")
		}
		if err := cremoteServer.client.CloseTab(tab, timeoutSec); err != nil {
			return nil, fmt.Errorf("failed to close tab: %w", err)
		}

		// Drop the first history entry for the closed tab.
		for idx := range cremoteServer.tabHistory {
			if cremoteServer.tabHistory[idx] != tab {
				continue
			}
			cremoteServer.tabHistory = append(cremoteServer.tabHistory[:idx], cremoteServer.tabHistory[idx+1:]...)
			break
		}

		// If the closed tab was current, fall back to the most recent
		// remaining history entry, or to no tab at all.
		if cremoteServer.currentTab == tab {
			fallback := ""
			if remaining := len(cremoteServer.tabHistory); remaining > 0 {
				fallback = cremoteServer.tabHistory[remaining-1]
			}
			cremoteServer.currentTab = fallback
		}
		message = fmt.Sprintf("Closed tab: %s", tab)

	case "list":
		tabs, err := cremoteServer.client.ListTabs()
		if err != nil {
			return nil, fmt.Errorf("failed to list tabs: %w", err)
		}
		message = fmt.Sprintf("Found %d tabs: %v", len(tabs), tabs)

	case "switch":
		if tab == "" {
			return nil, fmt.Errorf("tab parameter is required for switch action")
		}
		cremoteServer.currentTab = tab

		// Record the tab in the history only if it is not there yet.
		known := false
		for idx := range cremoteServer.tabHistory {
			if cremoteServer.tabHistory[idx] == tab {
				known = true
				break
			}
		}
		if !known {
			cremoteServer.tabHistory = append(cremoteServer.tabHistory, tab)
		}
		message = fmt.Sprintf("Switched to tab: %s", tab)

	default:
		return nil, fmt.Errorf("unknown tab action: %s", action)
	}

	return &mcp.CallToolResult{
		Content: []mcp.Content{mcp.NewTextContent(message)},
		IsError: false,
	}, nil
})
// Register the web_iframe tool. "enter" switches the target tab into the
// iframe matched by "selector"; "exit" switches it back to the main document.
// The server mirrors the active context in cremoteServer.iframeMode.
mcpServer.AddTool(mcp.Tool{
	Name:        "web_iframe",
	Description: "Switch iframe context for subsequent operations",
	InputSchema: mcp.ToolInputSchema{
		Type: "object",
		Properties: map[string]any{
			"action": map[string]any{
				"type":        "string",
				"description": "Action to perform",
				"enum":        []any{"enter", "exit"},
			},
			"selector": map[string]any{
				"type":        "string",
				"description": "Iframe CSS selector (for enter action)",
			},
			"tab": map[string]any{
				"type":        "string",
				"description": "Tab ID (optional)",
			},
		},
		Required: []string{"action"},
	},
}, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
	// Convert arguments to map
	params, ok := request.Params.Arguments.(map[string]any)
	if !ok {
		return nil, fmt.Errorf("invalid arguments format")
	}

	action := getStringParam(params, "action", "")
	selector := getStringParam(params, "selector", "")
	tab := getStringParam(params, "tab", cremoteServer.currentTab)

	if action == "" {
		return nil, fmt.Errorf("action parameter is required")
	}
	if tab == "" {
		return nil, fmt.Errorf("no tab available - navigate to a page first")
	}

	var (
		err      error
		message  string
		inIframe bool // context to record if the switch succeeds
	)
	switch action {
	case "enter":
		if selector == "" {
			return nil, fmt.Errorf("selector parameter is required for enter action")
		}
		err = cremoteServer.client.SwitchToIframe(tab, selector)
		inIframe = true
		message = fmt.Sprintf("Entered iframe: %s", selector)
	case "exit":
		err = cremoteServer.client.SwitchToMain(tab)
		inIframe = false
		message = "Exited iframe context"
	default:
		return nil, fmt.Errorf("unknown iframe action: %s", action)
	}
	if err != nil {
		// Leave iframeMode untouched: the requested switch did not happen,
		// so the previously recorded context is still the best knowledge.
		return nil, fmt.Errorf("failed to %s iframe: %w", action, err)
	}

	// Record the new context only after the client call has succeeded.
	cremoteServer.iframeMode = inIframe

	return &mcp.CallToolResult{
		Content: []mcp.Content{
			mcp.NewTextContent(message),
		},
		IsError: false,
	}, nil
})
// Register file_upload tool
mcpServer.AddTool(mcp.Tool{
Name: "file_upload",
Description: "Upload a file from the client to the container for use in form uploads",
InputSchema: mcp.ToolInputSchema{
Type: "object",
Properties: map[string]any{
"local_path": map[string]any{
"type": "string",
"description": "Path to the file on the client machine",
},
"container_path": map[string]any{
"type": "string",
"description": "Optional path where to store the file in the container (defaults to /tmp/filename)",
},
},
Required: []string{"local_path"},
},
}, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
// Convert arguments to map
params, ok := request.Params.Arguments.(map[string]any)
if !ok {
return nil, fmt.Errorf("invalid arguments format")
}
localPath := getStringParam(params, "local_path", "")
containerPath := getStringParam(params, "container_path", "")
if localPath == "" {
return nil, fmt.Errorf("local_path parameter is required")
}
// Upload the file to the container
targetPath, err := cremoteServer.client.UploadFileToContainer(localPath, containerPath)
if err != nil {
return nil, fmt.Errorf("failed to upload file: %w", err)
}
return &mcp.CallToolResult{
Content: []mcp.Content{
mcp.NewTextContent(fmt.Sprintf("File uploaded successfully to container at: %s", targetPath)),
},
IsError: false,
}, nil
})
// Register file_download tool
mcpServer.AddTool(mcp.Tool{
Name: "file_download",
Description: "Download a file from the container to the client (e.g., downloaded files from browser)",
InputSchema: mcp.ToolInputSchema{
Type: "object",
Properties: map[string]any{
"container_path": map[string]any{
"type": "string",
"description": "Path to the file in the container",
},
"local_path": map[string]any{
"type": "string",
"description": "Path where to save the file on the client machine",
},
},
Required: []string{"container_path", "local_path"},
},
}, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
// Convert arguments to map
params, ok := request.Params.Arguments.(map[string]any)
if !ok {
return nil, fmt.Errorf("invalid arguments format")
}
containerPath := getStringParam(params, "container_path", "")
localPath := getStringParam(params, "local_path", "")
if containerPath == "" {
return nil, fmt.Errorf("container_path parameter is required")
}
if localPath == "" {
return nil, fmt.Errorf("local_path parameter is required")
}
// Download the file from the container
err := cremoteServer.client.DownloadFileFromContainer(containerPath, localPath)
if err != nil {
return nil, fmt.Errorf("failed to download file: %w", err)
}
return &mcp.CallToolResult{
Content: []mcp.Content{
mcp.NewTextContent(fmt.Sprintf("File downloaded successfully from container to: %s", localPath)),
},
IsError: false,
}, nil
})
// Start the server: log that all tools are registered, then hand the
// configured mcpServer to server.ServeStdio (presumably serving requests over
// stdin/stdout until it returns). Any error it returns is fatal: log.Fatalf
// logs it and exits the process.
log.Printf("Cremote MCP server ready")
if err := server.ServeStdio(mcpServer); err != nil {
log.Fatalf("Server error: %v", err)
}
}

5
mcp/test_mcp.sh Executable file
View File

@ -0,0 +1,5 @@
#!/bin/bash
# Smoke test: pipe a single tools/list request into the cremote MCP server
# binary in the current directory and let it print the response.
#
# MCP messages are JSON-RPC 2.0, so every request must carry the
# "jsonrpc":"2.0" member; a request without it is not a valid message.
(
echo '{"jsonrpc":"2.0","method":"tools/list","params":{},"id":1}'
# Presumably keeps stdin open long enough for the server to answer before EOF.
sleep 1
) | ./cremote-mcp

1
test-upload.txt Normal file
View File

@ -0,0 +1 @@
This is a test file for cremote file transfer