files
This commit is contained in:
parent
d6209cd34f
commit
dc6b288aa4
11
README.md
11
README.md
|
@ -15,6 +15,17 @@ This architecture provides several benefits:
|
|||
- No need to reconnect for each command
|
||||
- Better performance
|
||||
|
||||
## MCP Server
|
||||
|
||||
Cremote includes a Model Context Protocol (MCP) server that provides a structured API for LLMs and AI agents. Instead of using CLI commands, the MCP server offers:
|
||||
|
||||
- **State Management**: Automatic tracking of tabs, history, and iframe context
|
||||
- **Intelligent Abstractions**: High-level tools that combine multiple operations
|
||||
- **Better Error Handling**: Rich error context for debugging
|
||||
- **Automatic Screenshots**: Built-in screenshot capture for documentation
|
||||
|
||||
See the [MCP Server Documentation](mcp/README.md) for setup and usage instructions.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- Go 1.16 or higher
|
||||
|
|
124
client/client.go
124
client/client.go
|
@ -5,7 +5,9 @@ import (
|
|||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"mime/multipart"
|
||||
"net/http"
|
||||
"os"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
|
@ -602,3 +604,125 @@ func (c *Client) ClickElement(tabID, selector string, selectionTimeout, actionTi
|
|||
|
||||
return nil
|
||||
}
|
||||
|
||||
// UploadFileToContainer uploads a file from the client to the container
|
||||
// localPath: path to the file on the client machine
|
||||
// containerPath: optional path where to store the file in the container (defaults to /tmp/filename)
|
||||
func (c *Client) UploadFileToContainer(localPath, containerPath string) (string, error) {
|
||||
// Open the local file
|
||||
file, err := os.Open(localPath)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to open local file: %w", err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
// Get file info
|
||||
fileInfo, err := file.Stat()
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to get file info: %w", err)
|
||||
}
|
||||
|
||||
// Create multipart form
|
||||
var body bytes.Buffer
|
||||
writer := multipart.NewWriter(&body)
|
||||
|
||||
// Add the file field
|
||||
fileWriter, err := writer.CreateFormFile("file", fileInfo.Name())
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to create form file: %w", err)
|
||||
}
|
||||
|
||||
_, err = io.Copy(fileWriter, file)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to copy file data: %w", err)
|
||||
}
|
||||
|
||||
// Add the path field if specified
|
||||
if containerPath != "" {
|
||||
err = writer.WriteField("path", containerPath)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to write path field: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
err = writer.Close()
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to close multipart writer: %w", err)
|
||||
}
|
||||
|
||||
// Send the request
|
||||
req, err := http.NewRequest("POST", c.serverURL+"/upload", &body)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to create request: %w", err)
|
||||
}
|
||||
req.Header.Set("Content-Type", writer.FormDataContentType())
|
||||
|
||||
client := &http.Client{}
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to send request: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
return "", fmt.Errorf("upload failed with status %d: %s", resp.StatusCode, body)
|
||||
}
|
||||
|
||||
// Parse the response
|
||||
var response Response
|
||||
err = json.NewDecoder(resp.Body).Decode(&response)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to decode response: %w", err)
|
||||
}
|
||||
|
||||
if !response.Success {
|
||||
return "", fmt.Errorf("upload failed: %s", response.Error)
|
||||
}
|
||||
|
||||
// Extract the target path from response
|
||||
data, ok := response.Data.(map[string]interface{})
|
||||
if !ok {
|
||||
return "", fmt.Errorf("invalid response data format")
|
||||
}
|
||||
|
||||
targetPath, ok := data["target_path"].(string)
|
||||
if !ok {
|
||||
return "", fmt.Errorf("target_path not found in response")
|
||||
}
|
||||
|
||||
return targetPath, nil
|
||||
}
|
||||
|
||||
// DownloadFileFromContainer downloads a file from the container to the client
|
||||
// containerPath: path to the file in the container
|
||||
// localPath: path where to save the file on the client machine
|
||||
func (c *Client) DownloadFileFromContainer(containerPath, localPath string) error {
|
||||
// Create the request
|
||||
url := fmt.Sprintf("%s/download?path=%s", c.serverURL, containerPath)
|
||||
resp, err := http.Get(url)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to send download request: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
return fmt.Errorf("download failed with status %d: %s", resp.StatusCode, body)
|
||||
}
|
||||
|
||||
// Create the local file
|
||||
localFile, err := os.Create(localPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create local file: %w", err)
|
||||
}
|
||||
defer localFile.Close()
|
||||
|
||||
// Copy the response body to the local file
|
||||
_, err = io.Copy(localFile, resp.Body)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to save file: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
|
105
daemon/daemon.go
105
daemon/daemon.go
|
@ -4,6 +4,7 @@ import (
|
|||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net"
|
||||
"net/http"
|
||||
|
@ -95,6 +96,8 @@ func NewDaemon(host string, port int) (*Daemon, error) {
|
|||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/command", daemon.handleCommand)
|
||||
mux.HandleFunc("/status", daemon.handleStatus)
|
||||
mux.HandleFunc("/upload", daemon.handleFileUpload)
|
||||
mux.HandleFunc("/download", daemon.handleFileDownload)
|
||||
|
||||
daemon.server = &http.Server{
|
||||
Addr: fmt.Sprintf("%s:%d", host, port),
|
||||
|
@ -1637,3 +1640,105 @@ func (d *Daemon) switchToMain(tabID string) error {
|
|||
|
||||
return nil
|
||||
}
|
||||
|
||||
// handleFileUpload handles file upload requests from clients
|
||||
func (d *Daemon) handleFileUpload(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodPost {
|
||||
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
|
||||
return
|
||||
}
|
||||
|
||||
// Parse multipart form (32MB max memory)
|
||||
err := r.ParseMultipartForm(32 << 20)
|
||||
if err != nil {
|
||||
http.Error(w, "Failed to parse multipart form", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
// Get the uploaded file
|
||||
file, header, err := r.FormFile("file")
|
||||
if err != nil {
|
||||
http.Error(w, "Failed to get uploaded file", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
// Get the target path (optional, defaults to /tmp/)
|
||||
targetPath := r.FormValue("path")
|
||||
if targetPath == "" {
|
||||
targetPath = "/tmp/" + header.Filename
|
||||
}
|
||||
|
||||
// Create the target file
|
||||
targetFile, err := os.Create(targetPath)
|
||||
if err != nil {
|
||||
http.Error(w, fmt.Sprintf("Failed to create target file: %v", err), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
defer targetFile.Close()
|
||||
|
||||
// Copy the uploaded file to the target location
|
||||
_, err = io.Copy(targetFile, file)
|
||||
if err != nil {
|
||||
http.Error(w, fmt.Sprintf("Failed to save file: %v", err), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
// Return success response
|
||||
response := Response{
|
||||
Success: true,
|
||||
Data: map[string]interface{}{
|
||||
"filename": header.Filename,
|
||||
"size": header.Size,
|
||||
"target_path": targetPath,
|
||||
},
|
||||
}
|
||||
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
json.NewEncoder(w).Encode(response)
|
||||
}
|
||||
|
||||
// handleFileDownload handles file download requests from clients
|
||||
func (d *Daemon) handleFileDownload(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodGet {
|
||||
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
|
||||
return
|
||||
}
|
||||
|
||||
// Get the file path from query parameter
|
||||
filePath := r.URL.Query().Get("path")
|
||||
if filePath == "" {
|
||||
http.Error(w, "File path is required", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
// Check if file exists and get info
|
||||
fileInfo, err := os.Stat(filePath)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
http.Error(w, "File not found", http.StatusNotFound)
|
||||
} else {
|
||||
http.Error(w, fmt.Sprintf("Failed to access file: %v", err), http.StatusInternalServerError)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Open the file
|
||||
file, err := os.Open(filePath)
|
||||
if err != nil {
|
||||
http.Error(w, fmt.Sprintf("Failed to open file: %v", err), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
// Set headers for file download
|
||||
w.Header().Set("Content-Type", "application/octet-stream")
|
||||
w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=\"%s\"", fileInfo.Name()))
|
||||
w.Header().Set("Content-Length", strconv.FormatInt(fileInfo.Size(), 10))
|
||||
|
||||
// Stream the file to the client
|
||||
_, err = io.Copy(w, file)
|
||||
if err != nil {
|
||||
log.Printf("Error streaming file to client: %v", err)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,642 @@
|
|||
# LLM Agent Guide: Using Cremote MCP Server for Web Automation
|
||||
|
||||
This document provides comprehensive guidance for LLM agents on how to use the **Cremote MCP Server** for intelligent web automation. The MCP server provides a structured, stateful interface that's optimized for AI-driven web testing and automation workflows.
|
||||
|
||||
## What is the Cremote MCP Server?
|
||||
|
||||
The **Cremote MCP Server** is a Model Context Protocol implementation that wraps cremote's web automation capabilities in a structured API designed specifically for LLMs. Unlike CLI commands, the MCP server provides:
|
||||
|
||||
- **Automatic State Management**: Tracks current tab, tab history, and iframe context
|
||||
- **Intelligent Abstractions**: High-level tools that combine multiple operations
|
||||
- **Rich Error Context**: Detailed error information for better debugging
|
||||
- **Automatic Screenshots**: Built-in screenshot capture for documentation
|
||||
- **Structured Responses**: Consistent, parseable JSON responses
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Before using the MCP server, ensure the cremote infrastructure is running:
|
||||
|
||||
1. **Check if everything is already running:**
|
||||
```bash
|
||||
cremote status
|
||||
```
|
||||
|
||||
2. **Start Chromium with remote debugging (if needed):**
|
||||
```bash
|
||||
chromium --remote-debugging-port=9222 --user-data-dir=/tmp/chromium-debug &
|
||||
```
|
||||
|
||||
3. **Start cremote daemon (if needed):**
|
||||
```bash
|
||||
cremotedaemon &
|
||||
```
|
||||
|
||||
4. **The MCP server should be configured in your MCP client** (e.g., Claude Desktop)
|
||||
|
||||
## Available MCP Tools
|
||||
|
||||
### 1. `web_navigate` - Smart Navigation
|
||||
|
||||
Navigate to URLs with automatic tab management and optional screenshot capture.
|
||||
|
||||
**Parameters:**
|
||||
- `url` (required): URL to navigate to
|
||||
- `tab` (optional): Specific tab ID (uses current tab if not specified)
|
||||
- `screenshot` (optional): Take screenshot after navigation (default: false)
|
||||
- `timeout` (optional): Timeout in seconds (default: 5)
|
||||
|
||||
**Example:**
|
||||
```json
|
||||
{
|
||||
"name": "web_navigate",
|
||||
"arguments": {
|
||||
"url": "https://example.com/login",
|
||||
"screenshot": true,
|
||||
"timeout": 10
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Smart Behavior:**
|
||||
- Automatically opens a new tab if none exists
|
||||
- Updates current tab tracking
|
||||
- Adds tab to history for easy switching
|
||||
|
||||
### 2. `web_interact` - Element Interactions
|
||||
|
||||
Interact with web elements through a unified interface.
|
||||
|
||||
**Parameters:**
|
||||
- `action` (required): "click", "fill", "submit", or "upload"
|
||||
- `selector` (required): CSS selector for the target element
|
||||
- `value` (optional): Value for fill/upload actions
|
||||
- `tab` (optional): Tab ID (uses current tab if not specified)
|
||||
- `timeout` (optional): Timeout in seconds (default: 5)
|
||||
|
||||
**Examples:**
|
||||
```json
|
||||
// Fill a form field
|
||||
{
|
||||
"name": "web_interact",
|
||||
"arguments": {
|
||||
"action": "fill",
|
||||
"selector": "#username",
|
||||
"value": "testuser"
|
||||
}
|
||||
}
|
||||
|
||||
// Click a button
|
||||
{
|
||||
"name": "web_interact",
|
||||
"arguments": {
|
||||
"action": "click",
|
||||
"selector": "#login-button"
|
||||
}
|
||||
}
|
||||
|
||||
// Submit a form
|
||||
{
|
||||
"name": "web_interact",
|
||||
"arguments": {
|
||||
"action": "submit",
|
||||
"selector": "form#login-form"
|
||||
}
|
||||
}
|
||||
|
||||
// Upload a file
|
||||
{
|
||||
"name": "web_interact",
|
||||
"arguments": {
|
||||
"action": "upload",
|
||||
"selector": "input[type=file]",
|
||||
"value": "/path/to/file.pdf"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 3. `web_extract` - Data Extraction
|
||||
|
||||
Extract information from web pages through multiple methods.
|
||||
|
||||
**Parameters:**
|
||||
- `type` (required): "source", "element", or "javascript"
|
||||
- `selector` (optional): CSS selector (required for "element" type)
|
||||
- `code` (optional): JavaScript code (required for "javascript" type)
|
||||
- `tab` (optional): Tab ID (uses current tab if not specified)
|
||||
- `timeout` (optional): Timeout in seconds (default: 5)
|
||||
|
||||
**Examples:**
|
||||
```json
|
||||
// Get page source
|
||||
{
|
||||
"name": "web_extract",
|
||||
"arguments": {
|
||||
"type": "source"
|
||||
}
|
||||
}
|
||||
|
||||
// Get specific element HTML
|
||||
{
|
||||
"name": "web_extract",
|
||||
"arguments": {
|
||||
"type": "element",
|
||||
"selector": ".error-message"
|
||||
}
|
||||
}
|
||||
|
||||
// Execute JavaScript and get result
|
||||
{
|
||||
"name": "web_extract",
|
||||
"arguments": {
|
||||
"type": "javascript",
|
||||
"code": "document.title"
|
||||
}
|
||||
}
|
||||
|
||||
// Check form validation
|
||||
{
|
||||
"name": "web_extract",
|
||||
"arguments": {
|
||||
"type": "javascript",
|
||||
"code": "document.getElementById('email').validity.valid"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 4. `web_screenshot` - Screenshot Capture
|
||||
|
||||
Take screenshots for documentation and debugging.
|
||||
|
||||
**Parameters:**
|
||||
- `output` (required): File path for the screenshot
|
||||
- `full_page` (optional): Capture full page vs viewport (default: false)
|
||||
- `tab` (optional): Tab ID (uses current tab if not specified)
|
||||
- `timeout` (optional): Timeout in seconds (default: 5)
|
||||
|
||||
**Examples:**
|
||||
```json
|
||||
// Viewport screenshot
|
||||
{
|
||||
"name": "web_screenshot",
|
||||
"arguments": {
|
||||
"output": "/tmp/login-page.png"
|
||||
}
|
||||
}
|
||||
|
||||
// Full page screenshot
|
||||
{
|
||||
"name": "web_screenshot",
|
||||
"arguments": {
|
||||
"output": "/tmp/full-page.png",
|
||||
"full_page": true
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 5. `web_manage_tabs` - Tab Management
|
||||
|
||||
Manage browser tabs with automatic state tracking.
|
||||
|
||||
**Parameters:**
|
||||
- `action` (required): "open", "close", "list", or "switch"
|
||||
- `tab` (optional): Tab ID (required for "close" and "switch" actions)
|
||||
- `timeout` (optional): Timeout in seconds (default: 5)
|
||||
|
||||
**Examples:**
|
||||
```json
|
||||
// Open new tab
|
||||
{
|
||||
"name": "web_manage_tabs",
|
||||
"arguments": {
|
||||
"action": "open"
|
||||
}
|
||||
}
|
||||
|
||||
// List all tabs
|
||||
{
|
||||
"name": "web_manage_tabs",
|
||||
"arguments": {
|
||||
"action": "list"
|
||||
}
|
||||
}
|
||||
|
||||
// Switch to specific tab
|
||||
{
|
||||
"name": "web_manage_tabs",
|
||||
"arguments": {
|
||||
"action": "switch",
|
||||
"tab": "tab-id-123"
|
||||
}
|
||||
}
|
||||
|
||||
// Close current tab
|
||||
{
|
||||
"name": "web_manage_tabs",
|
||||
"arguments": {
|
||||
"action": "close"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 6. `web_iframe` - Iframe Context Management
|
||||
|
||||
Switch between main page and iframe contexts for testing embedded content.
|
||||
|
||||
**Parameters:**
|
||||
- `action` (required): "enter" or "exit"
|
||||
- `selector` (optional): Iframe CSS selector (required for "enter" action)
|
||||
- `tab` (optional): Tab ID (uses current tab if not specified)
|
||||
|
||||
**Examples:**
|
||||
```json
|
||||
// Enter iframe context
|
||||
{
|
||||
"name": "web_iframe",
|
||||
"arguments": {
|
||||
"action": "enter",
|
||||
"selector": "iframe#payment-form"
|
||||
}
|
||||
}
|
||||
|
||||
// Exit iframe context (return to main page)
|
||||
{
|
||||
"name": "web_iframe",
|
||||
"arguments": {
|
||||
"action": "exit"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Response Format
|
||||
|
||||
All MCP tools return a consistent response structure:
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"data": "...", // Tool-specific response data
|
||||
"screenshot": "/tmp/shot.png", // Screenshot path (if captured)
|
||||
"current_tab": "tab-id-123", // Current active tab
|
||||
"tab_history": ["tab-id-123"], // Tab history stack
|
||||
"iframe_mode": false, // Whether in iframe context
|
||||
"error": null, // Error message (if failed)
|
||||
"metadata": {} // Additional context information
|
||||
}
|
||||
```
|
||||
|
||||
## Common Automation Patterns
|
||||
|
||||
### 1. Login Flow Testing
|
||||
|
||||
```json
|
||||
// 1. Navigate to login page with screenshot
|
||||
{
|
||||
"name": "web_navigate",
|
||||
"arguments": {
|
||||
"url": "https://myapp.com/login",
|
||||
"screenshot": true
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Fill credentials
|
||||
{
|
||||
"name": "web_interact",
|
||||
"arguments": {
|
||||
"action": "fill",
|
||||
"selector": "#email",
|
||||
"value": "user@example.com"
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
"name": "web_interact",
|
||||
"arguments": {
|
||||
"action": "fill",
|
||||
"selector": "#password",
|
||||
"value": "password123"
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Submit login
|
||||
{
|
||||
"name": "web_interact",
|
||||
"arguments": {
|
||||
"action": "click",
|
||||
"selector": "#login-button"
|
||||
}
|
||||
}
|
||||
|
||||
// 4. Verify success
|
||||
{
|
||||
"name": "web_extract",
|
||||
"arguments": {
|
||||
"type": "javascript",
|
||||
"code": "document.querySelector('.welcome-message')?.textContent"
|
||||
}
|
||||
}
|
||||
|
||||
// 5. Document result
|
||||
{
|
||||
"name": "web_screenshot",
|
||||
"arguments": {
|
||||
"output": "/tmp/login-success.png"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 2. Form Validation Testing
|
||||
|
||||
```json
|
||||
// 1. Navigate to form
|
||||
{
|
||||
"name": "web_navigate",
|
||||
"arguments": {
|
||||
"url": "https://myapp.com/register"
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Test empty form submission
|
||||
{
|
||||
"name": "web_interact",
|
||||
"arguments": {
|
||||
"action": "click",
|
||||
"selector": "#submit-button"
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Check for validation errors
|
||||
{
|
||||
"name": "web_extract",
|
||||
"arguments": {
|
||||
"type": "element",
|
||||
"selector": ".error-message"
|
||||
}
|
||||
}
|
||||
|
||||
// 4. Test invalid email
|
||||
{
|
||||
"name": "web_interact",
|
||||
"arguments": {
|
||||
"action": "fill",
|
||||
"selector": "#email",
|
||||
"value": "invalid-email"
|
||||
}
|
||||
}
|
||||
|
||||
// 5. Verify JavaScript validation
|
||||
{
|
||||
"name": "web_extract",
|
||||
"arguments": {
|
||||
"type": "javascript",
|
||||
"code": "document.getElementById('email').validity.valid"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 3. Multi-Tab Workflow
|
||||
|
||||
```json
|
||||
// 1. Open multiple tabs for comparison
|
||||
{
|
||||
"name": "web_manage_tabs",
|
||||
"arguments": {
|
||||
"action": "open"
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
"name": "web_navigate",
|
||||
"arguments": {
|
||||
"url": "https://app.com/admin"
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
"name": "web_manage_tabs",
|
||||
"arguments": {
|
||||
"action": "open"
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
"name": "web_navigate",
|
||||
"arguments": {
|
||||
"url": "https://app.com/user"
|
||||
}
|
||||
}
|
||||
|
||||
// 2. List tabs to see current state
|
||||
{
|
||||
"name": "web_manage_tabs",
|
||||
"arguments": {
|
||||
"action": "list"
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Switch between tabs as needed
|
||||
{
|
||||
"name": "web_manage_tabs",
|
||||
"arguments": {
|
||||
"action": "switch",
|
||||
"tab": "first-tab-id"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 4. Iframe Testing (Payment Forms, Widgets)
|
||||
|
||||
```json
|
||||
// 1. Navigate to page with iframe
|
||||
{
|
||||
"name": "web_navigate",
|
||||
"arguments": {
|
||||
"url": "https://shop.com/checkout"
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Enter iframe context
|
||||
{
|
||||
"name": "web_iframe",
|
||||
"arguments": {
|
||||
"action": "enter",
|
||||
"selector": "iframe.payment-frame"
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Interact with iframe content
|
||||
{
|
||||
"name": "web_interact",
|
||||
"arguments": {
|
||||
"action": "fill",
|
||||
"selector": "#card-number",
|
||||
"value": "4111111111111111"
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
"name": "web_interact",
|
||||
"arguments": {
|
||||
"action": "fill",
|
||||
"selector": "#expiry",
|
||||
"value": "12/25"
|
||||
}
|
||||
}
|
||||
|
||||
// 4. Exit iframe context
|
||||
{
|
||||
"name": "web_iframe",
|
||||
"arguments": {
|
||||
"action": "exit"
|
||||
}
|
||||
}
|
||||
|
||||
// 5. Continue with main page
|
||||
{
|
||||
"name": "web_interact",
|
||||
"arguments": {
|
||||
"action": "click",
|
||||
"selector": "#complete-order"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Best Practices for LLMs
|
||||
|
||||
### 1. State Awareness
|
||||
- The MCP server automatically tracks state, but always check the response for current context
|
||||
- Use the `current_tab` and `iframe_mode` fields to understand your current position
|
||||
- The `tab_history` helps you understand available tabs
|
||||
|
||||
### 2. Error Handling
|
||||
- Always check the `success` field in responses
|
||||
- Use the `error` field for detailed error information
|
||||
- Take screenshots when errors occur for debugging: `"screenshot": true`
|
||||
|
||||
### 3. Timeout Management
|
||||
- Use longer timeouts for slow-loading pages or complex interactions
|
||||
- Default 5-second timeouts work for most scenarios
|
||||
- Increase timeouts for file uploads or heavy JavaScript applications
|
||||
|
||||
### 4. Screenshot Strategy
|
||||
- Take screenshots at key points for documentation
|
||||
- Use `full_page: true` for comprehensive page captures
|
||||
- Screenshot before and after critical actions for debugging
|
||||
|
||||
### 5. Verification Patterns
|
||||
- Always verify actions completed successfully
|
||||
- Use JavaScript extraction to check application state
|
||||
- Combine element extraction with JavaScript validation
|
||||
|
||||
## Debugging Failed Tests
|
||||
|
||||
### 1. Capture Current State
|
||||
```json
|
||||
// Get page source for analysis
|
||||
{
|
||||
"name": "web_extract",
|
||||
"arguments": {
|
||||
"type": "source"
|
||||
}
|
||||
}
|
||||
|
||||
// Take screenshot to see visual state
|
||||
{
|
||||
"name": "web_screenshot",
|
||||
"arguments": {
|
||||
"output": "/tmp/debug-state.png",
|
||||
"full_page": true
|
||||
}
|
||||
}
|
||||
|
||||
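// Inspect console.error (note: this returns the function's source text, not previously logged errors; install your own error collector first if you need to capture them)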
|
||||
{
|
||||
"name": "web_extract",
|
||||
"arguments": {
|
||||
"type": "javascript",
|
||||
"code": "console.error.toString()"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 2. Element Debugging
|
||||
```json
|
||||
// Check if element exists
|
||||
{
|
||||
"name": "web_extract",
|
||||
"arguments": {
|
||||
"type": "javascript",
|
||||
"code": "document.querySelector('#my-element') !== null"
|
||||
}
|
||||
}
|
||||
|
||||
// Get element properties
|
||||
{
|
||||
"name": "web_extract",
|
||||
"arguments": {
|
||||
"type": "javascript",
|
||||
"code": "JSON.stringify({visible: document.querySelector('#my-element')?.offsetParent !== null, text: document.querySelector('#my-element')?.textContent})"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 3. Network and Loading Issues
|
||||
```json
|
||||
// Check if page is still loading
|
||||
{
|
||||
"name": "web_extract",
|
||||
"arguments": {
|
||||
"type": "javascript",
|
||||
"code": "document.readyState"
|
||||
}
|
||||
}
|
||||
|
||||
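// Check whether a global error handler is installed (note: window.onerror being set does not by itself mean an error has occurred)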
|
||||
{
|
||||
"name": "web_extract",
|
||||
"arguments": {
|
||||
"type": "javascript",
|
||||
"code": "window.onerror ? 'Errors detected' : 'No errors'"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Advantages Over CLI Commands
|
||||
|
||||
### 1. **Automatic State Management**
|
||||
- No need to manually track tab IDs
|
||||
- Automatic current tab resolution
|
||||
- Persistent iframe context tracking
|
||||
|
||||
### 2. **Rich Error Context**
|
||||
- Detailed error messages with context
|
||||
- Automatic screenshot capture on failures
|
||||
- Structured error responses for better debugging
|
||||
|
||||
### 3. **Intelligent Abstractions**
|
||||
- Combined operations in single tools
|
||||
- Smart parameter defaults and validation
|
||||
- Automatic resource management
|
||||
|
||||
### 4. **Better Performance**
|
||||
- Direct library integration (no subprocess overhead)
|
||||
- Persistent connections to cremote daemon
|
||||
- Efficient state tracking
|
||||
|
||||
### 5. **Structured Responses**
|
||||
- Consistent JSON format for all responses
|
||||
- Rich metadata for decision making
|
||||
- Easy parsing and error handling
|
||||
|
||||
## Key Differences from CLI Usage
|
||||
|
||||
| Aspect | CLI Commands | MCP Server |
|
||||
|--------|-------------|------------|
|
||||
| **State Tracking** | Manual tab ID management | Automatic state management |
|
||||
| **Error Handling** | Text parsing required | Structured error objects |
|
||||
| **Screenshots** | Manual command execution | Automatic capture options |
|
||||
| **Performance** | Subprocess overhead | Direct library calls |
|
||||
| **Response Format** | Text output | Structured JSON |
|
||||
| **Context Management** | Manual iframe tracking | Automatic context switching |
|
||||
| **Resource Cleanup** | Manual tab management | Automatic resource tracking |
|
||||
|
||||
The Cremote MCP Server transforms web automation from a series of CLI commands into an intelligent, stateful API that's optimized for AI-driven testing and automation workflows.
|
|
@ -0,0 +1,379 @@
|
|||
# Cremote MCP Tools - LLM Usage Guide
|
||||
|
||||
This guide explains how LLMs can use the cremote MCP (Model Context Protocol) tools for web automation tasks.
|
||||
|
||||
## Available Tools
|
||||
|
||||
The cremote MCP server provides six comprehensive web automation tools:
|
||||
|
||||
### 1. `web_navigate_cremotemcp`
|
||||
Navigate to URLs and optionally take screenshots.
|
||||
|
||||
**Parameters:**
|
||||
- `url` (required): The URL to navigate to
|
||||
- `screenshot` (optional): Boolean, take a screenshot after navigation
|
||||
- `tab` (optional): Specific tab ID to use
|
||||
- `timeout` (optional): Timeout in seconds (default: 5)
|
||||
|
||||
**Example Usage:**
|
||||
```
|
||||
web_navigate_cremotemcp:
|
||||
url: "https://example.com"
|
||||
screenshot: true
|
||||
```
|
||||
|
||||
### 2. `web_interact_cremotemcp`
|
||||
Interact with web elements through various actions.
|
||||
|
||||
**Parameters:**
|
||||
- `action` (required): One of "click", "fill", "submit", "upload"
|
||||
- `selector` (required): CSS selector for the target element
|
||||
- `value` (optional): Value for fill/upload actions
|
||||
- `tab` (optional): Specific tab ID to use
|
||||
- `timeout` (optional): Timeout in seconds (default: 5)
|
||||
|
||||
**Example Usage:**
|
||||
```
|
||||
web_interact_cremotemcp:
|
||||
action: "click"
|
||||
selector: "button.submit"
|
||||
|
||||
web_interact_cremotemcp:
|
||||
action: "fill"
|
||||
selector: "input[name='email']"
|
||||
value: "user@example.com"
|
||||
```
|
||||
|
||||
### 3. `web_extract_cremotemcp`
|
||||
Extract data from web pages (HTML source, element content, or JavaScript execution).
|
||||
|
||||
**Parameters:**
|
||||
- `type` (required): One of "source", "element", "javascript"
|
||||
- `selector` (optional): CSS selector (required for "element" type)
|
||||
- `code` (optional): JavaScript code (required for "javascript" type)
|
||||
- `tab` (optional): Specific tab ID to use
|
||||
- `timeout` (optional): Timeout in seconds (default: 5)
|
||||
|
||||
**Example Usage:**
|
||||
```
|
||||
web_extract_cremotemcp:
|
||||
type: "source"
|
||||
|
||||
web_extract_cremotemcp:
|
||||
type: "element"
|
||||
selector: "div.content"
|
||||
|
||||
web_extract_cremotemcp:
|
||||
type: "javascript"
|
||||
code: "document.title"
|
||||
```
|
||||
|
||||
### 4. `web_screenshot_cremotemcp`
|
||||
Take screenshots of web pages.
|
||||
|
||||
**Parameters:**
|
||||
- `output` (required): File path where screenshot will be saved
|
||||
- `full_page` (optional): Capture full page (default: false)
|
||||
- `tab` (optional): Specific tab ID to use
|
||||
- `timeout` (optional): Timeout in seconds (default: 5)
|
||||
|
||||
**Example Usage:**
|
||||
```
|
||||
web_screenshot_cremotemcp:
|
||||
output: "/tmp/page-screenshot.png"
|
||||
full_page: true
|
||||
```
|
||||
|
||||
### 5. `web_manage_tabs_cremotemcp`
|
||||
Manage browser tabs (open, close, list, switch).
|
||||
|
||||
**Parameters:**
|
||||
- `action` (required): One of "open", "close", "list", "switch"
|
||||
- `tab` (optional): Tab ID (required for "close" and "switch" actions)
|
||||
- `timeout` (optional): Timeout in seconds (default: 5)
|
||||
|
||||
**Example Usage:**
|
||||
```
|
||||
web_manage_tabs_cremotemcp:
|
||||
action: "open"
|
||||
|
||||
web_manage_tabs_cremotemcp:
|
||||
action: "list"
|
||||
|
||||
web_manage_tabs_cremotemcp:
|
||||
action: "switch"
|
||||
tab: "ABC123"
|
||||
```
|
||||
|
||||
### 6. `web_iframe_cremotemcp`
|
||||
Switch iframe context for subsequent operations.
|
||||
|
||||
**Parameters:**
|
||||
- `action` (required): One of "enter", "exit"
|
||||
- `selector` (optional): Iframe CSS selector (required for "enter" action)
|
||||
- `tab` (optional): Specific tab ID to use
|
||||
|
||||
**Example Usage:**
|
||||
```
|
||||
web_iframe_cremotemcp:
|
||||
action: "enter"
|
||||
selector: "iframe#payment-form"
|
||||
|
||||
web_iframe_cremotemcp:
|
||||
action: "exit"
|
||||
```
|
||||
|
||||
## Common Usage Patterns
|
||||
|
||||
### 1. Basic Web Navigation
|
||||
```
|
||||
# Navigate to a website
|
||||
web_navigate_cremotemcp:
|
||||
url: "https://example.com"
|
||||
screenshot: true
|
||||
```
|
||||
|
||||
### 2. Form Interaction Sequence
|
||||
```
|
||||
# 1. Navigate to the page
|
||||
web_navigate_cremotemcp:
|
||||
url: "https://example.com/login"
|
||||
|
||||
# 2. Fill username field
|
||||
web_interact_cremotemcp:
|
||||
action: "fill"
|
||||
selector: "input[name='username']"
|
||||
value: "myusername"
|
||||
|
||||
# 3. Fill password field
|
||||
web_interact_cremotemcp:
|
||||
action: "fill"
|
||||
selector: "input[name='password']"
|
||||
value: "mypassword"
|
||||
|
||||
# 4. Submit the form
|
||||
web_interact_cremotemcp:
|
||||
action: "submit"
|
||||
selector: "form"
|
||||
```
|
||||
|
||||
### 3. Clicking Elements
|
||||
```
|
||||
# Click a button
|
||||
web_interact_cremotemcp:
|
||||
action: "click"
|
||||
selector: "button#submit-btn"
|
||||
|
||||
# Click a link
|
||||
web_interact_cremotemcp:
|
||||
action: "click"
|
||||
selector: "a[href='/dashboard']"
|
||||
```
|
||||
|
||||
## Best Practices for LLMs
|
||||
|
||||
### 1. Always Start with Navigation
|
||||
Before interacting with elements, navigate to the target page:
|
||||
```
|
||||
web_navigate_cremotemcp:
|
||||
url: "https://target-website.com"
|
||||
```
|
||||
|
||||
### 2. Use Specific CSS Selectors
|
||||
Be as specific as possible with selectors to avoid ambiguity:
|
||||
- Good: `input[name='email']`, `button.primary-submit`
|
||||
- Avoid: `input`, `button`
|
||||
|
||||
### 3. Take Screenshots for Debugging
|
||||
When troubleshooting or documenting, use screenshots:
|
||||
```
|
||||
web_navigate_cremotemcp:
|
||||
url: "https://example.com"
|
||||
screenshot: true
|
||||
```
|
||||
|
||||
### 4. Handle Timeouts Appropriately
|
||||
For slow-loading pages, increase timeout:
|
||||
```
|
||||
web_navigate_cremotemcp:
|
||||
url: "https://slow-website.com"
|
||||
timeout: 10
|
||||
```
|
||||
|
||||
### 5. Sequential Operations
|
||||
Perform operations in logical sequence:
|
||||
1. Navigate to page
|
||||
2. Fill required fields
|
||||
3. Submit forms
|
||||
4. Navigate to next page if needed
|
||||
|
||||
## Error Handling
|
||||
|
||||
### Common Error Scenarios:
|
||||
1. **Element not found**: Selector doesn't match any elements
|
||||
2. **Timeout**: Page takes too long to load or element to appear
|
||||
3. **Navigation failed**: URL is invalid or unreachable
|
||||
|
||||
### Troubleshooting Tips:
|
||||
1. Verify the URL is correct and accessible
|
||||
2. Check CSS selectors using browser developer tools
|
||||
3. Increase timeout for slow-loading content
|
||||
4. Take screenshots to see current page state
|
||||
|
||||
## Tab Management
|
||||
|
||||
The tools automatically manage browser tabs:
|
||||
- If no tab is specified, a new tab is created automatically
|
||||
- Tab IDs are returned in responses for reference
|
||||
- Multiple tabs can be managed by specifying tab IDs
|
||||
|
||||
## Security Considerations
|
||||
|
||||
### Safe Practices:
|
||||
- Only navigate to trusted websites
|
||||
- Be cautious with form submissions
|
||||
- Avoid entering sensitive information in examples
|
||||
- Use screenshots sparingly to avoid exposing sensitive data
|
||||
|
||||
### Limitations:
|
||||
- Cannot bypass CAPTCHA or other anti-automation measures
|
||||
- Subject to same-origin policy restrictions
|
||||
- May not work with heavily JavaScript-dependent sites
|
||||
|
||||
## Example: Complete Web Automation Task
|
||||
|
||||
Here's a complete example of automating a web form:
|
||||
|
||||
```
|
||||
# Step 1: Navigate to the form page
|
||||
web_navigate_cremotemcp:
|
||||
url: "https://example.com/contact"
|
||||
screenshot: true
|
||||
|
||||
# Step 2: Fill out the contact form
|
||||
web_interact_cremotemcp:
|
||||
action: "fill"
|
||||
selector: "input[name='name']"
|
||||
value: "John Doe"
|
||||
|
||||
web_interact_cremotemcp:
|
||||
action: "fill"
|
||||
selector: "input[name='email']"
|
||||
value: "john@example.com"
|
||||
|
||||
web_interact_cremotemcp:
|
||||
action: "fill"
|
||||
selector: "textarea[name='message']"
|
||||
value: "Hello, this is a test message."
|
||||
|
||||
# Step 3: Submit the form
|
||||
web_interact_cremotemcp:
|
||||
action: "submit"
|
||||
selector: "form#contact-form"
|
||||
|
||||
# Step 4: Take a screenshot of the result
|
||||
web_navigate_cremotemcp:
|
||||
url: "current" # Stay on current page
|
||||
screenshot: true
|
||||
```
|
||||
|
||||
## Integration Notes
|
||||
|
||||
- Tools use the `_cremotemcp` suffix to avoid naming conflicts
|
||||
- Responses include success status and descriptive messages
|
||||
- Screenshots are saved to the `/tmp/` directory with timestamped filenames
|
||||
- The underlying cremote daemon handles browser management
|
||||
|
||||
## Advanced Usage Examples
|
||||
|
||||
### Testing Web Applications
|
||||
```
|
||||
# Navigate to application
|
||||
web_navigate_cremotemcp:
|
||||
url: "https://myapp.com/login"
|
||||
screenshot: true
|
||||
|
||||
# Test login functionality
|
||||
web_interact_cremotemcp:
|
||||
action: "fill"
|
||||
selector: "#username"
|
||||
value: "testuser"
|
||||
|
||||
web_interact_cremotemcp:
|
||||
action: "fill"
|
||||
selector: "#password"
|
||||
value: "testpass"
|
||||
|
||||
web_interact_cremotemcp:
|
||||
action: "click"
|
||||
selector: "button[type='submit']"
|
||||
|
||||
# Verify successful login
|
||||
web_navigate_cremotemcp:
|
||||
url: "current"
|
||||
screenshot: true
|
||||
```
|
||||
|
||||
### Data Extraction Workflows
|
||||
```
|
||||
# Navigate to data source
|
||||
web_navigate_cremotemcp:
|
||||
url: "https://data-site.com/table"
|
||||
|
||||
# Click through pagination or filters
|
||||
web_interact_cremotemcp:
|
||||
action: "click"
|
||||
selector: ".filter-button"
|
||||
|
||||
# Take screenshot to document current state
|
||||
web_navigate_cremotemcp:
|
||||
url: "current"
|
||||
screenshot: true
|
||||
```
|
||||
|
||||
### File Upload Testing
|
||||
```
|
||||
# Navigate to upload form
|
||||
web_navigate_cremotemcp:
|
||||
url: "https://example.com/upload"
|
||||
|
||||
# Upload a file
|
||||
web_interact_cremotemcp:
|
||||
action: "upload"
|
||||
selector: "input[type='file']"
|
||||
value: "/path/to/test-file.pdf"
|
||||
|
||||
# Submit the upload form
|
||||
web_interact_cremotemcp:
|
||||
action: "click"
|
||||
selector: "button.upload-submit"
|
||||
```
|
||||
|
||||
## Tool Response Format
|
||||
|
||||
All tools return structured responses:
|
||||
|
||||
**Success Response:**
|
||||
```
|
||||
"Successfully navigated to https://example.com in tab ABC123 (screenshot saved to /tmp/navigate-1234567890.png)"
|
||||
```
|
||||
|
||||
**Error Response:**
|
||||
```
|
||||
"failed to load URL: context deadline exceeded"
|
||||
```
|
||||
|
||||
## CSS Selector Best Practices
|
||||
|
||||
### Recommended Selector Types:
|
||||
1. **ID selectors**: `#unique-id` (most reliable)
|
||||
2. **Name attributes**: `input[name='fieldname']`
|
||||
3. **Class combinations**: `.primary.button`
|
||||
4. **Attribute selectors**: `button[data-action='submit']`
|
||||
|
||||
### Avoid These Selectors:
|
||||
1. **Generic tags**: `div`, `span`, `input` (too broad)
|
||||
2. **Position-based**: `:nth-child()` (fragile)
|
||||
3. **Text-based**: `:contains()` (not standard CSS)
|
||||
|
||||
This documentation should help LLMs effectively use the cremote MCP tools for web automation tasks.
|
|
@ -0,0 +1,96 @@
|
|||
# Cremote MCP Tools - Quick Reference
|
||||
|
||||
## Tool Names
|
||||
- `web_navigate_cremotemcp` - Navigate to URLs
|
||||
- `web_interact_cremotemcp` - Interact with elements
|
||||
- `web_extract_cremotemcp` - Extract page data
|
||||
- `web_screenshot_cremotemcp` - Take screenshots
|
||||
- `web_manage_tabs_cremotemcp` - Manage browser tabs
|
||||
- `web_iframe_cremotemcp` - Switch iframe context
|
||||
|
||||
## Essential Parameters
|
||||
|
||||
### web_navigate_cremotemcp
|
||||
```yaml
|
||||
url: "https://example.com" # Required
|
||||
screenshot: true # Optional, default false
|
||||
timeout: 10 # Optional, default 5 seconds
|
||||
```
|
||||
|
||||
### web_interact_cremotemcp
|
||||
```yaml
|
||||
action: "click" # Required: click|fill|submit|upload
|
||||
selector: "button.submit" # Required: CSS selector
|
||||
value: "text to fill" # Required for fill/upload actions
|
||||
timeout: 10 # Optional, default 5 seconds
|
||||
```
|
||||
|
||||
## Common Patterns
|
||||
|
||||
### Navigate + Screenshot
|
||||
```yaml
|
||||
web_navigate_cremotemcp:
|
||||
url: "https://example.com"
|
||||
screenshot: true
|
||||
```
|
||||
|
||||
### Fill Form Field
|
||||
```yaml
|
||||
web_interact_cremotemcp:
|
||||
action: "fill"
|
||||
selector: "input[name='email']"
|
||||
value: "user@example.com"
|
||||
```
|
||||
|
||||
### Click Button
|
||||
```yaml
|
||||
web_interact_cremotemcp:
|
||||
action: "click"
|
||||
selector: "button#submit"
|
||||
```
|
||||
|
||||
### Submit Form
|
||||
```yaml
|
||||
web_interact_cremotemcp:
|
||||
action: "submit"
|
||||
selector: "form"
|
||||
```
|
||||
|
||||
### Upload File
|
||||
```yaml
|
||||
web_interact_cremotemcp:
|
||||
action: "upload"
|
||||
selector: "input[type='file']"
|
||||
value: "/path/to/file.pdf"
|
||||
```
|
||||
|
||||
## Best CSS Selectors
|
||||
|
||||
✅ **Good:**
|
||||
- `#unique-id`
|
||||
- `input[name='fieldname']`
|
||||
- `button.primary-submit`
|
||||
- `form#login-form`
|
||||
|
||||
❌ **Avoid:**
|
||||
- `div` (too generic)
|
||||
- `input` (too broad)
|
||||
- `:nth-child(3)` (fragile)
|
||||
|
||||
## Typical Workflow
|
||||
|
||||
1. **Navigate** to target page
|
||||
2. **Fill** required form fields
|
||||
3. **Click** submit buttons
|
||||
4. **Take screenshots** for verification
|
||||
5. **Navigate** to next page if needed
|
||||
|
||||
## Error Handling
|
||||
|
||||
- **Element not found**: Check CSS selector
|
||||
- **Timeout**: Increase timeout parameter
|
||||
- **Navigation failed**: Verify URL accessibility
|
||||
|
||||
## Screenshots
|
||||
|
||||
Screenshots are automatically saved to `/tmp/navigate-{timestamp}.png` when requested.
|
|
@ -0,0 +1,252 @@
|
|||
# Cremote MCP Server
|
||||
|
||||
This is a Model Context Protocol (MCP) server that exposes cremote's web automation capabilities to LLMs and AI agents. Instead of using CLI commands, this server provides a structured API that maintains state and provides intelligent abstractions.
|
||||
|
||||
## Features
|
||||
|
||||
- **State Management**: Automatically tracks current tab, tab history, and iframe context
|
||||
- **Intelligent Abstractions**: High-level tools that combine multiple cremote operations
|
||||
- **Automatic Screenshots**: Optional screenshot capture for debugging and documentation
|
||||
- **Error Recovery**: Better error handling and context for LLMs
|
||||
- **Resource Management**: Automatic cleanup and connection management
|
||||
|
||||
## Quick Start for LLMs
|
||||
|
||||
**For LLM agents**: See the comprehensive [LLM MCP Guide](LLM_MCP_GUIDE.md) for detailed usage instructions, examples, and best practices.
|
||||
|
||||
## Available Tools
|
||||
|
||||
### 1. `web_navigate`
|
||||
Navigate to URLs with optional screenshot capture.
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "web_navigate",
|
||||
"arguments": {
|
||||
"url": "https://example.com",
|
||||
"screenshot": true,
|
||||
"timeout": 10
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 2. `web_interact`
|
||||
Interact with web elements (click, fill, submit, upload).
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "web_interact",
|
||||
"arguments": {
|
||||
"action": "fill",
|
||||
"selector": "#username",
|
||||
"value": "testuser",
|
||||
"timeout": 5
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 3. `web_extract`
|
||||
Extract data from pages (source, element HTML, JavaScript execution).
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "web_extract",
|
||||
"arguments": {
|
||||
"type": "javascript",
|
||||
"code": "document.title",
|
||||
"timeout": 5
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 4. `web_screenshot`
|
||||
Take screenshots of the current page.
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "web_screenshot",
|
||||
"arguments": {
|
||||
"output": "/tmp/page.png",
|
||||
"full_page": true,
|
||||
"timeout": 5
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 5. `web_manage_tabs`
|
||||
Manage browser tabs (open, close, list, switch).
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "web_manage_tabs",
|
||||
"arguments": {
|
||||
"action": "open",
|
||||
"timeout": 5
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 6. `web_iframe`
|
||||
Switch iframe context for subsequent operations.
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "web_iframe",
|
||||
"arguments": {
|
||||
"action": "enter",
|
||||
"selector": "iframe#payment-form"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Installation & Usage
|
||||
|
||||
### Prerequisites
|
||||
|
||||
1. **Cremote daemon must be running**:
|
||||
```bash
|
||||
cremotedaemon
|
||||
```
|
||||
|
||||
2. **Chrome/Chromium with remote debugging**:
|
||||
```bash
|
||||
chromium --remote-debugging-port=9222 --user-data-dir=/tmp/chromium-debug
|
||||
```
|
||||
|
||||
### Build the MCP Server
|
||||
|
||||
```bash
|
||||
cd mcp/
|
||||
go build -o cremote-mcp .
|
||||
```
|
||||
|
||||
### Configuration
|
||||
|
||||
Set environment variables to configure the cremote connection:
|
||||
|
||||
```bash
|
||||
export CREMOTE_HOST=localhost
|
||||
export CREMOTE_PORT=8989
|
||||
```
|
||||
|
||||
### Running with Claude Desktop
|
||||
|
||||
Add to your Claude Desktop configuration (`~/Library/Application Support/Claude/claude_desktop_config.json` on macOS):
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"cremote": {
|
||||
"command": "/path/to/cremote-mcp",
|
||||
"env": {
|
||||
"CREMOTE_HOST": "localhost",
|
||||
"CREMOTE_PORT": "8989"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Running with Other MCP Clients
|
||||
|
||||
The server communicates via JSON-RPC over stdio, so it can be used with any MCP-compatible client:
|
||||
|
||||
```bash
|
||||
echo '{"method":"tools/list","params":{},"id":1}' | ./cremote-mcp
|
||||
```
|
||||
|
||||
## Response Format
|
||||
|
||||
All tool responses include:
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"data": "...",
|
||||
"screenshot": "/tmp/screenshot.png",
|
||||
"current_tab": "tab-id-123",
|
||||
"tab_history": ["tab-id-123", "tab-id-456"],
|
||||
"iframe_mode": false,
|
||||
"error": null,
|
||||
"metadata": {}
|
||||
}
|
||||
```
|
||||
|
||||
## Example Workflow
|
||||
|
||||
```json
|
||||
// 1. Navigate to a page
|
||||
{
|
||||
"name": "web_navigate",
|
||||
"arguments": {
|
||||
"url": "https://example.com/login",
|
||||
"screenshot": true
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Fill login form
|
||||
{
|
||||
"name": "web_interact",
|
||||
"arguments": {
|
||||
"action": "fill",
|
||||
"selector": "#username",
|
||||
"value": "testuser"
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
"name": "web_interact",
|
||||
"arguments": {
|
||||
"action": "fill",
|
||||
"selector": "#password",
|
||||
"value": "password123"
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Submit form
|
||||
{
|
||||
"name": "web_interact",
|
||||
"arguments": {
|
||||
"action": "click",
|
||||
"selector": "#login-button"
|
||||
}
|
||||
}
|
||||
|
||||
// 4. Extract result
|
||||
{
|
||||
"name": "web_extract",
|
||||
"arguments": {
|
||||
"type": "javascript",
|
||||
"code": "document.querySelector('.welcome-message')?.textContent"
|
||||
}
|
||||
}
|
||||
|
||||
// 5. Take final screenshot
|
||||
{
|
||||
"name": "web_screenshot",
|
||||
"arguments": {
|
||||
"output": "/tmp/login-success.png",
|
||||
"full_page": true
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Benefits Over CLI
|
||||
|
||||
- **State Management**: No need to manually track tab IDs
|
||||
- **Better Error Context**: Rich error information for debugging
|
||||
- **Automatic Screenshots**: Built-in screenshot capture for documentation
|
||||
- **Intelligent Defaults**: Smart parameter handling and fallbacks
|
||||
- **Resource Cleanup**: Automatic management of tabs and files
|
||||
- **Structured Responses**: Consistent, parseable response format
|
||||
|
||||
## Development
|
||||
|
||||
To extend the MCP server with new tools:
|
||||
|
||||
1. Add the tool definition to `handleToolsList()`
|
||||
2. Add a case in `handleToolCall()`
|
||||
3. Implement the handler function following the pattern of existing handlers
|
||||
4. Update this documentation
|
||||
|
||||
The server is designed to be easily extensible while maintaining consistency with the cremote client library.
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,923 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
"os/signal"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"git.teamworkapps.com/shortcut/cremote/client"
|
||||
)
|
||||
|
||||
// DebugLogger handles debug logging to file.
//
// Every method is safe to call on a nil *DebugLogger, in which case it does
// nothing; this lets callers log unconditionally through a package-level
// pointer that may never have been initialised.
type DebugLogger struct {
	file   *os.File    // log file opened by NewDebugLogger; closed by Close
	logger *log.Logger // logger that writes to file
}

// NewDebugLogger creates a new debug logger.
//
// It ensures the directory /tmp/cremote-mcp-logs exists, opens (creating if
// needed, in append mode) a file named mcp-stdio-<unix-seconds>.log inside it
// and writes a start banner. An error is returned when the directory or the
// file cannot be created.
func NewDebugLogger() (*DebugLogger, error) {
	logDir := "/tmp/cremote-mcp-logs"
	if err := os.MkdirAll(logDir, 0755); err != nil {
		return nil, fmt.Errorf("failed to create log directory: %w", err)
	}

	logFile := filepath.Join(logDir, fmt.Sprintf("mcp-stdio-%d.log", time.Now().Unix()))
	file, err := os.OpenFile(logFile, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
	if err != nil {
		return nil, fmt.Errorf("failed to open log file: %w", err)
	}

	logger := log.New(file, "", log.LstdFlags|log.Lmicroseconds|log.Lshortfile)
	logger.Printf("=== MCP STDIO Server Debug Log Started ===")

	return &DebugLogger{
		file:   file,
		logger: logger,
	}, nil
}

// Log writes a debug message formatted like fmt.Printf.
func (d *DebugLogger) Log(format string, args ...interface{}) {
	if d == nil || d.logger == nil {
		return
	}
	d.logger.Printf(format, args...)
}

// LogJSON logs obj as indented JSON, prefixed with label. If obj cannot be
// marshalled, the marshal error is logged under the same label instead.
func (d *DebugLogger) LogJSON(label string, obj interface{}) {
	if d == nil || d.logger == nil {
		return
	}

	jsonBytes, err := json.MarshalIndent(obj, "", " ")
	if err != nil {
		d.logger.Printf("%s: JSON marshal error: %v", label, err)
		return
	}
	d.logger.Printf("%s:\n%s", label, string(jsonBytes))
}

// Close writes the end banner and closes the log file.
//
// It is a no-op when there is no open file, and calling it more than once is
// harmless.
func (d *DebugLogger) Close() {
	if d == nil || d.file == nil {
		return
	}

	// The logger is guarded separately from the file so that a DebugLogger
	// holding a file but no logger does not cause a nil dereference.
	if d.logger != nil {
		d.logger.Printf("=== MCP STDIO Server Debug Log Ended ===")
	}

	// Best effort: nothing useful can be done with a close error on a debug log.
	_ = d.file.Close()
	d.file = nil // makes a repeated Close a no-op
}

// debugLogger is the process-wide debug logger. It stays nil until assigned,
// which is safe because all DebugLogger methods tolerate a nil receiver.
var debugLogger *DebugLogger
|
||||
|
||||
// MCPServer wraps the cremote client with MCP protocol
|
||||
type MCPServer struct {
|
||||
client *client.Client
|
||||
currentTab string
|
||||
tabHistory []string
|
||||
iframeMode bool
|
||||
screenshots []string
|
||||
}
|
||||
|
||||
// MCPRequest represents an incoming MCP request, decoded from JSON.
type MCPRequest struct {
	// JSONRPC is the protocol version string sent by the client, if any.
	JSONRPC string `json:"jsonrpc,omitempty"`
	// Method is the method name, e.g. "initialize" or "tools/call".
	Method string `json:"method"`
	// Params holds the method parameters as a generic JSON object.
	Params map[string]interface{} `json:"params"`
	// ID is the request identifier; it is copied unchanged into the response.
	ID interface{} `json:"id"`
}
|
||||
|
||||
type (
	// MCPResponse represents an MCP response. Result and Error are omitted
	// from the JSON when unset; ID is always emitted.
	MCPResponse struct {
		JSONRPC string      `json:"jsonrpc,omitempty"`
		Result  interface{} `json:"result,omitempty"`
		Error   *MCPError   `json:"error,omitempty"`
		ID      interface{} `json:"id"`
	}

	// MCPError represents an MCP error: a numeric code plus a message.
	MCPError struct {
		Code    int    `json:"code"`
		Message string `json:"message"`
	}
)
|
||||
|
||||
// ToolResult represents the result of a tool execution.
//
// Success and IframeMode are always present in the JSON encoding; every other
// field is dropped when it holds its zero value.
type ToolResult struct {
	// Outcome of the call.
	Success bool        `json:"success"`
	Data    interface{} `json:"data,omitempty"`

	// Path of a screenshot associated with the call, if any.
	Screenshot string `json:"screenshot,omitempty"`

	// Session state reported back to the caller.
	CurrentTab string   `json:"current_tab,omitempty"`
	TabHistory []string `json:"tab_history,omitempty"`
	IframeMode bool     `json:"iframe_mode"`

	// Error text and free-form extra information.
	Error    string            `json:"error,omitempty"`
	Metadata map[string]string `json:"metadata,omitempty"`
}
|
||||
|
||||
// NewMCPServer creates a new MCP server instance
|
||||
func NewMCPServer(host string, port int) *MCPServer {
|
||||
c := client.NewClient(host, port)
|
||||
return &MCPServer{
|
||||
client: c,
|
||||
tabHistory: make([]string, 0),
|
||||
screenshots: make([]string, 0),
|
||||
}
|
||||
}
|
||||
|
||||
// HandleRequest processes an MCP request and returns a response
|
||||
func (s *MCPServer) HandleRequest(req MCPRequest) MCPResponse {
|
||||
debugLogger.Log("HandleRequest called with method: %s, ID: %v", req.Method, req.ID)
|
||||
debugLogger.LogJSON("Incoming Request", req)
|
||||
|
||||
var resp MCPResponse
|
||||
|
||||
switch req.Method {
|
||||
case "initialize":
|
||||
debugLogger.Log("Handling initialize request")
|
||||
resp = s.handleInitialize(req)
|
||||
case "tools/list":
|
||||
debugLogger.Log("Handling tools/list request")
|
||||
resp = s.handleToolsList(req)
|
||||
case "tools/call":
|
||||
debugLogger.Log("Handling tools/call request")
|
||||
resp = s.handleToolCall(req)
|
||||
default:
|
||||
debugLogger.Log("Unknown method: %s", req.Method)
|
||||
resp = MCPResponse{
|
||||
Error: &MCPError{
|
||||
Code: -32601,
|
||||
Message: fmt.Sprintf("Method not found: %s", req.Method),
|
||||
},
|
||||
ID: req.ID,
|
||||
}
|
||||
}
|
||||
|
||||
debugLogger.LogJSON("Response", resp)
|
||||
debugLogger.Log("HandleRequest completed for method: %s", req.Method)
|
||||
return resp
|
||||
}
|
||||
|
||||
// handleInitialize handles the MCP initialize request
|
||||
func (s *MCPServer) handleInitialize(req MCPRequest) MCPResponse {
|
||||
debugLogger.Log("handleInitialize: Processing initialize request")
|
||||
debugLogger.LogJSON("Initialize request params", req.Params)
|
||||
|
||||
result := map[string]interface{}{
|
||||
"protocolVersion": "2024-11-05",
|
||||
"capabilities": map[string]interface{}{
|
||||
"tools": map[string]interface{}{
|
||||
"listChanged": true,
|
||||
},
|
||||
},
|
||||
"serverInfo": map[string]interface{}{
|
||||
"name": "cremote-mcp",
|
||||
"version": "1.0.0",
|
||||
},
|
||||
}
|
||||
|
||||
debugLogger.LogJSON("Initialize response result", result)
|
||||
|
||||
return MCPResponse{
|
||||
Result: result,
|
||||
ID: req.ID,
|
||||
}
|
||||
}
|
||||
|
||||
// handleToolsList returns the list of available tools
|
||||
func (s *MCPServer) handleToolsList(req MCPRequest) MCPResponse {
|
||||
tools := []map[string]interface{}{
|
||||
{
|
||||
"name": "web_navigate",
|
||||
"description": "Navigate to a URL and optionally take a screenshot",
|
||||
"inputSchema": map[string]interface{}{
|
||||
"type": "object",
|
||||
"properties": map[string]interface{}{
|
||||
"url": map[string]interface{}{
|
||||
"type": "string",
|
||||
"description": "URL to navigate to",
|
||||
},
|
||||
"tab": map[string]interface{}{
|
||||
"type": "string",
|
||||
"description": "Tab ID (optional, uses current tab)",
|
||||
},
|
||||
"screenshot": map[string]interface{}{
|
||||
"type": "boolean",
|
||||
"description": "Take screenshot after navigation",
|
||||
},
|
||||
"timeout": map[string]interface{}{
|
||||
"type": "integer",
|
||||
"description": "Timeout in seconds",
|
||||
"default": 5,
|
||||
},
|
||||
},
|
||||
"required": []string{"url"},
|
||||
},
|
||||
},
|
||||
{
|
||||
"name": "web_interact",
|
||||
"description": "Interact with web elements (click, fill, submit)",
|
||||
"inputSchema": map[string]interface{}{
|
||||
"type": "object",
|
||||
"properties": map[string]interface{}{
|
||||
"action": map[string]interface{}{
|
||||
"type": "string",
|
||||
"enum": []string{"click", "fill", "submit", "upload"},
|
||||
},
|
||||
"selector": map[string]interface{}{
|
||||
"type": "string",
|
||||
"description": "CSS selector for the element",
|
||||
},
|
||||
"value": map[string]interface{}{
|
||||
"type": "string",
|
||||
"description": "Value to fill (for fill/upload actions)",
|
||||
},
|
||||
"tab": map[string]interface{}{
|
||||
"type": "string",
|
||||
"description": "Tab ID (optional)",
|
||||
},
|
||||
"timeout": map[string]interface{}{
|
||||
"type": "integer",
|
||||
"description": "Timeout in seconds",
|
||||
"default": 5,
|
||||
},
|
||||
},
|
||||
"required": []string{"action", "selector"},
|
||||
},
|
||||
},
|
||||
{
|
||||
"name": "web_extract",
|
||||
"description": "Extract data from the page (source, element HTML, or execute JavaScript)",
|
||||
"inputSchema": map[string]interface{}{
|
||||
"type": "object",
|
||||
"properties": map[string]interface{}{
|
||||
"type": map[string]interface{}{
|
||||
"type": "string",
|
||||
"enum": []string{"source", "element", "javascript"},
|
||||
},
|
||||
"selector": map[string]interface{}{
|
||||
"type": "string",
|
||||
"description": "CSS selector (for element type)",
|
||||
},
|
||||
"code": map[string]interface{}{
|
||||
"type": "string",
|
||||
"description": "JavaScript code (for javascript type)",
|
||||
},
|
||||
"tab": map[string]interface{}{
|
||||
"type": "string",
|
||||
"description": "Tab ID (optional)",
|
||||
},
|
||||
"timeout": map[string]interface{}{
|
||||
"type": "integer",
|
||||
"description": "Timeout in seconds",
|
||||
"default": 5,
|
||||
},
|
||||
},
|
||||
"required": []string{"type"},
|
||||
},
|
||||
},
|
||||
{
|
||||
"name": "web_screenshot",
|
||||
"description": "Take a screenshot of the current page",
|
||||
"inputSchema": map[string]interface{}{
|
||||
"type": "object",
|
||||
"properties": map[string]interface{}{
|
||||
"output": map[string]interface{}{
|
||||
"type": "string",
|
||||
"description": "Output file path",
|
||||
},
|
||||
"full_page": map[string]interface{}{
|
||||
"type": "boolean",
|
||||
"description": "Capture full page",
|
||||
"default": false,
|
||||
},
|
||||
"tab": map[string]interface{}{
|
||||
"type": "string",
|
||||
"description": "Tab ID (optional)",
|
||||
},
|
||||
"timeout": map[string]interface{}{
|
||||
"type": "integer",
|
||||
"description": "Timeout in seconds",
|
||||
"default": 5,
|
||||
},
|
||||
},
|
||||
"required": []string{"output"},
|
||||
},
|
||||
},
|
||||
{
|
||||
"name": "web_manage_tabs",
|
||||
"description": "Manage browser tabs (open, close, list, switch)",
|
||||
"inputSchema": map[string]interface{}{
|
||||
"type": "object",
|
||||
"properties": map[string]interface{}{
|
||||
"action": map[string]interface{}{
|
||||
"type": "string",
|
||||
"enum": []string{"open", "close", "list", "switch"},
|
||||
},
|
||||
"tab": map[string]interface{}{
|
||||
"type": "string",
|
||||
"description": "Tab ID (for close/switch actions)",
|
||||
},
|
||||
"timeout": map[string]interface{}{
|
||||
"type": "integer",
|
||||
"description": "Timeout in seconds",
|
||||
"default": 5,
|
||||
},
|
||||
},
|
||||
"required": []string{"action"},
|
||||
},
|
||||
},
|
||||
{
|
||||
"name": "web_iframe",
|
||||
"description": "Switch iframe context for subsequent operations",
|
||||
"inputSchema": map[string]interface{}{
|
||||
"type": "object",
|
||||
"properties": map[string]interface{}{
|
||||
"action": map[string]interface{}{
|
||||
"type": "string",
|
||||
"enum": []string{"enter", "exit"},
|
||||
},
|
||||
"selector": map[string]interface{}{
|
||||
"type": "string",
|
||||
"description": "Iframe CSS selector (for enter action)",
|
||||
},
|
||||
"tab": map[string]interface{}{
|
||||
"type": "string",
|
||||
"description": "Tab ID (optional)",
|
||||
},
|
||||
},
|
||||
"required": []string{"action"},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
return MCPResponse{
|
||||
Result: map[string]interface{}{
|
||||
"tools": tools,
|
||||
},
|
||||
ID: req.ID,
|
||||
}
|
||||
}
|
||||
|
||||
// handleToolCall executes a tool and returns the result
|
||||
func (s *MCPServer) handleToolCall(req MCPRequest) MCPResponse {
|
||||
debugLogger.Log("handleToolCall: Processing tool call request")
|
||||
debugLogger.LogJSON("Tool call request params", req.Params)
|
||||
|
||||
params := req.Params
|
||||
|
||||
toolName, ok := params["name"].(string)
|
||||
if !ok || toolName == "" {
|
||||
debugLogger.Log("handleToolCall: Tool name missing or invalid")
|
||||
return MCPResponse{
|
||||
Error: &MCPError{Code: -32602, Message: "Missing tool name"},
|
||||
ID: req.ID,
|
||||
}
|
||||
}
|
||||
|
||||
debugLogger.Log("handleToolCall: Tool name extracted: %s", toolName)
|
||||
|
||||
arguments, _ := params["arguments"].(map[string]interface{})
|
||||
if arguments == nil {
|
||||
debugLogger.Log("handleToolCall: No arguments provided, using empty map")
|
||||
arguments = make(map[string]interface{})
|
||||
} else {
|
||||
debugLogger.LogJSON("Tool arguments", arguments)
|
||||
}
|
||||
|
||||
var result ToolResult
|
||||
var err error
|
||||
|
||||
debugLogger.Log("handleToolCall: Dispatching to tool handler: %s", toolName)
|
||||
|
||||
switch toolName {
|
||||
case "web_navigate":
|
||||
result, err = s.handleNavigate(arguments)
|
||||
case "web_interact":
|
||||
result, err = s.handleInteract(arguments)
|
||||
case "web_extract":
|
||||
result, err = s.handleExtract(arguments)
|
||||
case "web_screenshot":
|
||||
result, err = s.handleScreenshot(arguments)
|
||||
case "web_manage_tabs":
|
||||
result, err = s.handleManageTabs(arguments)
|
||||
case "web_iframe":
|
||||
result, err = s.handleIframe(arguments)
|
||||
default:
|
||||
debugLogger.Log("handleToolCall: Unknown tool: %s", toolName)
|
||||
return MCPResponse{
|
||||
Error: &MCPError{Code: -32601, Message: fmt.Sprintf("Unknown tool: %s", toolName)},
|
||||
ID: req.ID,
|
||||
}
|
||||
}
|
||||
|
||||
debugLogger.LogJSON("Tool execution result", result)
|
||||
|
||||
if err != nil {
|
||||
debugLogger.Log("handleToolCall: Tool execution error: %v", err)
|
||||
return MCPResponse{
|
||||
Error: &MCPError{Code: -32603, Message: err.Error()},
|
||||
ID: req.ID,
|
||||
}
|
||||
}
|
||||
|
||||
// Always include current state in response
|
||||
result.CurrentTab = s.currentTab
|
||||
result.TabHistory = s.tabHistory
|
||||
result.IframeMode = s.iframeMode
|
||||
|
||||
response := MCPResponse{Result: result, ID: req.ID}
|
||||
debugLogger.LogJSON("Final tool call response", response)
|
||||
debugLogger.Log("handleToolCall: Completed successfully for tool: %s", toolName)
|
||||
|
||||
return response
|
||||
}
|
||||
|
||||
// Helper functions for parameter extraction
|
||||
// getStringParam returns params[key] when it holds a string; otherwise
// (key absent or value of another type) it returns defaultValue.
func getStringParam(params map[string]interface{}, key, defaultValue string) string {
	text, isString := params[key].(string)
	if !isString {
		return defaultValue
	}
	return text
}
|
||||
|
||||
// getIntParam returns params[key] as an int, or defaultValue when the key is
// absent or holds a non-numeric value.
//
// Numbers decoded by encoding/json arrive as float64 and are truncated toward
// zero. A native int is accepted as well, so argument maps built in Go code
// behave the same as decoded ones instead of silently falling back to the
// default.
func getIntParam(params map[string]interface{}, key string, defaultValue int) int {
	switch v := params[key].(type) {
	case float64:
		return int(v)
	case int:
		return v
	default:
		return defaultValue
	}
}
|
||||
|
||||
// getBoolParam returns params[key] when it holds a bool; otherwise (key
// absent or value of another type) it returns defaultValue.
func getBoolParam(params map[string]interface{}, key string, defaultValue bool) bool {
	flag, isBool := params[key].(bool)
	if !isBool {
		return defaultValue
	}
	return flag
}
|
||||
|
||||
// resolveTabID returns the tab ID to use, defaulting to current tab
|
||||
func (s *MCPServer) resolveTabID(tabID string) string {
|
||||
if tabID != "" {
|
||||
return tabID
|
||||
}
|
||||
return s.currentTab
|
||||
}
|
||||
|
||||
// handleNavigate handles web navigation
|
||||
func (s *MCPServer) handleNavigate(params map[string]interface{}) (ToolResult, error) {
|
||||
url := getStringParam(params, "url", "")
|
||||
if url == "" {
|
||||
return ToolResult{}, fmt.Errorf("url parameter is required")
|
||||
}
|
||||
|
||||
tab := getStringParam(params, "tab", "")
|
||||
screenshot := getBoolParam(params, "screenshot", false)
|
||||
timeout := getIntParam(params, "timeout", 5)
|
||||
|
||||
// If no tab specified and no current tab, open a new one
|
||||
if tab == "" && s.currentTab == "" {
|
||||
newTab, err := s.client.OpenTab(timeout)
|
||||
if err != nil {
|
||||
return ToolResult{}, fmt.Errorf("failed to open new tab: %w", err)
|
||||
}
|
||||
s.currentTab = newTab
|
||||
s.tabHistory = append(s.tabHistory, newTab)
|
||||
tab = newTab
|
||||
} else if tab == "" {
|
||||
tab = s.currentTab
|
||||
} else {
|
||||
// Update current tab if specified
|
||||
s.currentTab = tab
|
||||
// Move to end of history if it exists, otherwise add it
|
||||
s.removeTabFromHistory(tab)
|
||||
s.tabHistory = append(s.tabHistory, tab)
|
||||
}
|
||||
|
||||
// Navigate to URL
|
||||
err := s.client.LoadURL(tab, url, timeout)
|
||||
if err != nil {
|
||||
return ToolResult{}, fmt.Errorf("failed to navigate to %s: %w", url, err)
|
||||
}
|
||||
|
||||
result := ToolResult{
|
||||
Success: true,
|
||||
Data: map[string]string{
|
||||
"url": url,
|
||||
"tab": tab,
|
||||
},
|
||||
}
|
||||
|
||||
// Take screenshot if requested
|
||||
if screenshot {
|
||||
screenshotPath := fmt.Sprintf("/tmp/navigate-%d.png", time.Now().Unix())
|
||||
err = s.client.TakeScreenshot(tab, screenshotPath, false, timeout)
|
||||
if err != nil {
|
||||
// Don't fail the whole operation for screenshot errors
|
||||
result.Metadata = map[string]string{
|
||||
"screenshot_error": err.Error(),
|
||||
}
|
||||
} else {
|
||||
result.Screenshot = screenshotPath
|
||||
s.screenshots = append(s.screenshots, screenshotPath)
|
||||
}
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// handleInteract handles element interactions
|
||||
func (s *MCPServer) handleInteract(params map[string]interface{}) (ToolResult, error) {
|
||||
action := getStringParam(params, "action", "")
|
||||
selector := getStringParam(params, "selector", "")
|
||||
value := getStringParam(params, "value", "")
|
||||
tab := s.resolveTabID(getStringParam(params, "tab", ""))
|
||||
timeout := getIntParam(params, "timeout", 5)
|
||||
|
||||
if action == "" {
|
||||
return ToolResult{}, fmt.Errorf("action parameter is required")
|
||||
}
|
||||
if selector == "" {
|
||||
return ToolResult{}, fmt.Errorf("selector parameter is required")
|
||||
}
|
||||
if tab == "" {
|
||||
return ToolResult{}, fmt.Errorf("no active tab available")
|
||||
}
|
||||
|
||||
var err error
|
||||
result := ToolResult{Success: true}
|
||||
|
||||
switch action {
|
||||
case "click":
|
||||
err = s.client.ClickElement(tab, selector, timeout, timeout)
|
||||
result.Data = map[string]string{"action": "clicked", "selector": selector}
|
||||
|
||||
case "fill":
|
||||
if value == "" {
|
||||
return ToolResult{}, fmt.Errorf("value parameter is required for fill action")
|
||||
}
|
||||
err = s.client.FillFormField(tab, selector, value, timeout, timeout)
|
||||
result.Data = map[string]string{"action": "filled", "selector": selector, "value": value}
|
||||
|
||||
case "submit":
|
||||
err = s.client.SubmitForm(tab, selector, timeout, timeout)
|
||||
result.Data = map[string]string{"action": "submitted", "selector": selector}
|
||||
|
||||
case "upload":
|
||||
if value == "" {
|
||||
return ToolResult{}, fmt.Errorf("value parameter is required for upload action")
|
||||
}
|
||||
err = s.client.UploadFile(tab, selector, value, timeout, timeout)
|
||||
result.Data = map[string]string{"action": "uploaded", "selector": selector, "file": value}
|
||||
|
||||
default:
|
||||
return ToolResult{}, fmt.Errorf("unknown action: %s", action)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return ToolResult{}, fmt.Errorf("failed to %s element: %w", action, err)
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// handleExtract handles data extraction
|
||||
func (s *MCPServer) handleExtract(params map[string]interface{}) (ToolResult, error) {
|
||||
extractType := getStringParam(params, "type", "")
|
||||
selector := getStringParam(params, "selector", "")
|
||||
code := getStringParam(params, "code", "")
|
||||
tab := s.resolveTabID(getStringParam(params, "tab", ""))
|
||||
timeout := getIntParam(params, "timeout", 5)
|
||||
|
||||
if extractType == "" {
|
||||
return ToolResult{}, fmt.Errorf("type parameter is required")
|
||||
}
|
||||
if tab == "" {
|
||||
return ToolResult{}, fmt.Errorf("no active tab available")
|
||||
}
|
||||
|
||||
var data string
|
||||
var err error
|
||||
|
||||
switch extractType {
|
||||
case "source":
|
||||
data, err = s.client.GetPageSource(tab, timeout)
|
||||
|
||||
case "element":
|
||||
if selector == "" {
|
||||
return ToolResult{}, fmt.Errorf("selector parameter is required for element type")
|
||||
}
|
||||
data, err = s.client.GetElementHTML(tab, selector, timeout)
|
||||
|
||||
case "javascript":
|
||||
if code == "" {
|
||||
return ToolResult{}, fmt.Errorf("code parameter is required for javascript type")
|
||||
}
|
||||
data, err = s.client.EvalJS(tab, code, timeout)
|
||||
|
||||
default:
|
||||
return ToolResult{}, fmt.Errorf("unknown extract type: %s", extractType)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return ToolResult{}, fmt.Errorf("failed to extract %s: %w", extractType, err)
|
||||
}
|
||||
|
||||
return ToolResult{
|
||||
Success: true,
|
||||
Data: data,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// handleScreenshot handles screenshot capture
|
||||
func (s *MCPServer) handleScreenshot(params map[string]interface{}) (ToolResult, error) {
|
||||
output := getStringParam(params, "output", "")
|
||||
if output == "" {
|
||||
output = fmt.Sprintf("/tmp/screenshot-%d.png", time.Now().Unix())
|
||||
}
|
||||
|
||||
tab := s.resolveTabID(getStringParam(params, "tab", ""))
|
||||
fullPage := getBoolParam(params, "full_page", false)
|
||||
timeout := getIntParam(params, "timeout", 5)
|
||||
|
||||
if tab == "" {
|
||||
return ToolResult{}, fmt.Errorf("no active tab available")
|
||||
}
|
||||
|
||||
err := s.client.TakeScreenshot(tab, output, fullPage, timeout)
|
||||
if err != nil {
|
||||
return ToolResult{}, fmt.Errorf("failed to take screenshot: %w", err)
|
||||
}
|
||||
|
||||
s.screenshots = append(s.screenshots, output)
|
||||
|
||||
return ToolResult{
|
||||
Success: true,
|
||||
Screenshot: output,
|
||||
Data: map[string]interface{}{
|
||||
"output": output,
|
||||
"full_page": fullPage,
|
||||
"tab": tab,
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// handleManageTabs handles tab management operations
|
||||
func (s *MCPServer) handleManageTabs(params map[string]interface{}) (ToolResult, error) {
|
||||
action := getStringParam(params, "action", "")
|
||||
tab := getStringParam(params, "tab", "")
|
||||
timeout := getIntParam(params, "timeout", 5)
|
||||
|
||||
if action == "" {
|
||||
return ToolResult{}, fmt.Errorf("action parameter is required")
|
||||
}
|
||||
|
||||
var data interface{}
|
||||
var err error
|
||||
|
||||
switch action {
|
||||
case "open":
|
||||
newTab, err := s.client.OpenTab(timeout)
|
||||
if err != nil {
|
||||
return ToolResult{}, fmt.Errorf("failed to open tab: %w", err)
|
||||
}
|
||||
s.currentTab = newTab
|
||||
s.tabHistory = append(s.tabHistory, newTab)
|
||||
data = map[string]string{"tab": newTab, "action": "opened"}
|
||||
|
||||
case "close":
|
||||
targetTab := s.resolveTabID(tab)
|
||||
if targetTab == "" {
|
||||
return ToolResult{}, fmt.Errorf("no tab to close")
|
||||
}
|
||||
err = s.client.CloseTab(targetTab, timeout)
|
||||
if err != nil {
|
||||
return ToolResult{}, fmt.Errorf("failed to close tab: %w", err)
|
||||
}
|
||||
s.removeTabFromHistory(targetTab)
|
||||
data = map[string]string{"tab": targetTab, "action": "closed"}
|
||||
|
||||
case "list":
|
||||
tabs, err := s.client.ListTabs()
|
||||
if err != nil {
|
||||
return ToolResult{}, fmt.Errorf("failed to list tabs: %w", err)
|
||||
}
|
||||
data = tabs
|
||||
|
||||
case "switch":
|
||||
if tab == "" {
|
||||
return ToolResult{}, fmt.Errorf("tab parameter is required for switch action")
|
||||
}
|
||||
s.currentTab = tab
|
||||
s.removeTabFromHistory(tab)
|
||||
s.tabHistory = append(s.tabHistory, tab)
|
||||
data = map[string]string{"tab": tab, "action": "switched"}
|
||||
|
||||
default:
|
||||
return ToolResult{}, fmt.Errorf("unknown tab action: %s", action)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return ToolResult{}, err
|
||||
}
|
||||
|
||||
return ToolResult{
|
||||
Success: true,
|
||||
Data: data,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// handleIframe handles iframe context switching
|
||||
func (s *MCPServer) handleIframe(params map[string]interface{}) (ToolResult, error) {
|
||||
action := getStringParam(params, "action", "")
|
||||
selector := getStringParam(params, "selector", "")
|
||||
tab := s.resolveTabID(getStringParam(params, "tab", ""))
|
||||
|
||||
if action == "" {
|
||||
return ToolResult{}, fmt.Errorf("action parameter is required")
|
||||
}
|
||||
|
||||
if tab == "" {
|
||||
return ToolResult{}, fmt.Errorf("no active tab available")
|
||||
}
|
||||
|
||||
var err error
|
||||
var data map[string]string
|
||||
|
||||
switch action {
|
||||
case "enter":
|
||||
if selector == "" {
|
||||
return ToolResult{}, fmt.Errorf("selector parameter is required for enter action")
|
||||
}
|
||||
err = s.client.SwitchToIframe(tab, selector)
|
||||
s.iframeMode = true
|
||||
data = map[string]string{"action": "entered", "selector": selector}
|
||||
|
||||
case "exit":
|
||||
err = s.client.SwitchToMain(tab)
|
||||
s.iframeMode = false
|
||||
data = map[string]string{"action": "exited"}
|
||||
|
||||
default:
|
||||
return ToolResult{}, fmt.Errorf("unknown iframe action: %s", action)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return ToolResult{}, fmt.Errorf("failed to %s iframe: %w", action, err)
|
||||
}
|
||||
|
||||
return ToolResult{
|
||||
Success: true,
|
||||
Data: data,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// removeTabFromHistory removes a tab from history and updates current tab
|
||||
func (s *MCPServer) removeTabFromHistory(tabID string) {
|
||||
for i, id := range s.tabHistory {
|
||||
if id == tabID {
|
||||
s.tabHistory = append(s.tabHistory[:i], s.tabHistory[i+1:]...)
|
||||
break
|
||||
}
|
||||
}
|
||||
if s.currentTab == tabID {
|
||||
if len(s.tabHistory) > 0 {
|
||||
s.currentTab = s.tabHistory[len(s.tabHistory)-1]
|
||||
} else {
|
||||
s.currentTab = ""
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func main() {
|
||||
// Initialize debug logger
|
||||
var err error
|
||||
debugLogger, err = NewDebugLogger()
|
||||
if err != nil {
|
||||
log.Printf("Warning: Failed to initialize debug logger: %v", err)
|
||||
// Continue without debug logging
|
||||
} else {
|
||||
defer debugLogger.Close()
|
||||
debugLogger.Log("MCP STDIO Server starting up")
|
||||
}
|
||||
|
||||
// Set up signal handling for graceful shutdown
|
||||
sigChan := make(chan os.Signal, 1)
|
||||
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
|
||||
|
||||
host := os.Getenv("CREMOTE_HOST")
|
||||
if host == "" {
|
||||
host = "localhost"
|
||||
}
|
||||
|
||||
portStr := os.Getenv("CREMOTE_PORT")
|
||||
port := 8989
|
||||
if portStr != "" {
|
||||
if p, err := strconv.Atoi(portStr); err == nil {
|
||||
port = p
|
||||
}
|
||||
}
|
||||
|
||||
debugLogger.Log("Connecting to cremote daemon at %s:%d", host, port)
|
||||
log.Printf("Starting MCP stdio server, connecting to cremote daemon at %s:%d", host, port)
|
||||
server := NewMCPServer(host, port)
|
||||
|
||||
// Create a buffered reader for better EOF handling
|
||||
reader := bufio.NewReader(os.Stdin)
|
||||
|
||||
log.Printf("MCP stdio server ready, waiting for requests...")
|
||||
|
||||
// Channel to signal when to stop reading
|
||||
done := make(chan bool)
|
||||
|
||||
// Goroutine to handle stdin reading
|
||||
go func() {
|
||||
defer close(done)
|
||||
for {
|
||||
// Read headers for Content-Length framing (use the same reader for headers and body)
|
||||
contentLength := -1
|
||||
for {
|
||||
line, err := reader.ReadString('\n')
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
log.Printf("Input stream closed, shutting down MCP server")
|
||||
return
|
||||
}
|
||||
log.Printf("Error reading header: %v", err)
|
||||
return
|
||||
}
|
||||
line = strings.TrimRight(line, "\r\n")
|
||||
if line == "" {
|
||||
break // end of headers
|
||||
}
|
||||
const prefix = "content-length: "
|
||||
low := strings.ToLower(line)
|
||||
if strings.HasPrefix(low, prefix) {
|
||||
var err error
|
||||
contentLength, err = strconv.Atoi(strings.TrimSpace(low[len(prefix):]))
|
||||
if err != nil {
|
||||
log.Printf("Invalid Content-Length: %v", err)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if contentLength < 0 {
|
||||
log.Printf("Missing Content-Length header")
|
||||
return
|
||||
}
|
||||
|
||||
// Read body of specified length
|
||||
debugLogger.Log("Reading request body of length: %d", contentLength)
|
||||
body := make([]byte, contentLength)
|
||||
if _, err := io.ReadFull(reader, body); err != nil {
|
||||
debugLogger.Log("Error reading body: %v", err)
|
||||
log.Printf("Error reading body: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
debugLogger.Log("Raw request body: %s", string(body))
|
||||
|
||||
var req MCPRequest
|
||||
if err := json.Unmarshal(body, &req); err != nil {
|
||||
debugLogger.Log("Error decoding request body: %v", err)
|
||||
log.Printf("Error decoding request body: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
debugLogger.Log("Successfully parsed request: %s (ID: %v)", req.Method, req.ID)
|
||||
log.Printf("Processing request: %s (ID: %v)", req.Method, req.ID)
|
||||
|
||||
resp := server.HandleRequest(req)
|
||||
resp.JSONRPC = "2.0"
|
||||
|
||||
// Write Content-Length framed response
|
||||
responseBytes, err := json.Marshal(resp)
|
||||
if err != nil {
|
||||
debugLogger.Log("Error marshaling response: %v", err)
|
||||
log.Printf("Error marshaling response: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
debugLogger.Log("Sending response with Content-Length: %d", len(responseBytes))
|
||||
debugLogger.Log("Raw response: %s", string(responseBytes))
|
||||
|
||||
fmt.Fprintf(os.Stdout, "Content-Length: %d\r\n\r\n", len(responseBytes))
|
||||
os.Stdout.Write(responseBytes)
|
||||
|
||||
debugLogger.Log("Response sent successfully for request: %s", req.Method)
|
||||
log.Printf("Completed request: %s", req.Method)
|
||||
}
|
||||
}()
|
||||
|
||||
// Wait for either completion or signal
|
||||
select {
|
||||
case <-done:
|
||||
log.Printf("Input processing completed")
|
||||
case sig := <-sigChan:
|
||||
log.Printf("Received signal %v, shutting down", sig)
|
||||
}
|
||||
|
||||
log.Printf("MCP stdio server shutdown complete")
|
||||
}
|
|
@ -0,0 +1,822 @@
|
|||
//go:build mcp_http
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"git.teamworkapps.com/shortcut/cremote/client"
|
||||
)
|
||||
|
||||
// DebugLogger writes debug output to a log file through a standard
// log.Logger.
type DebugLogger struct {
	file   *os.File    // open log file, closed by Close
	logger *log.Logger // logger that writes to file
}
|
||||
|
||||
// NewDebugLogger creates a new debug logger
|
||||
func NewDebugLogger() (*DebugLogger, error) {
|
||||
logDir := "/tmp/cremote-mcp-logs"
|
||||
if err := os.MkdirAll(logDir, 0755); err != nil {
|
||||
return nil, fmt.Errorf("failed to create log directory: %w", err)
|
||||
}
|
||||
|
||||
logFile := filepath.Join(logDir, fmt.Sprintf("mcp-http-%d.log", time.Now().Unix()))
|
||||
file, err := os.OpenFile(logFile, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to open log file: %w", err)
|
||||
}
|
||||
|
||||
logger := log.New(file, "", log.LstdFlags|log.Lmicroseconds|log.Lshortfile)
|
||||
logger.Printf("=== MCP HTTP Server Debug Log Started ===")
|
||||
|
||||
return &DebugLogger{
|
||||
file: file,
|
||||
logger: logger,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Log writes a debug message
|
||||
func (d *DebugLogger) Log(format string, args ...interface{}) {
|
||||
if d != nil && d.logger != nil {
|
||||
d.logger.Printf(format, args...)
|
||||
}
|
||||
}
|
||||
|
||||
// LogJSON logs a JSON object with a label
|
||||
func (d *DebugLogger) LogJSON(label string, obj interface{}) {
|
||||
if d != nil && d.logger != nil {
|
||||
jsonBytes, err := json.MarshalIndent(obj, "", " ")
|
||||
if err != nil {
|
||||
d.logger.Printf("%s: JSON marshal error: %v", label, err)
|
||||
} else {
|
||||
d.logger.Printf("%s:\n%s", label, string(jsonBytes))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Close closes the debug logger
|
||||
func (d *DebugLogger) Close() {
|
||||
if d != nil && d.file != nil {
|
||||
d.logger.Printf("=== MCP HTTP Server Debug Log Ended ===")
|
||||
d.file.Close()
|
||||
}
|
||||
}
|
||||
|
||||
// debugLogger is the package-level debug logger used by the request handlers.
// DebugLogger's methods check for a nil receiver, so calls through this
// variable are safe even while it is still nil.
var debugLogger *DebugLogger
|
||||
|
||||
// MCPServer wraps the cremote client with MCP protocol
|
||||
type MCPServer struct {
|
||||
client *client.Client
|
||||
currentTab string
|
||||
tabHistory []string
|
||||
iframeMode bool
|
||||
lastError string
|
||||
screenshots []string
|
||||
}
|
||||
|
||||
// MCPRequest is a decoded incoming MCP request.
type MCPRequest struct {
	// Method names the operation, e.g. "initialize" or "tools/call".
	Method string `json:"method"`
	// Params carries the method-specific arguments.
	Params map[string]interface{} `json:"params"`
	// ID is the caller's request identifier, echoed back in the response.
	ID interface{} `json:"id"`
}
|
||||
|
||||
// MCPResponse represents an MCP response
|
||||
type MCPResponse struct {
|
||||
Result interface{} `json:"result,omitempty"`
|
||||
Error *MCPError `json:"error,omitempty"`
|
||||
ID interface{} `json:"id"`
|
||||
}
|
||||
|
||||
// MCPError is the error object carried in an MCPResponse.
type MCPError struct {
	// Code is the numeric error code (the handlers use JSON-RPC style
	// values such as -32601 and -32602).
	Code int `json:"code"`
	// Message is the human-readable description.
	Message string `json:"message"`
}
|
||||
|
||||
// ToolResult is the payload returned for a tools/call request: the tool's own
// output plus a snapshot of the server's session state.
type ToolResult struct {
	// Success reports whether the tool ran without error.
	Success bool `json:"success"`
	// Data is the tool-specific output.
	Data interface{} `json:"data,omitempty"`
	// Screenshot is the path of a screenshot taken by the call, if any.
	Screenshot string `json:"screenshot,omitempty"`
	// CurrentTab, TabHistory and IframeMode mirror the server state at the
	// time the response is built.
	CurrentTab string   `json:"current_tab,omitempty"`
	TabHistory []string `json:"tab_history,omitempty"`
	IframeMode bool     `json:"iframe_mode"`
	// Error is the failure message when Success is false.
	Error string `json:"error,omitempty"`
	// Metadata carries optional extra string key/value details.
	Metadata map[string]string `json:"metadata,omitempty"`
}
|
||||
|
||||
// NewMCPServer creates a new MCP server instance
|
||||
func NewMCPServer(host string, port int) *MCPServer {
|
||||
return &MCPServer{
|
||||
client: client.NewClient(host, port),
|
||||
tabHistory: make([]string, 0),
|
||||
screenshots: make([]string, 0),
|
||||
}
|
||||
}
|
||||
|
||||
// HandleRequest processes an MCP request
|
||||
func (s *MCPServer) HandleRequest(req MCPRequest) MCPResponse {
|
||||
debugLogger.Log("HandleRequest called with method: %s, ID: %v", req.Method, req.ID)
|
||||
debugLogger.LogJSON("Incoming Request", req)
|
||||
|
||||
var resp MCPResponse
|
||||
|
||||
switch req.Method {
|
||||
case "initialize":
|
||||
debugLogger.Log("Handling initialize request")
|
||||
resp = s.handleInitialize(req)
|
||||
case "tools/list":
|
||||
debugLogger.Log("Handling tools/list request")
|
||||
resp = s.handleToolsList(req)
|
||||
case "tools/call":
|
||||
debugLogger.Log("Handling tools/call request")
|
||||
resp = s.handleToolCall(req)
|
||||
default:
|
||||
debugLogger.Log("Unknown method: %s", req.Method)
|
||||
resp = MCPResponse{
|
||||
Error: &MCPError{Code: -32601, Message: "Method not found"},
|
||||
ID: req.ID,
|
||||
}
|
||||
}
|
||||
|
||||
debugLogger.LogJSON("Response", resp)
|
||||
debugLogger.Log("HandleRequest completed for method: %s", req.Method)
|
||||
return resp
|
||||
}
|
||||
|
||||
// handleInitialize handles the MCP initialize request
|
||||
func (s *MCPServer) handleInitialize(req MCPRequest) MCPResponse {
|
||||
return MCPResponse{
|
||||
Result: map[string]interface{}{
|
||||
"protocolVersion": "2024-11-05",
|
||||
"capabilities": map[string]interface{}{
|
||||
"tools": map[string]interface{}{
|
||||
"listChanged": true,
|
||||
},
|
||||
},
|
||||
"serverInfo": map[string]interface{}{
|
||||
"name": "cremote-mcp",
|
||||
"version": "1.0.0",
|
||||
},
|
||||
},
|
||||
ID: req.ID,
|
||||
}
|
||||
}
|
||||
|
||||
// handleToolsList returns the list of available tools
|
||||
func (s *MCPServer) handleToolsList(req MCPRequest) MCPResponse {
|
||||
tools := []map[string]interface{}{
|
||||
{
|
||||
"name": "web_navigate",
|
||||
"description": "Navigate to a URL and optionally take a screenshot",
|
||||
"inputSchema": map[string]interface{}{
|
||||
"type": "object",
|
||||
"properties": map[string]interface{}{
|
||||
"url": map[string]interface{}{"type": "string", "description": "URL to navigate to"},
|
||||
"tab": map[string]interface{}{"type": "string", "description": "Tab ID (optional, uses current tab)"},
|
||||
"screenshot": map[string]interface{}{"type": "boolean", "description": "Take screenshot after navigation"},
|
||||
"timeout": map[string]interface{}{"type": "integer", "description": "Timeout in seconds", "default": 5},
|
||||
},
|
||||
"required": []string{"url"},
|
||||
},
|
||||
},
|
||||
{
|
||||
"name": "web_interact",
|
||||
"description": "Interact with web elements (click, fill, submit)",
|
||||
"inputSchema": map[string]interface{}{
|
||||
"type": "object",
|
||||
"properties": map[string]interface{}{
|
||||
"action": map[string]interface{}{"type": "string", "enum": []string{"click", "fill", "submit", "upload"}},
|
||||
"selector": map[string]interface{}{"type": "string", "description": "CSS selector for the element"},
|
||||
"value": map[string]interface{}{"type": "string", "description": "Value to fill (for fill/upload actions)"},
|
||||
"tab": map[string]interface{}{"type": "string", "description": "Tab ID (optional)"},
|
||||
"timeout": map[string]interface{}{"type": "integer", "description": "Timeout in seconds", "default": 5},
|
||||
},
|
||||
"required": []string{"action", "selector"},
|
||||
},
|
||||
},
|
||||
{
|
||||
"name": "web_extract",
|
||||
"description": "Extract data from the page (source, element HTML, or execute JavaScript)",
|
||||
"inputSchema": map[string]interface{}{
|
||||
"type": "object",
|
||||
"properties": map[string]interface{}{
|
||||
"type": map[string]interface{}{"type": "string", "enum": []string{"source", "element", "javascript"}},
|
||||
"selector": map[string]interface{}{"type": "string", "description": "CSS selector (for element type)"},
|
||||
"code": map[string]interface{}{"type": "string", "description": "JavaScript code (for javascript type)"},
|
||||
"tab": map[string]interface{}{"type": "string", "description": "Tab ID (optional)"},
|
||||
"timeout": map[string]interface{}{"type": "integer", "description": "Timeout in seconds", "default": 5},
|
||||
},
|
||||
"required": []string{"type"},
|
||||
},
|
||||
},
|
||||
{
|
||||
"name": "web_screenshot",
|
||||
"description": "Take a screenshot of the current page",
|
||||
"inputSchema": map[string]interface{}{
|
||||
"type": "object",
|
||||
"properties": map[string]interface{}{
|
||||
"output": map[string]interface{}{"type": "string", "description": "Output file path"},
|
||||
"full_page": map[string]interface{}{"type": "boolean", "description": "Capture full page", "default": false},
|
||||
"tab": map[string]interface{}{"type": "string", "description": "Tab ID (optional)"},
|
||||
"timeout": map[string]interface{}{"type": "integer", "description": "Timeout in seconds", "default": 5},
|
||||
},
|
||||
"required": []string{"output"},
|
||||
},
|
||||
},
|
||||
{
|
||||
"name": "web_manage_tabs",
|
||||
"description": "Manage browser tabs (open, close, list, switch)",
|
||||
"inputSchema": map[string]interface{}{
|
||||
"type": "object",
|
||||
"properties": map[string]interface{}{
|
||||
"action": map[string]interface{}{"type": "string", "enum": []string{"open", "close", "list", "switch"}},
|
||||
"tab": map[string]interface{}{"type": "string", "description": "Tab ID (for close/switch actions)"},
|
||||
"timeout": map[string]interface{}{"type": "integer", "description": "Timeout in seconds", "default": 5},
|
||||
},
|
||||
"required": []string{"action"},
|
||||
},
|
||||
},
|
||||
{
|
||||
"name": "web_iframe",
|
||||
"description": "Switch iframe context for subsequent operations",
|
||||
"inputSchema": map[string]interface{}{
|
||||
"type": "object",
|
||||
"properties": map[string]interface{}{
|
||||
"action": map[string]interface{}{"type": "string", "enum": []string{"enter", "exit"}},
|
||||
"selector": map[string]interface{}{"type": "string", "description": "Iframe CSS selector (for enter action)"},
|
||||
"tab": map[string]interface{}{"type": "string", "description": "Tab ID (optional)"},
|
||||
},
|
||||
"required": []string{"action"},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
return MCPResponse{
|
||||
Result: map[string]interface{}{"tools": tools},
|
||||
ID: req.ID,
|
||||
}
|
||||
}
|
||||
|
||||
// handleToolCall executes a tool call
|
||||
func (s *MCPServer) handleToolCall(req MCPRequest) MCPResponse {
|
||||
params, ok := req.Params["arguments"].(map[string]interface{})
|
||||
if !ok {
|
||||
return MCPResponse{
|
||||
Error: &MCPError{Code: -32602, Message: "Invalid parameters"},
|
||||
ID: req.ID,
|
||||
}
|
||||
}
|
||||
|
||||
toolName, ok := req.Params["name"].(string)
|
||||
if !ok {
|
||||
return MCPResponse{
|
||||
Error: &MCPError{Code: -32602, Message: "Tool name required"},
|
||||
ID: req.ID,
|
||||
}
|
||||
}
|
||||
|
||||
var result ToolResult
|
||||
var err error
|
||||
|
||||
switch toolName {
|
||||
case "web_navigate":
|
||||
result, err = s.handleNavigate(params)
|
||||
case "web_interact":
|
||||
result, err = s.handleInteract(params)
|
||||
case "web_extract":
|
||||
result, err = s.handleExtract(params)
|
||||
case "web_screenshot":
|
||||
result, err = s.handleScreenshot(params)
|
||||
case "web_manage_tabs":
|
||||
result, err = s.handleManageTabs(params)
|
||||
case "web_iframe":
|
||||
result, err = s.handleIframe(params)
|
||||
default:
|
||||
return MCPResponse{
|
||||
Error: &MCPError{Code: -32601, Message: "Unknown tool: " + toolName},
|
||||
ID: req.ID,
|
||||
}
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
result.Success = false
|
||||
result.Error = err.Error()
|
||||
s.lastError = err.Error()
|
||||
}
|
||||
|
||||
// Always include current state in response
|
||||
result.CurrentTab = s.currentTab
|
||||
result.TabHistory = s.tabHistory
|
||||
result.IframeMode = s.iframeMode
|
||||
|
||||
return MCPResponse{
|
||||
Result: result,
|
||||
ID: req.ID,
|
||||
}
|
||||
}
|
||||
|
||||
// Helper function to get string parameter with default
|
||||
// getStringParam returns params[key] when it holds a string; otherwise
// (key absent or value of another type) it returns defaultValue.
func getStringParam(params map[string]interface{}, key, defaultValue string) string {
	switch v := params[key].(type) {
	case string:
		return v
	default:
		return defaultValue
	}
}
|
||||
|
||||
// Helper function to get int parameter with default
|
||||
// getIntParam returns params[key] as an int when it holds a float64
// (truncated toward zero) or an int; otherwise it returns defaultValue.
func getIntParam(params map[string]interface{}, key string, defaultValue int) int {
	switch n := params[key].(type) {
	case float64:
		return int(n)
	case int:
		return n
	}
	return defaultValue
}
|
||||
|
||||
// Helper function to get bool parameter with default
|
||||
// getBoolParam returns params[key] when it holds a bool; otherwise (key
// absent or value of another type) it returns defaultValue.
func getBoolParam(params map[string]interface{}, key string, defaultValue bool) bool {
	switch v := params[key].(type) {
	case bool:
		return v
	default:
		return defaultValue
	}
}
|
||||
|
||||
// Helper function to resolve tab ID
|
||||
func (s *MCPServer) resolveTabID(tabParam string) string {
|
||||
if tabParam != "" {
|
||||
return tabParam
|
||||
}
|
||||
return s.currentTab
|
||||
}
|
||||
|
||||
// handleNavigate handles web navigation
|
||||
func (s *MCPServer) handleNavigate(params map[string]interface{}) (ToolResult, error) {
|
||||
url := getStringParam(params, "url", "")
|
||||
if url == "" {
|
||||
return ToolResult{}, fmt.Errorf("url parameter is required")
|
||||
}
|
||||
|
||||
tab := getStringParam(params, "tab", "")
|
||||
timeout := getIntParam(params, "timeout", 5)
|
||||
takeScreenshot := getBoolParam(params, "screenshot", false)
|
||||
|
||||
// If no tab specified and we don't have a current tab, open one
|
||||
if tab == "" && s.currentTab == "" {
|
||||
newTab, err := s.client.OpenTab(timeout)
|
||||
if err != nil {
|
||||
return ToolResult{}, fmt.Errorf("failed to open new tab: %w", err)
|
||||
}
|
||||
s.currentTab = newTab
|
||||
s.tabHistory = append(s.tabHistory, newTab)
|
||||
tab = newTab
|
||||
} else if tab == "" {
|
||||
tab = s.currentTab
|
||||
}
|
||||
|
||||
// Load the URL
|
||||
err := s.client.LoadURL(tab, url, timeout)
|
||||
if err != nil {
|
||||
return ToolResult{}, fmt.Errorf("failed to load URL: %w", err)
|
||||
}
|
||||
|
||||
result := ToolResult{
|
||||
Success: true,
|
||||
Data: map[string]string{"url": url, "tab": tab},
|
||||
}
|
||||
|
||||
// Take screenshot if requested
|
||||
if takeScreenshot {
|
||||
screenshotPath := fmt.Sprintf("/tmp/navigate-%d.png", time.Now().Unix())
|
||||
err = s.client.TakeScreenshot(tab, screenshotPath, false, timeout)
|
||||
if err == nil {
|
||||
result.Screenshot = screenshotPath
|
||||
s.screenshots = append(s.screenshots, screenshotPath)
|
||||
}
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// handleInteract handles web element interactions
|
||||
func (s *MCPServer) handleInteract(params map[string]interface{}) (ToolResult, error) {
|
||||
action := getStringParam(params, "action", "")
|
||||
selector := getStringParam(params, "selector", "")
|
||||
value := getStringParam(params, "value", "")
|
||||
tab := s.resolveTabID(getStringParam(params, "tab", ""))
|
||||
timeout := getIntParam(params, "timeout", 5)
|
||||
|
||||
if action == "" || selector == "" {
|
||||
return ToolResult{}, fmt.Errorf("action and selector parameters are required")
|
||||
}
|
||||
|
||||
if tab == "" {
|
||||
return ToolResult{}, fmt.Errorf("no active tab available")
|
||||
}
|
||||
|
||||
var err error
|
||||
result := ToolResult{Success: true}
|
||||
|
||||
switch action {
|
||||
case "click":
|
||||
err = s.client.ClickElement(tab, selector, timeout, timeout)
|
||||
result.Data = map[string]string{"action": "clicked", "selector": selector}
|
||||
|
||||
case "fill":
|
||||
if value == "" {
|
||||
return ToolResult{}, fmt.Errorf("value parameter is required for fill action")
|
||||
}
|
||||
err = s.client.FillFormField(tab, selector, value, timeout, timeout)
|
||||
result.Data = map[string]string{"action": "filled", "selector": selector, "value": value}
|
||||
|
||||
case "submit":
|
||||
err = s.client.SubmitForm(tab, selector, timeout, timeout)
|
||||
result.Data = map[string]string{"action": "submitted", "selector": selector}
|
||||
|
||||
case "upload":
|
||||
if value == "" {
|
||||
return ToolResult{}, fmt.Errorf("value parameter (file path) is required for upload action")
|
||||
}
|
||||
err = s.client.UploadFile(tab, selector, value, timeout, timeout)
|
||||
result.Data = map[string]string{"action": "uploaded", "selector": selector, "file": value}
|
||||
|
||||
default:
|
||||
return ToolResult{}, fmt.Errorf("unknown action: %s", action)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return ToolResult{}, fmt.Errorf("failed to %s element: %w", action, err)
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// handleExtract handles data extraction from pages
|
||||
func (s *MCPServer) handleExtract(params map[string]interface{}) (ToolResult, error) {
|
||||
extractType := getStringParam(params, "type", "")
|
||||
selector := getStringParam(params, "selector", "")
|
||||
code := getStringParam(params, "code", "")
|
||||
tab := s.resolveTabID(getStringParam(params, "tab", ""))
|
||||
timeout := getIntParam(params, "timeout", 5)
|
||||
|
||||
if extractType == "" {
|
||||
return ToolResult{}, fmt.Errorf("type parameter is required")
|
||||
}
|
||||
|
||||
if tab == "" {
|
||||
return ToolResult{}, fmt.Errorf("no active tab available")
|
||||
}
|
||||
|
||||
var data interface{}
|
||||
var err error
|
||||
|
||||
switch extractType {
|
||||
case "source":
|
||||
data, err = s.client.GetPageSource(tab, timeout)
|
||||
|
||||
case "element":
|
||||
if selector == "" {
|
||||
return ToolResult{}, fmt.Errorf("selector parameter is required for element extraction")
|
||||
}
|
||||
data, err = s.client.GetElementHTML(tab, selector, timeout)
|
||||
|
||||
case "javascript":
|
||||
if code == "" {
|
||||
return ToolResult{}, fmt.Errorf("code parameter is required for javascript extraction")
|
||||
}
|
||||
data, err = s.client.EvalJS(tab, code, timeout)
|
||||
|
||||
default:
|
||||
return ToolResult{}, fmt.Errorf("unknown extraction type: %s", extractType)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return ToolResult{}, fmt.Errorf("failed to extract %s: %w", extractType, err)
|
||||
}
|
||||
|
||||
return ToolResult{
|
||||
Success: true,
|
||||
Data: data,
|
||||
Metadata: map[string]string{
|
||||
"type": extractType,
|
||||
"selector": selector,
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// handleScreenshot handles screenshot capture
|
||||
func (s *MCPServer) handleScreenshot(params map[string]interface{}) (ToolResult, error) {
|
||||
output := getStringParam(params, "output", "")
|
||||
if output == "" {
|
||||
output = fmt.Sprintf("/tmp/screenshot-%d.png", time.Now().Unix())
|
||||
}
|
||||
|
||||
tab := s.resolveTabID(getStringParam(params, "tab", ""))
|
||||
fullPage := getBoolParam(params, "full_page", false)
|
||||
timeout := getIntParam(params, "timeout", 5)
|
||||
|
||||
if tab == "" {
|
||||
return ToolResult{}, fmt.Errorf("no active tab available")
|
||||
}
|
||||
|
||||
err := s.client.TakeScreenshot(tab, output, fullPage, timeout)
|
||||
if err != nil {
|
||||
return ToolResult{}, fmt.Errorf("failed to take screenshot: %w", err)
|
||||
}
|
||||
|
||||
s.screenshots = append(s.screenshots, output)
|
||||
|
||||
return ToolResult{
|
||||
Success: true,
|
||||
Screenshot: output,
|
||||
Data: map[string]interface{}{
|
||||
"output": output,
|
||||
"full_page": fullPage,
|
||||
"tab": tab,
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// handleManageTabs handles tab management operations
|
||||
func (s *MCPServer) handleManageTabs(params map[string]interface{}) (ToolResult, error) {
|
||||
action := getStringParam(params, "action", "")
|
||||
tab := getStringParam(params, "tab", "")
|
||||
timeout := getIntParam(params, "timeout", 5)
|
||||
|
||||
if action == "" {
|
||||
return ToolResult{}, fmt.Errorf("action parameter is required")
|
||||
}
|
||||
|
||||
var data interface{}
|
||||
var err error
|
||||
|
||||
switch action {
|
||||
case "open":
|
||||
newTab, err := s.client.OpenTab(timeout)
|
||||
if err != nil {
|
||||
return ToolResult{}, fmt.Errorf("failed to open tab: %w", err)
|
||||
}
|
||||
s.currentTab = newTab
|
||||
s.tabHistory = append(s.tabHistory, newTab)
|
||||
data = map[string]string{"tab": newTab, "action": "opened"}
|
||||
|
||||
case "close":
|
||||
targetTab := s.resolveTabID(tab)
|
||||
if targetTab == "" {
|
||||
return ToolResult{}, fmt.Errorf("no tab to close")
|
||||
}
|
||||
err = s.client.CloseTab(targetTab, timeout)
|
||||
if err != nil {
|
||||
return ToolResult{}, fmt.Errorf("failed to close tab: %w", err)
|
||||
}
|
||||
// Remove from history and update current tab
|
||||
s.removeTabFromHistory(targetTab)
|
||||
data = map[string]string{"tab": targetTab, "action": "closed"}
|
||||
|
||||
case "list":
|
||||
tabs, err := s.client.ListTabs()
|
||||
if err != nil {
|
||||
return ToolResult{}, fmt.Errorf("failed to list tabs: %w", err)
|
||||
}
|
||||
data = tabs
|
||||
|
||||
case "switch":
|
||||
if tab == "" {
|
||||
return ToolResult{}, fmt.Errorf("tab parameter is required for switch action")
|
||||
}
|
||||
s.currentTab = tab
|
||||
// Move to end of history if it exists, otherwise add it
|
||||
s.removeTabFromHistory(tab)
|
||||
s.tabHistory = append(s.tabHistory, tab)
|
||||
data = map[string]string{"tab": tab, "action": "switched"}
|
||||
|
||||
default:
|
||||
return ToolResult{}, fmt.Errorf("unknown tab action: %s", action)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return ToolResult{}, err
|
||||
}
|
||||
|
||||
return ToolResult{
|
||||
Success: true,
|
||||
Data: data,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// handleIframe handles iframe context switching
|
||||
func (s *MCPServer) handleIframe(params map[string]interface{}) (ToolResult, error) {
|
||||
action := getStringParam(params, "action", "")
|
||||
selector := getStringParam(params, "selector", "")
|
||||
tab := s.resolveTabID(getStringParam(params, "tab", ""))
|
||||
|
||||
if action == "" {
|
||||
return ToolResult{}, fmt.Errorf("action parameter is required")
|
||||
}
|
||||
|
||||
if tab == "" {
|
||||
return ToolResult{}, fmt.Errorf("no active tab available")
|
||||
}
|
||||
|
||||
var err error
|
||||
var data map[string]string
|
||||
|
||||
switch action {
|
||||
case "enter":
|
||||
if selector == "" {
|
||||
return ToolResult{}, fmt.Errorf("selector parameter is required for enter action")
|
||||
}
|
||||
err = s.client.SwitchToIframe(tab, selector)
|
||||
s.iframeMode = true
|
||||
data = map[string]string{"action": "entered", "selector": selector}
|
||||
|
||||
case "exit":
|
||||
err = s.client.SwitchToMain(tab)
|
||||
s.iframeMode = false
|
||||
data = map[string]string{"action": "exited"}
|
||||
|
||||
default:
|
||||
return ToolResult{}, fmt.Errorf("unknown iframe action: %s", action)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return ToolResult{}, fmt.Errorf("failed to %s iframe: %w", action, err)
|
||||
}
|
||||
|
||||
return ToolResult{
|
||||
Success: true,
|
||||
Data: data,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Helper function to remove tab from history
|
||||
func (s *MCPServer) removeTabFromHistory(tabID string) {
|
||||
for i, id := range s.tabHistory {
|
||||
if id == tabID {
|
||||
s.tabHistory = append(s.tabHistory[:i], s.tabHistory[i+1:]...)
|
||||
break
|
||||
}
|
||||
}
|
||||
// If we removed the current tab, set current to the last in history
|
||||
if s.currentTab == tabID {
|
||||
if len(s.tabHistory) > 0 {
|
||||
s.currentTab = s.tabHistory[len(s.tabHistory)-1]
|
||||
} else {
|
||||
s.currentTab = ""
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// HTTP handler for MCP requests
|
||||
func (s *MCPServer) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||
debugLogger.Log("ServeHTTP: Received %s request from %s", r.Method, r.RemoteAddr)
|
||||
debugLogger.Log("ServeHTTP: Request URL: %s", r.URL.String())
|
||||
debugLogger.Log("ServeHTTP: Request headers: %v", r.Header)
|
||||
|
||||
// Set CORS headers for browser compatibility
|
||||
w.Header().Set("Access-Control-Allow-Origin", "*")
|
||||
w.Header().Set("Access-Control-Allow-Methods", "POST, OPTIONS")
|
||||
w.Header().Set("Access-Control-Allow-Headers", "Content-Type")
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
|
||||
// Handle preflight requests
|
||||
if r.Method == "OPTIONS" {
|
||||
debugLogger.Log("ServeHTTP: Handling OPTIONS preflight request")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
return
|
||||
}
|
||||
|
||||
// Only accept POST requests
|
||||
if r.Method != "POST" {
|
||||
debugLogger.Log("ServeHTTP: Method not allowed: %s", r.Method)
|
||||
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
|
||||
return
|
||||
}
|
||||
|
||||
// Read and decode the request
|
||||
var req MCPRequest
|
||||
decoder := json.NewDecoder(r.Body)
|
||||
if err := decoder.Decode(&req); err != nil {
|
||||
debugLogger.Log("ServeHTTP: Error decoding request: %v", err)
|
||||
log.Printf("Error decoding request: %v", err)
|
||||
http.Error(w, "Invalid JSON", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
debugLogger.LogJSON("HTTP Request", req)
|
||||
log.Printf("Processing HTTP request: %s (ID: %v)", req.Method, req.ID)
|
||||
|
||||
// Handle the request
|
||||
resp := s.HandleRequest(req)
|
||||
|
||||
debugLogger.LogJSON("HTTP Response", resp)
|
||||
|
||||
// Encode and send the response
|
||||
encoder := json.NewEncoder(w)
|
||||
if err := encoder.Encode(resp); err != nil {
|
||||
debugLogger.Log("ServeHTTP: Error encoding response: %v", err)
|
||||
log.Printf("Error encoding response: %v", err)
|
||||
http.Error(w, "Internal server error", http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
debugLogger.Log("ServeHTTP: Response sent successfully for request: %s", req.Method)
|
||||
log.Printf("Completed HTTP request: %s", req.Method)
|
||||
}
|
||||
|
||||
func main() {
|
||||
// Initialize debug logger
|
||||
var err error
|
||||
debugLogger, err = NewDebugLogger()
|
||||
if err != nil {
|
||||
log.Printf("Warning: Failed to initialize debug logger: %v", err)
|
||||
// Continue without debug logging
|
||||
} else {
|
||||
defer debugLogger.Close()
|
||||
debugLogger.Log("MCP HTTP Server starting up")
|
||||
}
|
||||
|
||||
// Get cremote daemon connection settings
|
||||
cremoteHost := os.Getenv("CREMOTE_HOST")
|
||||
if cremoteHost == "" {
|
||||
cremoteHost = "localhost"
|
||||
}
|
||||
|
||||
cremotePortStr := os.Getenv("CREMOTE_PORT")
|
||||
cremotePort := 8989
|
||||
if cremotePortStr != "" {
|
||||
if p, err := strconv.Atoi(cremotePortStr); err == nil {
|
||||
cremotePort = p
|
||||
}
|
||||
}
|
||||
|
||||
// Get HTTP server settings
|
||||
httpHost := os.Getenv("MCP_HOST")
|
||||
if httpHost == "" {
|
||||
httpHost = "localhost"
|
||||
}
|
||||
|
||||
httpPortStr := os.Getenv("MCP_PORT")
|
||||
httpPort := 8990
|
||||
if httpPortStr != "" {
|
||||
if p, err := strconv.Atoi(httpPortStr); err == nil {
|
||||
httpPort = p
|
||||
}
|
||||
}
|
||||
|
||||
debugLogger.Log("HTTP server will listen on %s:%d", httpHost, httpPort)
|
||||
debugLogger.Log("Connecting to cremote daemon at %s:%d", cremoteHost, cremotePort)
|
||||
|
||||
log.Printf("Starting MCP HTTP server on %s:%d", httpHost, httpPort)
|
||||
log.Printf("Connecting to cremote daemon at %s:%d", cremoteHost, cremotePort)
|
||||
|
||||
// Create the MCP server
|
||||
mcpServer := NewMCPServer(cremoteHost, cremotePort)
|
||||
|
||||
// Set up HTTP routes
|
||||
http.Handle("/mcp", mcpServer)
|
||||
|
||||
// Health check endpoint
|
||||
http.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
json.NewEncoder(w).Encode(map[string]string{
|
||||
"status": "healthy",
|
||||
"cremote_host": cremoteHost,
|
||||
"cremote_port": strconv.Itoa(cremotePort),
|
||||
})
|
||||
})
|
||||
|
||||
// Root endpoint with basic info
|
||||
http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
json.NewEncoder(w).Encode(map[string]interface{}{
|
||||
"service": "Cremote MCP Server",
|
||||
"version": "1.0.0",
|
||||
"endpoints": map[string]string{
|
||||
"mcp": "/mcp",
|
||||
"health": "/health",
|
||||
},
|
||||
"cremote_daemon": fmt.Sprintf("%s:%d", cremoteHost, cremotePort),
|
||||
})
|
||||
})
|
||||
|
||||
// Start the HTTP server
|
||||
addr := fmt.Sprintf("%s:%d", httpHost, httpPort)
|
||||
log.Printf("MCP HTTP server listening on http://%s", addr)
|
||||
log.Printf("MCP endpoint: http://%s/mcp", addr)
|
||||
log.Printf("Health check: http://%s/health", addr)
|
||||
|
||||
if err := http.ListenAndServe(addr, nil); err != nil {
|
||||
log.Fatalf("HTTP server failed: %v", err)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,11 @@
|
|||
{
|
||||
"mcpServers": {
|
||||
"cremote": {
|
||||
"command": "/path/to/cremote/mcp/cremote-mcp",
|
||||
"env": {
|
||||
"CREMOTE_HOST": "localhost",
|
||||
"CREMOTE_PORT": "8989"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Binary file not shown.
|
@ -0,0 +1,22 @@
|
|||
module git.teamworkapps.com/shortcut/cremote/mcp
|
||||
|
||||
go 1.24.1
|
||||
|
||||
replace git.teamworkapps.com/shortcut/cremote => ../
|
||||
|
||||
require (
|
||||
git.teamworkapps.com/shortcut/cremote v0.0.0-00010101000000-000000000000
|
||||
github.com/mark3labs/mcp-go v0.37.0
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/bahlo/generic-list-go v0.2.0 // indirect
|
||||
github.com/buger/jsonparser v1.1.1 // indirect
|
||||
github.com/google/uuid v1.6.0 // indirect
|
||||
github.com/invopop/jsonschema v0.13.0 // indirect
|
||||
github.com/mailru/easyjson v0.7.7 // indirect
|
||||
github.com/spf13/cast v1.7.1 // indirect
|
||||
github.com/wk8/go-ordered-map/v2 v2.1.8 // indirect
|
||||
github.com/yosida95/uritemplate/v3 v3.0.2 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
)
|
|
@ -0,0 +1,39 @@
|
|||
github.com/bahlo/generic-list-go v0.2.0 h1:5sz/EEAK+ls5wF+NeqDpk5+iNdMDXrh3z3nPnH1Wvgk=
|
||||
github.com/bahlo/generic-list-go v0.2.0/go.mod h1:2KvAjgMlE5NNynlg/5iLrrCCZ2+5xWbdbCW3pNTGyYg=
|
||||
github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs=
|
||||
github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
|
||||
github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
|
||||
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
|
||||
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/invopop/jsonschema v0.13.0 h1:KvpoAJWEjR3uD9Kbm2HWJmqsEaHt8lBUpd0qHcIi21E=
|
||||
github.com/invopop/jsonschema v0.13.0/go.mod h1:ffZ5Km5SWWRAIN6wbDXItl95euhFz2uON45H2qjYt+0=
|
||||
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
|
||||
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
|
||||
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
|
||||
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
|
||||
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
|
||||
github.com/mark3labs/mcp-go v0.37.0 h1:BywvZLPRT6Zx6mMG/MJfxLSZQkTGIcJSEGKsvr4DsoQ=
|
||||
github.com/mark3labs/mcp-go v0.37.0/go.mod h1:T7tUa2jO6MavG+3P25Oy/jR7iCeJPHImCZHRymCn39g=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
|
||||
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
|
||||
github.com/spf13/cast v1.7.1 h1:cuNEagBQEHWN1FnbGEjCXL2szYEXqfJPbP2HNUaca9Y=
|
||||
github.com/spf13/cast v1.7.1/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo=
|
||||
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
|
||||
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||
github.com/wk8/go-ordered-map/v2 v2.1.8 h1:5h/BUHu93oj4gIdvHHHGsScSTMijfx5PeYkE/fJgbpc=
|
||||
github.com/wk8/go-ordered-map/v2 v2.1.8/go.mod h1:5nJHM5DyteebpVlHnWMV0rPz6Zp7+xBAnxjb1X5vnTw=
|
||||
github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4=
|
||||
github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
|
@ -0,0 +1,695 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"git.teamworkapps.com/shortcut/cremote/client"
|
||||
"github.com/mark3labs/mcp-go/mcp"
|
||||
"github.com/mark3labs/mcp-go/server"
|
||||
)
|
||||
|
||||
// CremoteServer wraps the cremote client for MCP
|
||||
type CremoteServer struct {
|
||||
client *client.Client
|
||||
currentTab string
|
||||
tabHistory []string
|
||||
iframeMode bool
|
||||
screenshots []string
|
||||
}
|
||||
|
||||
// NewCremoteServer creates a new cremote MCP server
|
||||
func NewCremoteServer(host string, port int) *CremoteServer {
|
||||
return &CremoteServer{
|
||||
client: client.NewClient(host, port),
|
||||
tabHistory: make([]string, 0),
|
||||
screenshots: make([]string, 0),
|
||||
}
|
||||
}
|
||||
|
||||
// Helper functions for parameter extraction

// getStringParam looks up key in params and returns the stored value when it
// is a string. A missing key or a value of any other type yields defaultValue.
func getStringParam(params map[string]any, key, defaultValue string) string {
	str, isString := params[key].(string)
	if !isString {
		return defaultValue
	}
	return str
}
|
||||
|
||||
// getBoolParam looks up key in params and returns the stored value when it is
// a bool. A missing key or a value of any other type yields defaultValue.
func getBoolParam(params map[string]any, key string, defaultValue bool) bool {
	flag, isBool := params[key].(bool)
	if !isBool {
		return defaultValue
	}
	return flag
}
|
||||
|
||||
// getIntParam returns the integer stored under key in params, or defaultValue
// when the key is missing or holds a value that cannot be used as an int.
//
// A float64 value is truncated toward zero. NaN, infinities and values outside
// the range of int fall back to defaultValue, because converting such a float
// with int() gives an implementation-dependent result. A native int value is
// returned unchanged. Every other type yields defaultValue.
func getIntParam(params map[string]any, key string, defaultValue int) int {
	const (
		maxInt = int(^uint(0) >> 1)
		minInt = -maxInt - 1
	)

	switch val := params[key].(type) {
	case float64:
		// minInt is a power of two, so it is exact as a float64 and its
		// negation is the first value past maxInt. NaN fails both comparisons.
		if val >= float64(minInt) && val < -float64(minInt) {
			return int(val)
		}
	case int:
		return val
	}
	return defaultValue
}
|
||||
|
||||
func main() {
|
||||
// Get cremote daemon connection settings
|
||||
cremoteHost := os.Getenv("CREMOTE_HOST")
|
||||
if cremoteHost == "" {
|
||||
cremoteHost = "localhost"
|
||||
}
|
||||
|
||||
cremotePortStr := os.Getenv("CREMOTE_PORT")
|
||||
cremotePort := 8989
|
||||
if cremotePortStr != "" {
|
||||
if p, err := strconv.Atoi(cremotePortStr); err == nil {
|
||||
cremotePort = p
|
||||
}
|
||||
}
|
||||
|
||||
log.Printf("Starting cremote MCP server, connecting to cremote daemon at %s:%d", cremoteHost, cremotePort)
|
||||
|
||||
// Create the cremote server
|
||||
cremoteServer := NewCremoteServer(cremoteHost, cremotePort)
|
||||
|
||||
// Create MCP server
|
||||
mcpServer := server.NewMCPServer("cremote-mcp", "1.0.0")
|
||||
|
||||
// Register web_navigate tool
|
||||
mcpServer.AddTool(mcp.Tool{
|
||||
Name: "web_navigate",
|
||||
Description: "Navigate to a URL and optionally take a screenshot",
|
||||
InputSchema: mcp.ToolInputSchema{
|
||||
Type: "object",
|
||||
Properties: map[string]any{
|
||||
"url": map[string]any{
|
||||
"type": "string",
|
||||
"description": "URL to navigate to",
|
||||
},
|
||||
"tab": map[string]any{
|
||||
"type": "string",
|
||||
"description": "Tab ID (optional, uses current tab)",
|
||||
},
|
||||
"timeout": map[string]any{
|
||||
"type": "integer",
|
||||
"description": "Timeout in seconds",
|
||||
"default": 5,
|
||||
},
|
||||
"screenshot": map[string]any{
|
||||
"type": "boolean",
|
||||
"description": "Take screenshot after navigation",
|
||||
},
|
||||
},
|
||||
Required: []string{"url"},
|
||||
},
|
||||
}, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
|
||||
// Convert arguments to map
|
||||
params, ok := request.Params.Arguments.(map[string]any)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid arguments format")
|
||||
}
|
||||
|
||||
url := getStringParam(params, "url", "")
|
||||
if url == "" {
|
||||
return nil, fmt.Errorf("url parameter is required")
|
||||
}
|
||||
|
||||
tab := getStringParam(params, "tab", cremoteServer.currentTab)
|
||||
timeout := getIntParam(params, "timeout", 5)
|
||||
takeScreenshot := getBoolParam(params, "screenshot", false)
|
||||
|
||||
// If no tab specified and no current tab, create a new one
|
||||
if tab == "" {
|
||||
newTab, err := cremoteServer.client.OpenTab(timeout)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create new tab: %w", err)
|
||||
}
|
||||
tab = newTab
|
||||
cremoteServer.currentTab = tab
|
||||
cremoteServer.tabHistory = append(cremoteServer.tabHistory, tab)
|
||||
}
|
||||
|
||||
// Load the URL
|
||||
err := cremoteServer.client.LoadURL(tab, url, timeout)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to load URL: %w", err)
|
||||
}
|
||||
|
||||
message := fmt.Sprintf("Successfully navigated to %s in tab %s", url, tab)
|
||||
|
||||
// Take screenshot if requested
|
||||
if takeScreenshot {
|
||||
screenshotPath := fmt.Sprintf("/tmp/navigate-%d.png", time.Now().Unix())
|
||||
err = cremoteServer.client.TakeScreenshot(tab, screenshotPath, false, timeout)
|
||||
if err == nil {
|
||||
cremoteServer.screenshots = append(cremoteServer.screenshots, screenshotPath)
|
||||
message += fmt.Sprintf(" (screenshot saved to %s)", screenshotPath)
|
||||
}
|
||||
}
|
||||
|
||||
return &mcp.CallToolResult{
|
||||
Content: []mcp.Content{
|
||||
mcp.NewTextContent(message),
|
||||
},
|
||||
IsError: false,
|
||||
}, nil
|
||||
})
|
||||
|
||||
// Register web_interact tool
|
||||
mcpServer.AddTool(mcp.Tool{
|
||||
Name: "web_interact",
|
||||
Description: "Interact with web elements (click, fill, submit)",
|
||||
InputSchema: mcp.ToolInputSchema{
|
||||
Type: "object",
|
||||
Properties: map[string]any{
|
||||
"action": map[string]any{
|
||||
"type": "string",
|
||||
"description": "Action to perform",
|
||||
"enum": []any{"click", "fill", "submit", "upload"},
|
||||
},
|
||||
"selector": map[string]any{
|
||||
"type": "string",
|
||||
"description": "CSS selector for the element",
|
||||
},
|
||||
"value": map[string]any{
|
||||
"type": "string",
|
||||
"description": "Value to fill (for fill/upload actions)",
|
||||
},
|
||||
"tab": map[string]any{
|
||||
"type": "string",
|
||||
"description": "Tab ID (optional)",
|
||||
},
|
||||
"timeout": map[string]any{
|
||||
"type": "integer",
|
||||
"description": "Timeout in seconds",
|
||||
"default": 5,
|
||||
},
|
||||
},
|
||||
Required: []string{"action", "selector"},
|
||||
},
|
||||
}, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
|
||||
// Convert arguments to map
|
||||
params, ok := request.Params.Arguments.(map[string]any)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid arguments format")
|
||||
}
|
||||
|
||||
action := getStringParam(params, "action", "")
|
||||
selector := getStringParam(params, "selector", "")
|
||||
value := getStringParam(params, "value", "")
|
||||
tab := getStringParam(params, "tab", cremoteServer.currentTab)
|
||||
timeout := getIntParam(params, "timeout", 5)
|
||||
|
||||
if action == "" {
|
||||
return nil, fmt.Errorf("action parameter is required")
|
||||
}
|
||||
if selector == "" {
|
||||
return nil, fmt.Errorf("selector parameter is required")
|
||||
}
|
||||
if tab == "" {
|
||||
return nil, fmt.Errorf("no tab available - navigate to a page first")
|
||||
}
|
||||
|
||||
var err error
|
||||
var message string
|
||||
|
||||
switch action {
|
||||
case "click":
|
||||
err = cremoteServer.client.ClickElement(tab, selector, timeout, timeout)
|
||||
message = fmt.Sprintf("Clicked element %s", selector)
|
||||
|
||||
case "fill":
|
||||
if value == "" {
|
||||
return nil, fmt.Errorf("value parameter is required for fill action")
|
||||
}
|
||||
err = cremoteServer.client.FillFormField(tab, selector, value, timeout, timeout)
|
||||
message = fmt.Sprintf("Filled element %s with value", selector)
|
||||
|
||||
case "submit":
|
||||
err = cremoteServer.client.SubmitForm(tab, selector, timeout, timeout)
|
||||
message = fmt.Sprintf("Submitted form %s", selector)
|
||||
|
||||
case "upload":
|
||||
if value == "" {
|
||||
return nil, fmt.Errorf("value parameter (file path) is required for upload action")
|
||||
}
|
||||
err = cremoteServer.client.UploadFile(tab, selector, value, timeout, timeout)
|
||||
message = fmt.Sprintf("Uploaded file %s to element %s", value, selector)
|
||||
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown action: %s", action)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to %s element: %w", action, err)
|
||||
}
|
||||
|
||||
return &mcp.CallToolResult{
|
||||
Content: []mcp.Content{
|
||||
mcp.NewTextContent(message),
|
||||
},
|
||||
IsError: false,
|
||||
}, nil
|
||||
})
|
||||
|
||||
// Register web_extract tool
|
||||
mcpServer.AddTool(mcp.Tool{
|
||||
Name: "web_extract",
|
||||
Description: "Extract data from the page (source, element HTML, or execute JavaScript)",
|
||||
InputSchema: mcp.ToolInputSchema{
|
||||
Type: "object",
|
||||
Properties: map[string]any{
|
||||
"type": map[string]any{
|
||||
"type": "string",
|
||||
"description": "Type of extraction",
|
||||
"enum": []any{"source", "element", "javascript"},
|
||||
},
|
||||
"selector": map[string]any{
|
||||
"type": "string",
|
||||
"description": "CSS selector (for element type)",
|
||||
},
|
||||
"code": map[string]any{
|
||||
"type": "string",
|
||||
"description": "JavaScript code (for javascript type)",
|
||||
},
|
||||
"tab": map[string]any{
|
||||
"type": "string",
|
||||
"description": "Tab ID (optional)",
|
||||
},
|
||||
"timeout": map[string]any{
|
||||
"type": "integer",
|
||||
"description": "Timeout in seconds",
|
||||
"default": 5,
|
||||
},
|
||||
},
|
||||
Required: []string{"type"},
|
||||
},
|
||||
}, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
|
||||
// Convert arguments to map
|
||||
params, ok := request.Params.Arguments.(map[string]any)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid arguments format")
|
||||
}
|
||||
|
||||
extractType := getStringParam(params, "type", "")
|
||||
selector := getStringParam(params, "selector", "")
|
||||
code := getStringParam(params, "code", "")
|
||||
tab := getStringParam(params, "tab", cremoteServer.currentTab)
|
||||
timeout := getIntParam(params, "timeout", 5)
|
||||
|
||||
if extractType == "" {
|
||||
return nil, fmt.Errorf("type parameter is required")
|
||||
}
|
||||
if tab == "" {
|
||||
return nil, fmt.Errorf("no tab available - navigate to a page first")
|
||||
}
|
||||
|
||||
var data string
|
||||
var err error
|
||||
|
||||
switch extractType {
|
||||
case "source":
|
||||
data, err = cremoteServer.client.GetPageSource(tab, timeout)
|
||||
|
||||
case "element":
|
||||
if selector == "" {
|
||||
return nil, fmt.Errorf("selector parameter is required for element extraction")
|
||||
}
|
||||
data, err = cremoteServer.client.GetElementHTML(tab, selector, timeout)
|
||||
|
||||
case "javascript":
|
||||
if code == "" {
|
||||
return nil, fmt.Errorf("code parameter is required for javascript extraction")
|
||||
}
|
||||
data, err = cremoteServer.client.EvalJS(tab, code, timeout)
|
||||
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown extraction type: %s", extractType)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to extract %s: %w", extractType, err)
|
||||
}
|
||||
|
||||
return &mcp.CallToolResult{
|
||||
Content: []mcp.Content{
|
||||
mcp.NewTextContent(fmt.Sprintf("Extracted %s data: %s", extractType, data)),
|
||||
},
|
||||
IsError: false,
|
||||
}, nil
|
||||
})
|
||||
|
||||
// Register web_screenshot tool
|
||||
mcpServer.AddTool(mcp.Tool{
|
||||
Name: "web_screenshot",
|
||||
Description: "Take a screenshot of the current page",
|
||||
InputSchema: mcp.ToolInputSchema{
|
||||
Type: "object",
|
||||
Properties: map[string]any{
|
||||
"output": map[string]any{
|
||||
"type": "string",
|
||||
"description": "Output file path",
|
||||
},
|
||||
"full_page": map[string]any{
|
||||
"type": "boolean",
|
||||
"description": "Capture full page",
|
||||
"default": false,
|
||||
},
|
||||
"tab": map[string]any{
|
||||
"type": "string",
|
||||
"description": "Tab ID (optional)",
|
||||
},
|
||||
"timeout": map[string]any{
|
||||
"type": "integer",
|
||||
"description": "Timeout in seconds",
|
||||
"default": 5,
|
||||
},
|
||||
},
|
||||
Required: []string{"output"},
|
||||
},
|
||||
}, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
|
||||
// Convert arguments to map
|
||||
params, ok := request.Params.Arguments.(map[string]any)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid arguments format")
|
||||
}
|
||||
|
||||
output := getStringParam(params, "output", "")
|
||||
fullPage := getBoolParam(params, "full_page", false)
|
||||
tab := getStringParam(params, "tab", cremoteServer.currentTab)
|
||||
timeout := getIntParam(params, "timeout", 5)
|
||||
|
||||
if output == "" {
|
||||
return nil, fmt.Errorf("output parameter is required")
|
||||
}
|
||||
if tab == "" {
|
||||
return nil, fmt.Errorf("no tab available - navigate to a page first")
|
||||
}
|
||||
|
||||
err := cremoteServer.client.TakeScreenshot(tab, output, fullPage, timeout)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to take screenshot: %w", err)
|
||||
}
|
||||
|
||||
cremoteServer.screenshots = append(cremoteServer.screenshots, output)
|
||||
|
||||
return &mcp.CallToolResult{
|
||||
Content: []mcp.Content{
|
||||
mcp.NewTextContent(fmt.Sprintf("Screenshot saved to %s", output)),
|
||||
},
|
||||
IsError: false,
|
||||
}, nil
|
||||
})
|
||||
|
||||
// Register web_manage_tabs tool
|
||||
mcpServer.AddTool(mcp.Tool{
|
||||
Name: "web_manage_tabs",
|
||||
Description: "Manage browser tabs (open, close, list, switch)",
|
||||
InputSchema: mcp.ToolInputSchema{
|
||||
Type: "object",
|
||||
Properties: map[string]any{
|
||||
"action": map[string]any{
|
||||
"type": "string",
|
||||
"description": "Action to perform",
|
||||
"enum": []any{"open", "close", "list", "switch"},
|
||||
},
|
||||
"tab": map[string]any{
|
||||
"type": "string",
|
||||
"description": "Tab ID (for close/switch actions)",
|
||||
},
|
||||
"timeout": map[string]any{
|
||||
"type": "integer",
|
||||
"description": "Timeout in seconds",
|
||||
"default": 5,
|
||||
},
|
||||
},
|
||||
Required: []string{"action"},
|
||||
},
|
||||
}, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
|
||||
// Convert arguments to map
|
||||
params, ok := request.Params.Arguments.(map[string]any)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid arguments format")
|
||||
}
|
||||
|
||||
action := getStringParam(params, "action", "")
|
||||
tab := getStringParam(params, "tab", "")
|
||||
timeout := getIntParam(params, "timeout", 5)
|
||||
|
||||
if action == "" {
|
||||
return nil, fmt.Errorf("action parameter is required")
|
||||
}
|
||||
|
||||
var err error
|
||||
var message string
|
||||
|
||||
switch action {
|
||||
case "open":
|
||||
newTab, err := cremoteServer.client.OpenTab(timeout)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to open new tab: %w", err)
|
||||
}
|
||||
cremoteServer.currentTab = newTab
|
||||
cremoteServer.tabHistory = append(cremoteServer.tabHistory, newTab)
|
||||
message = fmt.Sprintf("Opened new tab: %s", newTab)
|
||||
|
||||
case "close":
|
||||
if tab == "" {
|
||||
return nil, fmt.Errorf("tab parameter is required for close action")
|
||||
}
|
||||
err = cremoteServer.client.CloseTab(tab, timeout)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to close tab: %w", err)
|
||||
}
|
||||
// Remove from history and update current tab
|
||||
for i, id := range cremoteServer.tabHistory {
|
||||
if id == tab {
|
||||
cremoteServer.tabHistory = append(cremoteServer.tabHistory[:i], cremoteServer.tabHistory[i+1:]...)
|
||||
break
|
||||
}
|
||||
}
|
||||
if cremoteServer.currentTab == tab {
|
||||
if len(cremoteServer.tabHistory) > 0 {
|
||||
cremoteServer.currentTab = cremoteServer.tabHistory[len(cremoteServer.tabHistory)-1]
|
||||
} else {
|
||||
cremoteServer.currentTab = ""
|
||||
}
|
||||
}
|
||||
message = fmt.Sprintf("Closed tab: %s", tab)
|
||||
|
||||
case "list":
|
||||
tabs, err := cremoteServer.client.ListTabs()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to list tabs: %w", err)
|
||||
}
|
||||
message = fmt.Sprintf("Found %d tabs: %v", len(tabs), tabs)
|
||||
|
||||
case "switch":
|
||||
if tab == "" {
|
||||
return nil, fmt.Errorf("tab parameter is required for switch action")
|
||||
}
|
||||
cremoteServer.currentTab = tab
|
||||
// Add to history if not already there
|
||||
found := false
|
||||
for _, t := range cremoteServer.tabHistory {
|
||||
if t == tab {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
cremoteServer.tabHistory = append(cremoteServer.tabHistory, tab)
|
||||
}
|
||||
message = fmt.Sprintf("Switched to tab: %s", tab)
|
||||
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown tab action: %s", action)
|
||||
}
|
||||
|
||||
return &mcp.CallToolResult{
|
||||
Content: []mcp.Content{
|
||||
mcp.NewTextContent(message),
|
||||
},
|
||||
IsError: false,
|
||||
}, nil
|
||||
})
|
||||
|
||||
// Register web_iframe tool
|
||||
mcpServer.AddTool(mcp.Tool{
|
||||
Name: "web_iframe",
|
||||
Description: "Switch iframe context for subsequent operations",
|
||||
InputSchema: mcp.ToolInputSchema{
|
||||
Type: "object",
|
||||
Properties: map[string]any{
|
||||
"action": map[string]any{
|
||||
"type": "string",
|
||||
"description": "Action to perform",
|
||||
"enum": []any{"enter", "exit"},
|
||||
},
|
||||
"selector": map[string]any{
|
||||
"type": "string",
|
||||
"description": "Iframe CSS selector (for enter action)",
|
||||
},
|
||||
"tab": map[string]any{
|
||||
"type": "string",
|
||||
"description": "Tab ID (optional)",
|
||||
},
|
||||
},
|
||||
Required: []string{"action"},
|
||||
},
|
||||
}, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
|
||||
// Convert arguments to map
|
||||
params, ok := request.Params.Arguments.(map[string]any)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid arguments format")
|
||||
}
|
||||
|
||||
action := getStringParam(params, "action", "")
|
||||
selector := getStringParam(params, "selector", "")
|
||||
tab := getStringParam(params, "tab", cremoteServer.currentTab)
|
||||
|
||||
if action == "" {
|
||||
return nil, fmt.Errorf("action parameter is required")
|
||||
}
|
||||
if tab == "" {
|
||||
return nil, fmt.Errorf("no tab available - navigate to a page first")
|
||||
}
|
||||
|
||||
var err error
|
||||
var message string
|
||||
|
||||
switch action {
|
||||
case "enter":
|
||||
if selector == "" {
|
||||
return nil, fmt.Errorf("selector parameter is required for enter action")
|
||||
}
|
||||
err = cremoteServer.client.SwitchToIframe(tab, selector)
|
||||
cremoteServer.iframeMode = true
|
||||
message = fmt.Sprintf("Entered iframe: %s", selector)
|
||||
|
||||
case "exit":
|
||||
err = cremoteServer.client.SwitchToMain(tab)
|
||||
cremoteServer.iframeMode = false
|
||||
message = "Exited iframe context"
|
||||
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown iframe action: %s", action)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to %s iframe: %w", action, err)
|
||||
}
|
||||
|
||||
return &mcp.CallToolResult{
|
||||
Content: []mcp.Content{
|
||||
mcp.NewTextContent(message),
|
||||
},
|
||||
IsError: false,
|
||||
}, nil
|
||||
})
|
||||
|
||||
// Register file_upload tool
|
||||
mcpServer.AddTool(mcp.Tool{
|
||||
Name: "file_upload",
|
||||
Description: "Upload a file from the client to the container for use in form uploads",
|
||||
InputSchema: mcp.ToolInputSchema{
|
||||
Type: "object",
|
||||
Properties: map[string]any{
|
||||
"local_path": map[string]any{
|
||||
"type": "string",
|
||||
"description": "Path to the file on the client machine",
|
||||
},
|
||||
"container_path": map[string]any{
|
||||
"type": "string",
|
||||
"description": "Optional path where to store the file in the container (defaults to /tmp/filename)",
|
||||
},
|
||||
},
|
||||
Required: []string{"local_path"},
|
||||
},
|
||||
}, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
|
||||
// Convert arguments to map
|
||||
params, ok := request.Params.Arguments.(map[string]any)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid arguments format")
|
||||
}
|
||||
|
||||
localPath := getStringParam(params, "local_path", "")
|
||||
containerPath := getStringParam(params, "container_path", "")
|
||||
|
||||
if localPath == "" {
|
||||
return nil, fmt.Errorf("local_path parameter is required")
|
||||
}
|
||||
|
||||
// Upload the file to the container
|
||||
targetPath, err := cremoteServer.client.UploadFileToContainer(localPath, containerPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to upload file: %w", err)
|
||||
}
|
||||
|
||||
return &mcp.CallToolResult{
|
||||
Content: []mcp.Content{
|
||||
mcp.NewTextContent(fmt.Sprintf("File uploaded successfully to container at: %s", targetPath)),
|
||||
},
|
||||
IsError: false,
|
||||
}, nil
|
||||
})
|
||||
|
||||
// Register file_download tool
|
||||
mcpServer.AddTool(mcp.Tool{
|
||||
Name: "file_download",
|
||||
Description: "Download a file from the container to the client (e.g., downloaded files from browser)",
|
||||
InputSchema: mcp.ToolInputSchema{
|
||||
Type: "object",
|
||||
Properties: map[string]any{
|
||||
"container_path": map[string]any{
|
||||
"type": "string",
|
||||
"description": "Path to the file in the container",
|
||||
},
|
||||
"local_path": map[string]any{
|
||||
"type": "string",
|
||||
"description": "Path where to save the file on the client machine",
|
||||
},
|
||||
},
|
||||
Required: []string{"container_path", "local_path"},
|
||||
},
|
||||
}, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
|
||||
// Convert arguments to map
|
||||
params, ok := request.Params.Arguments.(map[string]any)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid arguments format")
|
||||
}
|
||||
|
||||
containerPath := getStringParam(params, "container_path", "")
|
||||
localPath := getStringParam(params, "local_path", "")
|
||||
|
||||
if containerPath == "" {
|
||||
return nil, fmt.Errorf("container_path parameter is required")
|
||||
}
|
||||
if localPath == "" {
|
||||
return nil, fmt.Errorf("local_path parameter is required")
|
||||
}
|
||||
|
||||
// Download the file from the container
|
||||
err := cremoteServer.client.DownloadFileFromContainer(containerPath, localPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to download file: %w", err)
|
||||
}
|
||||
|
||||
return &mcp.CallToolResult{
|
||||
Content: []mcp.Content{
|
||||
mcp.NewTextContent(fmt.Sprintf("File downloaded successfully from container to: %s", localPath)),
|
||||
},
|
||||
IsError: false,
|
||||
}, nil
|
||||
})
|
||||
|
||||
// Start the server
|
||||
log.Printf("Cremote MCP server ready")
|
||||
if err := server.ServeStdio(mcpServer); err != nil {
|
||||
log.Fatalf("Server error: %v", err)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,5 @@
|
|||
#!/bin/bash
# Smoke test: pipe a single tools/list request into the MCP server's stdin.
(
  # JSON-RPC 2.0 requires the "jsonrpc" member on every request; without it
  # the server may reject the message as an invalid request.
  echo '{"jsonrpc":"2.0","method":"tools/list","params":{},"id":1}'
  # Presumably gives the server time to answer before stdin closes.
  sleep 1
) | ./cremote-mcp
|
|
@ -0,0 +1 @@
|
|||
This is a test file for cremote file transfer
|
Loading…
Reference in New Issue