cremote/mcp/main.go

805 lines
23 KiB
Go

package main
import (
"context"
"fmt"
"log"
"os"
"strconv"
"time"
"git.teamworkapps.com/shortcut/cremote/client"
"github.com/mark3labs/mcp-go/mcp"
"github.com/mark3labs/mcp-go/server"
)
// CremoteServer bundles the cremote daemon client with the browsing state
// that the MCP tool handlers share from one call to the next.
type CremoteServer struct {
	// client is the connection to the cremote daemon.
	client *client.Client

	// currentTab is the tab ID tool handlers fall back to when a call does
	// not name a tab; it is empty until a tab has been opened or selected.
	currentTab string

	// tabHistory records the tab IDs opened or switched to through the tools;
	// closed tabs are removed from it.
	tabHistory []string

	// iframeMode is set by the web_iframe tool: true after "enter",
	// false after "exit".
	iframeMode bool

	// screenshots accumulates the output paths of screenshots taken through
	// the tools.
	screenshots []string
}
// NewCremoteServer builds a CremoteServer whose client targets the cremote
// daemon at host:port. The tab history and screenshot list start out empty
// (but non-nil); no tab is selected yet.
func NewCremoteServer(host string, port int) *CremoteServer {
	srv := new(CremoteServer)
	srv.client = client.NewClient(host, port)
	srv.tabHistory = []string{}
	srv.screenshots = []string{}
	return srv
}
// Helper functions for parameter extraction

// getStringParam looks up key in params and returns its value when that value
// is a string. A missing key or a value of any other type yields defaultValue.
// An empty string stored under key is returned as-is.
func getStringParam(params map[string]any, key, defaultValue string) string {
	switch s := params[key].(type) {
	case string:
		return s
	default:
		return defaultValue
	}
}
// getBoolParam looks up key in params and returns its value when that value
// is a bool. A missing key or a value of any other type yields defaultValue.
func getBoolParam(params map[string]any, key string, defaultValue bool) bool {
	raw, present := params[key]
	if !present {
		return defaultValue
	}
	flag, isBool := raw.(bool)
	if !isBool {
		return defaultValue
	}
	return flag
}
// getIntParam looks up key in params and returns its value as an int.
//
// Accepted value types:
//   - float64 (what encoding/json produces for numbers decoded into any):
//     truncated toward zero. NaN, ±Inf and values outside the range of int
//     yield defaultValue, because converting such a float to int has an
//     implementation-dependent result.
//   - int: returned unchanged.
//   - int64: returned when it fits in int, otherwise defaultValue.
//
// A missing key or a value of any other type yields defaultValue.
func getIntParam(params map[string]any, key string, defaultValue int) int {
	const (
		maxInt = int(^uint(0) >> 1)
		minInt = -maxInt - 1
	)

	switch val := params[key].(type) {
	case float64:
		// NaN fails both comparisons, so it falls through to the default.
		// The upper bound is exclusive at maxInt+1 so that fractional values
		// just above maxInt still truncate to maxInt where representable.
		if val >= float64(minInt) && val < float64(maxInt)+1 {
			return int(val)
		}
	case int:
		return val
	case int64:
		// Only relevant where int is narrower than 64 bits.
		if val >= int64(minInt) && val <= int64(maxInt) {
			return int(val)
		}
	}
	return defaultValue
}
// main configures the cremote MCP server, registers its tools and serves
// them over stdio until the transport fails.
//
// The cremote daemon address comes from the CREMOTE_HOST and CREMOTE_PORT
// environment variables and defaults to localhost:8989. An unparsable
// CREMOTE_PORT is logged and the default port is used.
//
// Registered tools: web_navigate, web_interact, web_extract, web_screenshot,
// web_manage_tabs, web_iframe, file_upload, file_download, console_logs and
// console_command. All handlers share one CremoteServer, so the "current tab"
// chosen by one call is the default tab of the next.
func main() {
	// Get cremote daemon connection settings
	cremoteHost := os.Getenv("CREMOTE_HOST")
	if cremoteHost == "" {
		cremoteHost = "localhost"
	}

	cremotePort := 8989
	if cremotePortStr := os.Getenv("CREMOTE_PORT"); cremotePortStr != "" {
		p, err := strconv.Atoi(cremotePortStr)
		if err != nil {
			// Keep running with the default, but make the misconfiguration visible.
			log.Printf("Ignoring invalid CREMOTE_PORT %q, using default port %d: %v", cremotePortStr, cremotePort, err)
		} else {
			cremotePort = p
		}
	}

	log.Printf("Starting cremote MCP server, connecting to cremote daemon at %s:%d", cremoteHost, cremotePort)

	// Create the cremote server
	cremoteServer := NewCremoteServer(cremoteHost, cremotePort)

	// Create MCP server
	mcpServer := server.NewMCPServer("cremote-mcp", "1.0.0")

	// Register web_navigate tool
	mcpServer.AddTool(mcp.Tool{
		Name:        "web_navigate",
		Description: "Navigate to a URL and optionally take a screenshot",
		InputSchema: mcp.ToolInputSchema{
			Type: "object",
			Properties: map[string]any{
				"url": map[string]any{
					"type":        "string",
					"description": "URL to navigate to",
				},
				"tab": map[string]any{
					"type":        "string",
					"description": "Tab ID (optional, uses current tab)",
				},
				"timeout": map[string]any{
					"type":        "integer",
					"description": "Timeout in seconds",
					"default":     5,
				},
				"screenshot": map[string]any{
					"type":        "boolean",
					"description": "Take screenshot after navigation",
				},
			},
			Required: []string{"url"},
		},
	}, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
		// Convert arguments to map
		params, ok := request.Params.Arguments.(map[string]any)
		if !ok {
			return nil, fmt.Errorf("invalid arguments format")
		}

		url := getStringParam(params, "url", "")
		if url == "" {
			return nil, fmt.Errorf("url parameter is required")
		}
		tab := getStringParam(params, "tab", cremoteServer.currentTab)
		timeout := getIntParam(params, "timeout", 5)
		takeScreenshot := getBoolParam(params, "screenshot", false)

		// If no tab specified and no current tab, create a new one
		if tab == "" {
			newTab, err := cremoteServer.client.OpenTab(timeout)
			if err != nil {
				return nil, fmt.Errorf("failed to create new tab: %w", err)
			}
			tab = newTab
			cremoteServer.currentTab = tab
			cremoteServer.tabHistory = append(cremoteServer.tabHistory, tab)
		}

		// Load the URL
		if err := cremoteServer.client.LoadURL(tab, url, timeout); err != nil {
			return nil, fmt.Errorf("failed to load URL: %w", err)
		}
		message := fmt.Sprintf("Successfully navigated to %s in tab %s", url, tab)

		// Take screenshot if requested. The screenshot is best-effort: the
		// navigation already succeeded, so a failure is reported in the
		// message instead of failing the whole call.
		if takeScreenshot {
			screenshotPath := fmt.Sprintf("/tmp/navigate-%d.png", time.Now().Unix())
			if err := cremoteServer.client.TakeScreenshot(tab, screenshotPath, false, timeout); err != nil {
				message += fmt.Sprintf(" (screenshot failed: %v)", err)
			} else {
				cremoteServer.screenshots = append(cremoteServer.screenshots, screenshotPath)
				message += fmt.Sprintf(" (screenshot saved to %s)", screenshotPath)
			}
		}

		return &mcp.CallToolResult{
			Content: []mcp.Content{
				mcp.NewTextContent(message),
			},
			IsError: false,
		}, nil
	})

	// Register web_interact tool
	mcpServer.AddTool(mcp.Tool{
		Name:        "web_interact",
		Description: "Interact with web elements (click, fill, submit)",
		InputSchema: mcp.ToolInputSchema{
			Type: "object",
			Properties: map[string]any{
				"action": map[string]any{
					"type":        "string",
					"description": "Action to perform",
					"enum":        []any{"click", "fill", "submit", "upload"},
				},
				"selector": map[string]any{
					"type":        "string",
					"description": "CSS selector for the element",
				},
				"value": map[string]any{
					"type":        "string",
					"description": "Value to fill (for fill/upload actions)",
				},
				"tab": map[string]any{
					"type":        "string",
					"description": "Tab ID (optional)",
				},
				"timeout": map[string]any{
					"type":        "integer",
					"description": "Timeout in seconds",
					"default":     5,
				},
			},
			Required: []string{"action", "selector"},
		},
	}, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
		// Convert arguments to map
		params, ok := request.Params.Arguments.(map[string]any)
		if !ok {
			return nil, fmt.Errorf("invalid arguments format")
		}

		action := getStringParam(params, "action", "")
		selector := getStringParam(params, "selector", "")
		value := getStringParam(params, "value", "")
		tab := getStringParam(params, "tab", cremoteServer.currentTab)
		timeout := getIntParam(params, "timeout", 5)

		if action == "" {
			return nil, fmt.Errorf("action parameter is required")
		}
		if selector == "" {
			return nil, fmt.Errorf("selector parameter is required")
		}
		if tab == "" {
			return nil, fmt.Errorf("no tab available - navigate to a page first")
		}

		// The same timeout value is passed for both timeout arguments of the
		// client calls below.
		var err error
		var message string
		switch action {
		case "click":
			err = cremoteServer.client.ClickElement(tab, selector, timeout, timeout)
			message = fmt.Sprintf("Clicked element %s", selector)
		case "fill":
			if value == "" {
				return nil, fmt.Errorf("value parameter is required for fill action")
			}
			err = cremoteServer.client.FillFormField(tab, selector, value, timeout, timeout)
			message = fmt.Sprintf("Filled element %s with value", selector)
		case "submit":
			err = cremoteServer.client.SubmitForm(tab, selector, timeout, timeout)
			message = fmt.Sprintf("Submitted form %s", selector)
		case "upload":
			if value == "" {
				return nil, fmt.Errorf("value parameter (file path) is required for upload action")
			}
			err = cremoteServer.client.UploadFile(tab, selector, value, timeout, timeout)
			message = fmt.Sprintf("Uploaded file %s to element %s", value, selector)
		default:
			return nil, fmt.Errorf("unknown action: %s", action)
		}
		if err != nil {
			return nil, fmt.Errorf("failed to %s element: %w", action, err)
		}

		return &mcp.CallToolResult{
			Content: []mcp.Content{
				mcp.NewTextContent(message),
			},
			IsError: false,
		}, nil
	})

	// Register web_extract tool
	mcpServer.AddTool(mcp.Tool{
		Name:        "web_extract",
		Description: "Extract data from the page (source, element HTML, or execute JavaScript)",
		InputSchema: mcp.ToolInputSchema{
			Type: "object",
			Properties: map[string]any{
				"type": map[string]any{
					"type":        "string",
					"description": "Type of extraction",
					"enum":        []any{"source", "element", "javascript"},
				},
				"selector": map[string]any{
					"type":        "string",
					"description": "CSS selector (for element type)",
				},
				"code": map[string]any{
					"type":        "string",
					"description": "JavaScript code (for javascript type)",
				},
				"tab": map[string]any{
					"type":        "string",
					"description": "Tab ID (optional)",
				},
				"timeout": map[string]any{
					"type":        "integer",
					"description": "Timeout in seconds",
					"default":     5,
				},
			},
			Required: []string{"type"},
		},
	}, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
		// Convert arguments to map
		params, ok := request.Params.Arguments.(map[string]any)
		if !ok {
			return nil, fmt.Errorf("invalid arguments format")
		}

		extractType := getStringParam(params, "type", "")
		selector := getStringParam(params, "selector", "")
		code := getStringParam(params, "code", "")
		tab := getStringParam(params, "tab", cremoteServer.currentTab)
		timeout := getIntParam(params, "timeout", 5)

		if extractType == "" {
			return nil, fmt.Errorf("type parameter is required")
		}
		if tab == "" {
			return nil, fmt.Errorf("no tab available - navigate to a page first")
		}

		var data string
		var err error
		switch extractType {
		case "source":
			data, err = cremoteServer.client.GetPageSource(tab, timeout)
		case "element":
			if selector == "" {
				return nil, fmt.Errorf("selector parameter is required for element extraction")
			}
			data, err = cremoteServer.client.GetElementHTML(tab, selector, timeout)
		case "javascript":
			if code == "" {
				return nil, fmt.Errorf("code parameter is required for javascript extraction")
			}
			data, err = cremoteServer.client.EvalJS(tab, code, timeout)
		default:
			return nil, fmt.Errorf("unknown extraction type: %s", extractType)
		}
		if err != nil {
			return nil, fmt.Errorf("failed to extract %s: %w", extractType, err)
		}

		return &mcp.CallToolResult{
			Content: []mcp.Content{
				mcp.NewTextContent(fmt.Sprintf("Extracted %s data: %s", extractType, data)),
			},
			IsError: false,
		}, nil
	})

	// Register web_screenshot tool
	mcpServer.AddTool(mcp.Tool{
		Name:        "web_screenshot",
		Description: "Take a screenshot of the current page",
		InputSchema: mcp.ToolInputSchema{
			Type: "object",
			Properties: map[string]any{
				"output": map[string]any{
					"type":        "string",
					"description": "Output file path",
				},
				"full_page": map[string]any{
					"type":        "boolean",
					"description": "Capture full page",
					"default":     false,
				},
				"tab": map[string]any{
					"type":        "string",
					"description": "Tab ID (optional)",
				},
				"timeout": map[string]any{
					"type":        "integer",
					"description": "Timeout in seconds",
					"default":     5,
				},
			},
			Required: []string{"output"},
		},
	}, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
		// Convert arguments to map
		params, ok := request.Params.Arguments.(map[string]any)
		if !ok {
			return nil, fmt.Errorf("invalid arguments format")
		}

		output := getStringParam(params, "output", "")
		fullPage := getBoolParam(params, "full_page", false)
		tab := getStringParam(params, "tab", cremoteServer.currentTab)
		timeout := getIntParam(params, "timeout", 5)

		if output == "" {
			return nil, fmt.Errorf("output parameter is required")
		}
		if tab == "" {
			return nil, fmt.Errorf("no tab available - navigate to a page first")
		}

		if err := cremoteServer.client.TakeScreenshot(tab, output, fullPage, timeout); err != nil {
			return nil, fmt.Errorf("failed to take screenshot: %w", err)
		}
		cremoteServer.screenshots = append(cremoteServer.screenshots, output)

		return &mcp.CallToolResult{
			Content: []mcp.Content{
				mcp.NewTextContent(fmt.Sprintf("Screenshot saved to %s", output)),
			},
			IsError: false,
		}, nil
	})

	// Register web_manage_tabs tool
	mcpServer.AddTool(mcp.Tool{
		Name:        "web_manage_tabs",
		Description: "Manage browser tabs (open, close, list, switch)",
		InputSchema: mcp.ToolInputSchema{
			Type: "object",
			Properties: map[string]any{
				"action": map[string]any{
					"type":        "string",
					"description": "Action to perform",
					"enum":        []any{"open", "close", "list", "switch"},
				},
				"tab": map[string]any{
					"type":        "string",
					"description": "Tab ID (for close/switch actions)",
				},
				"timeout": map[string]any{
					"type":        "integer",
					"description": "Timeout in seconds",
					"default":     5,
				},
			},
			Required: []string{"action"},
		},
	}, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
		// Convert arguments to map
		params, ok := request.Params.Arguments.(map[string]any)
		if !ok {
			return nil, fmt.Errorf("invalid arguments format")
		}

		// Unlike the other tools, "tab" has no current-tab default here:
		// close and switch must name their target explicitly.
		action := getStringParam(params, "action", "")
		tab := getStringParam(params, "tab", "")
		timeout := getIntParam(params, "timeout", 5)

		if action == "" {
			return nil, fmt.Errorf("action parameter is required")
		}

		var message string
		switch action {
		case "open":
			newTab, err := cremoteServer.client.OpenTab(timeout)
			if err != nil {
				return nil, fmt.Errorf("failed to open new tab: %w", err)
			}
			cremoteServer.currentTab = newTab
			cremoteServer.tabHistory = append(cremoteServer.tabHistory, newTab)
			message = fmt.Sprintf("Opened new tab: %s", newTab)

		case "close":
			if tab == "" {
				return nil, fmt.Errorf("tab parameter is required for close action")
			}
			if err := cremoteServer.client.CloseTab(tab, timeout); err != nil {
				return nil, fmt.Errorf("failed to close tab: %w", err)
			}
			// Remove from history and update current tab
			for i, id := range cremoteServer.tabHistory {
				if id == tab {
					cremoteServer.tabHistory = append(cremoteServer.tabHistory[:i], cremoteServer.tabHistory[i+1:]...)
					break
				}
			}
			// If the closed tab was current, fall back to the most recent
			// remaining history entry, or to "no tab" when none is left.
			if cremoteServer.currentTab == tab {
				if n := len(cremoteServer.tabHistory); n > 0 {
					cremoteServer.currentTab = cremoteServer.tabHistory[n-1]
				} else {
					cremoteServer.currentTab = ""
				}
			}
			message = fmt.Sprintf("Closed tab: %s", tab)

		case "list":
			tabs, err := cremoteServer.client.ListTabs()
			if err != nil {
				return nil, fmt.Errorf("failed to list tabs: %w", err)
			}
			message = fmt.Sprintf("Found %d tabs: %v", len(tabs), tabs)

		case "switch":
			if tab == "" {
				return nil, fmt.Errorf("tab parameter is required for switch action")
			}
			cremoteServer.currentTab = tab
			// Add to history if not already there
			found := false
			for _, t := range cremoteServer.tabHistory {
				if t == tab {
					found = true
					break
				}
			}
			if !found {
				cremoteServer.tabHistory = append(cremoteServer.tabHistory, tab)
			}
			message = fmt.Sprintf("Switched to tab: %s", tab)

		default:
			return nil, fmt.Errorf("unknown tab action: %s", action)
		}

		return &mcp.CallToolResult{
			Content: []mcp.Content{
				mcp.NewTextContent(message),
			},
			IsError: false,
		}, nil
	})

	// Register web_iframe tool
	mcpServer.AddTool(mcp.Tool{
		Name:        "web_iframe",
		Description: "Switch iframe context for subsequent operations",
		InputSchema: mcp.ToolInputSchema{
			Type: "object",
			Properties: map[string]any{
				"action": map[string]any{
					"type":        "string",
					"description": "Action to perform",
					"enum":        []any{"enter", "exit"},
				},
				"selector": map[string]any{
					"type":        "string",
					"description": "Iframe CSS selector (for enter action)",
				},
				"tab": map[string]any{
					"type":        "string",
					"description": "Tab ID (optional)",
				},
			},
			Required: []string{"action"},
		},
	}, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
		// Convert arguments to map
		params, ok := request.Params.Arguments.(map[string]any)
		if !ok {
			return nil, fmt.Errorf("invalid arguments format")
		}

		action := getStringParam(params, "action", "")
		selector := getStringParam(params, "selector", "")
		tab := getStringParam(params, "tab", cremoteServer.currentTab)

		if action == "" {
			return nil, fmt.Errorf("action parameter is required")
		}
		if tab == "" {
			return nil, fmt.Errorf("no tab available - navigate to a page first")
		}

		var err error
		var message string
		var inIframe bool
		switch action {
		case "enter":
			if selector == "" {
				return nil, fmt.Errorf("selector parameter is required for enter action")
			}
			err = cremoteServer.client.SwitchToIframe(tab, selector)
			inIframe = true
			message = fmt.Sprintf("Entered iframe: %s", selector)
		case "exit":
			err = cremoteServer.client.SwitchToMain(tab)
			inIframe = false
			message = "Exited iframe context"
		default:
			return nil, fmt.Errorf("unknown iframe action: %s", action)
		}
		if err != nil {
			return nil, fmt.Errorf("failed to %s iframe: %w", action, err)
		}
		// Record the new context only once the switch has succeeded, so a
		// failed call leaves the tracked state unchanged.
		cremoteServer.iframeMode = inIframe

		return &mcp.CallToolResult{
			Content: []mcp.Content{
				mcp.NewTextContent(message),
			},
			IsError: false,
		}, nil
	})

	// Register file_upload tool
	mcpServer.AddTool(mcp.Tool{
		Name:        "file_upload",
		Description: "Upload a file from the client to the container for use in form uploads",
		InputSchema: mcp.ToolInputSchema{
			Type: "object",
			Properties: map[string]any{
				"local_path": map[string]any{
					"type":        "string",
					"description": "Path to the file on the client machine",
				},
				"container_path": map[string]any{
					"type":        "string",
					"description": "Optional path where to store the file in the container (defaults to /tmp/filename)",
				},
			},
			Required: []string{"local_path"},
		},
	}, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
		// Convert arguments to map
		params, ok := request.Params.Arguments.(map[string]any)
		if !ok {
			return nil, fmt.Errorf("invalid arguments format")
		}

		localPath := getStringParam(params, "local_path", "")
		containerPath := getStringParam(params, "container_path", "")
		if localPath == "" {
			return nil, fmt.Errorf("local_path parameter is required")
		}

		// Upload the file to the container; an empty containerPath is passed
		// through to the client unchanged.
		targetPath, err := cremoteServer.client.UploadFileToContainer(localPath, containerPath)
		if err != nil {
			return nil, fmt.Errorf("failed to upload file: %w", err)
		}

		return &mcp.CallToolResult{
			Content: []mcp.Content{
				mcp.NewTextContent(fmt.Sprintf("File uploaded successfully to container at: %s", targetPath)),
			},
			IsError: false,
		}, nil
	})

	// Register file_download tool
	mcpServer.AddTool(mcp.Tool{
		Name:        "file_download",
		Description: "Download a file from the container to the client (e.g., downloaded files from browser)",
		InputSchema: mcp.ToolInputSchema{
			Type: "object",
			Properties: map[string]any{
				"container_path": map[string]any{
					"type":        "string",
					"description": "Path to the file in the container",
				},
				"local_path": map[string]any{
					"type":        "string",
					"description": "Path where to save the file on the client machine",
				},
			},
			Required: []string{"container_path", "local_path"},
		},
	}, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
		// Convert arguments to map
		params, ok := request.Params.Arguments.(map[string]any)
		if !ok {
			return nil, fmt.Errorf("invalid arguments format")
		}

		containerPath := getStringParam(params, "container_path", "")
		localPath := getStringParam(params, "local_path", "")
		if containerPath == "" {
			return nil, fmt.Errorf("container_path parameter is required")
		}
		if localPath == "" {
			return nil, fmt.Errorf("local_path parameter is required")
		}

		// Download the file from the container
		if err := cremoteServer.client.DownloadFileFromContainer(containerPath, localPath); err != nil {
			return nil, fmt.Errorf("failed to download file: %w", err)
		}

		return &mcp.CallToolResult{
			Content: []mcp.Content{
				mcp.NewTextContent(fmt.Sprintf("File downloaded successfully from container to: %s", localPath)),
			},
			IsError: false,
		}, nil
	})

	// Register console_logs tool
	mcpServer.AddTool(mcp.Tool{
		Name:        "console_logs",
		Description: "Get console logs from the browser tab",
		InputSchema: mcp.ToolInputSchema{
			Type: "object",
			Properties: map[string]any{
				"tab": map[string]any{
					"type":        "string",
					"description": "Tab ID (optional, uses current tab)",
				},
				"clear": map[string]any{
					"type":        "boolean",
					"description": "Clear logs after retrieval (default: false)",
					"default":     false,
				},
			},
			Required: []string{},
		},
	}, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
		// Convert arguments to map
		params, ok := request.Params.Arguments.(map[string]any)
		if !ok {
			return nil, fmt.Errorf("invalid arguments format")
		}

		tab := getStringParam(params, "tab", cremoteServer.currentTab)
		clearLogs := getBoolParam(params, "clear", false)

		// Get console logs
		logs, err := cremoteServer.client.GetConsoleLogs(tab, clearLogs)
		if err != nil {
			return nil, fmt.Errorf("failed to get console logs: %w", err)
		}

		// Format logs for display
		var logText string
		if len(logs) == 0 {
			logText = "No console logs found."
		} else {
			logText = fmt.Sprintf("Found %d console log entries:\n\n", len(logs))
			for i, entry := range logs {
				// %v prints string fields exactly as %s would, but does not
				// panic when a field is missing or is not a string.
				logText += fmt.Sprintf("[%d] %v [%v]: %v\n", i+1, entry["timestamp"], entry["level"], entry["message"])
			}
		}

		return &mcp.CallToolResult{
			Content: []mcp.Content{
				mcp.NewTextContent(logText),
			},
			IsError: false,
		}, nil
	})

	// Register console_command tool
	mcpServer.AddTool(mcp.Tool{
		Name:        "console_command",
		Description: "Execute a command in the browser console",
		InputSchema: mcp.ToolInputSchema{
			Type: "object",
			Properties: map[string]any{
				"command": map[string]any{
					"type":        "string",
					"description": "JavaScript command to execute in console",
				},
				"tab": map[string]any{
					"type":        "string",
					"description": "Tab ID (optional, uses current tab)",
				},
				"timeout": map[string]any{
					"type":        "integer",
					"description": "Timeout in seconds (default: 5)",
					"default":     5,
				},
			},
			Required: []string{"command"},
		},
	}, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
		// Convert arguments to map
		params, ok := request.Params.Arguments.(map[string]any)
		if !ok {
			return nil, fmt.Errorf("invalid arguments format")
		}

		command := getStringParam(params, "command", "")
		tab := getStringParam(params, "tab", cremoteServer.currentTab)
		timeout := getIntParam(params, "timeout", 5)
		if command == "" {
			return nil, fmt.Errorf("command parameter is required")
		}

		// Execute console command
		result, err := cremoteServer.client.ExecuteConsoleCommand(tab, command, timeout)
		if err != nil {
			return nil, fmt.Errorf("failed to execute console command: %w", err)
		}

		return &mcp.CallToolResult{
			Content: []mcp.Content{
				mcp.NewTextContent(fmt.Sprintf("Console command executed successfully.\nCommand: %s\nResult: %s", command, result)),
			},
			IsError: false,
		}, nil
	})

	// Start the server
	log.Printf("Cremote MCP server ready")
	if err := server.ServeStdio(mcpServer); err != nil {
		log.Fatalf("Server error: %v", err)
	}
}