package main import ( "bufio" "encoding/json" "fmt" "io" "log" "os" "os/signal" "path/filepath" "strconv" "strings" "syscall" "time" "git.teamworkapps.com/shortcut/cremote/client" ) // DebugLogger handles debug logging to file type DebugLogger struct { file *os.File logger *log.Logger } // NewDebugLogger creates a new debug logger func NewDebugLogger() (*DebugLogger, error) { logDir := "/tmp/cremote-mcp-logs" if err := os.MkdirAll(logDir, 0755); err != nil { return nil, fmt.Errorf("failed to create log directory: %w", err) } logFile := filepath.Join(logDir, fmt.Sprintf("mcp-stdio-%d.log", time.Now().Unix())) file, err := os.OpenFile(logFile, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) if err != nil { return nil, fmt.Errorf("failed to open log file: %w", err) } logger := log.New(file, "", log.LstdFlags|log.Lmicroseconds|log.Lshortfile) logger.Printf("=== MCP STDIO Server Debug Log Started ===") return &DebugLogger{ file: file, logger: logger, }, nil } // Log writes a debug message func (d *DebugLogger) Log(format string, args ...interface{}) { if d != nil && d.logger != nil { d.logger.Printf(format, args...) } } // LogJSON logs a JSON object with a label func (d *DebugLogger) LogJSON(label string, obj interface{}) { if d != nil && d.logger != nil { jsonBytes, err := json.MarshalIndent(obj, "", " ") if err != nil { d.logger.Printf("%s: JSON marshal error: %v", label, err) } else { d.logger.Printf("%s:\n%s", label, string(jsonBytes)) } } } // Close closes the debug logger func (d *DebugLogger) Close() { if d != nil && d.file != nil { d.logger.Printf("=== MCP STDIO Server Debug Log Ended ===") d.file.Close() } } var debugLogger *DebugLogger // MCPServer wraps the cremote client with MCP protocol type MCPServer struct { client *client.Client currentTab string tabHistory []string iframeMode bool screenshots []string } // MCPRequest represents an incoming MCP request type MCPRequest struct { JSONRPC string `json:"jsonrpc,omitempty"` Method string `json:"method"` Params map[string]interface{} `json:"params"` ID interface{} `json:"id"` } // MCPResponse represents an MCP response type MCPResponse struct { JSONRPC string `json:"jsonrpc,omitempty"` Result interface{} `json:"result,omitempty"` Error *MCPError `json:"error,omitempty"` ID interface{} `json:"id"` } // MCPError represents an MCP error type MCPError struct { Code int `json:"code"` Message string `json:"message"` } // ToolResult represents the result of a tool execution type ToolResult struct { Success bool `json:"success"` Data interface{} `json:"data,omitempty"` Screenshot string `json:"screenshot,omitempty"` CurrentTab string `json:"current_tab,omitempty"` TabHistory []string `json:"tab_history,omitempty"` IframeMode bool `json:"iframe_mode"` Error string `json:"error,omitempty"` Metadata map[string]string `json:"metadata,omitempty"` } // NewMCPServer creates a new MCP server instance func NewMCPServer(host string, port int) *MCPServer { c := client.NewClient(host, port) return &MCPServer{ client: c, tabHistory: make([]string, 0), screenshots: make([]string, 0), } } // HandleRequest processes an MCP request and returns a response func (s *MCPServer) HandleRequest(req MCPRequest) MCPResponse { debugLogger.Log("HandleRequest called with method: %s, ID: %v", req.Method, req.ID) debugLogger.LogJSON("Incoming Request", req) var resp MCPResponse switch req.Method { case "initialize": debugLogger.Log("Handling initialize request") resp = s.handleInitialize(req) case "tools/list": debugLogger.Log("Handling tools/list request") resp = s.handleToolsList(req) case "tools/call": debugLogger.Log("Handling tools/call request") resp = s.handleToolCall(req) default: debugLogger.Log("Unknown method: %s", req.Method) resp = MCPResponse{ Error: &MCPError{ Code: -32601, Message: fmt.Sprintf("Method not found: %s", req.Method), }, ID: req.ID, } } debugLogger.LogJSON("Response", resp) debugLogger.Log("HandleRequest completed for method: %s", req.Method) return resp } // handleInitialize handles the MCP initialize request func (s *MCPServer) handleInitialize(req MCPRequest) MCPResponse { debugLogger.Log("handleInitialize: Processing initialize request") debugLogger.LogJSON("Initialize request params", req.Params) result := map[string]interface{}{ "protocolVersion": "2024-11-05", "capabilities": map[string]interface{}{ "tools": map[string]interface{}{ "listChanged": true, }, }, "serverInfo": map[string]interface{}{ "name": "cremote-mcp", "version": "1.0.0", }, } debugLogger.LogJSON("Initialize response result", result) return MCPResponse{ Result: result, ID: req.ID, } } // handleToolsList returns the list of available tools func (s *MCPServer) handleToolsList(req MCPRequest) MCPResponse { tools := []map[string]interface{}{ { "name": "web_navigate", "description": "Navigate to a URL and optionally take a screenshot", "inputSchema": map[string]interface{}{ "type": "object", "properties": map[string]interface{}{ "url": map[string]interface{}{ "type": "string", "description": "URL to navigate to", }, "tab": map[string]interface{}{ "type": "string", "description": "Tab ID (optional, uses current tab)", }, "screenshot": map[string]interface{}{ "type": "boolean", "description": "Take screenshot after navigation", }, "timeout": map[string]interface{}{ "type": "integer", "description": "Timeout in seconds", "default": 5, }, }, "required": []string{"url"}, }, }, { "name": "web_interact", "description": "Interact with web elements (click, fill, submit)", "inputSchema": map[string]interface{}{ "type": "object", "properties": map[string]interface{}{ "action": map[string]interface{}{ "type": "string", "enum": []string{"click", "fill", "submit", "upload"}, }, "selector": map[string]interface{}{ "type": "string", "description": "CSS selector for the element", }, "value": map[string]interface{}{ "type": "string", "description": "Value to fill (for fill/upload actions)", }, "tab": map[string]interface{}{ "type": "string", "description": "Tab ID (optional)", }, "timeout": map[string]interface{}{ "type": "integer", "description": "Timeout in seconds", "default": 5, }, }, "required": []string{"action", "selector"}, }, }, { "name": "web_extract", "description": "Extract data from the page (source, element HTML, or execute JavaScript)", "inputSchema": map[string]interface{}{ "type": "object", "properties": map[string]interface{}{ "type": map[string]interface{}{ "type": "string", "enum": []string{"source", "element", "javascript"}, }, "selector": map[string]interface{}{ "type": "string", "description": "CSS selector (for element type)", }, "code": map[string]interface{}{ "type": "string", "description": "JavaScript code (for javascript type)", }, "tab": map[string]interface{}{ "type": "string", "description": "Tab ID (optional)", }, "timeout": map[string]interface{}{ "type": "integer", "description": "Timeout in seconds", "default": 5, }, }, "required": []string{"type"}, }, }, { "name": "web_screenshot", "description": "Take a screenshot of the current page", "inputSchema": map[string]interface{}{ "type": "object", "properties": map[string]interface{}{ "output": map[string]interface{}{ "type": "string", "description": "Output file path", }, "full_page": map[string]interface{}{ "type": "boolean", "description": "Capture full page", "default": false, }, "tab": map[string]interface{}{ "type": "string", "description": "Tab ID (optional)", }, "timeout": map[string]interface{}{ "type": "integer", "description": "Timeout in seconds", "default": 5, }, }, "required": []string{"output"}, }, }, { "name": "web_manage_tabs", "description": "Manage browser tabs (open, close, list, switch)", "inputSchema": map[string]interface{}{ "type": "object", "properties": map[string]interface{}{ "action": map[string]interface{}{ "type": "string", "enum": []string{"open", "close", "list", "switch"}, }, "tab": map[string]interface{}{ "type": "string", "description": "Tab ID (for close/switch actions)", }, "timeout": map[string]interface{}{ "type": "integer", "description": "Timeout in seconds", "default": 5, }, }, "required": []string{"action"}, }, }, { "name": "web_iframe", "description": "Switch iframe context for subsequent operations", "inputSchema": map[string]interface{}{ "type": "object", "properties": map[string]interface{}{ "action": map[string]interface{}{ "type": "string", "enum": []string{"enter", "exit"}, }, "selector": map[string]interface{}{ "type": "string", "description": "Iframe CSS selector (for enter action)", }, "tab": map[string]interface{}{ "type": "string", "description": "Tab ID (optional)", }, }, "required": []string{"action"}, }, }, } return MCPResponse{ Result: map[string]interface{}{ "tools": tools, }, ID: req.ID, } } // handleToolCall executes a tool and returns the result func (s *MCPServer) handleToolCall(req MCPRequest) MCPResponse { debugLogger.Log("handleToolCall: Processing tool call request") debugLogger.LogJSON("Tool call request params", req.Params) params := req.Params toolName, ok := params["name"].(string) if !ok || toolName == "" { debugLogger.Log("handleToolCall: Tool name missing or invalid") return MCPResponse{ Error: &MCPError{Code: -32602, Message: "Missing tool name"}, ID: req.ID, } } debugLogger.Log("handleToolCall: Tool name extracted: %s", toolName) arguments, _ := params["arguments"].(map[string]interface{}) if arguments == nil { debugLogger.Log("handleToolCall: No arguments provided, using empty map") arguments = make(map[string]interface{}) } else { debugLogger.LogJSON("Tool arguments", arguments) } var result ToolResult var err error debugLogger.Log("handleToolCall: Dispatching to tool handler: %s", toolName) switch toolName { case "web_navigate": result, err = s.handleNavigate(arguments) case "web_interact": result, err = s.handleInteract(arguments) case "web_extract": result, err = s.handleExtract(arguments) case "web_screenshot": result, err = s.handleScreenshot(arguments) case "web_manage_tabs": result, err = s.handleManageTabs(arguments) case "web_iframe": result, err = s.handleIframe(arguments) default: debugLogger.Log("handleToolCall: Unknown tool: %s", toolName) return MCPResponse{ Error: &MCPError{Code: -32601, Message: fmt.Sprintf("Unknown tool: %s", toolName)}, ID: req.ID, } } debugLogger.LogJSON("Tool execution result", result) if err != nil { debugLogger.Log("handleToolCall: Tool execution error: %v", err) return MCPResponse{ Error: &MCPError{Code: -32603, Message: err.Error()}, ID: req.ID, } } // Always include current state in response result.CurrentTab = s.currentTab result.TabHistory = s.tabHistory result.IframeMode = s.iframeMode response := MCPResponse{Result: result, ID: req.ID} debugLogger.LogJSON("Final tool call response", response) debugLogger.Log("handleToolCall: Completed successfully for tool: %s", toolName) return response } // Helper functions for parameter extraction func getStringParam(params map[string]interface{}, key, defaultValue string) string { if val, ok := params[key].(string); ok { return val } return defaultValue } func getIntParam(params map[string]interface{}, key string, defaultValue int) int { if val, ok := params[key].(float64); ok { return int(val) } return defaultValue } func getBoolParam(params map[string]interface{}, key string, defaultValue bool) bool { if val, ok := params[key].(bool); ok { return val } return defaultValue } // resolveTabID returns the tab ID to use, defaulting to current tab func (s *MCPServer) resolveTabID(tabID string) string { if tabID != "" { return tabID } return s.currentTab } // handleNavigate handles web navigation func (s *MCPServer) handleNavigate(params map[string]interface{}) (ToolResult, error) { url := getStringParam(params, "url", "") if url == "" { return ToolResult{}, fmt.Errorf("url parameter is required") } tab := getStringParam(params, "tab", "") screenshot := getBoolParam(params, "screenshot", false) timeout := getIntParam(params, "timeout", 5) // If no tab specified and no current tab, open a new one if tab == "" && s.currentTab == "" { newTab, err := s.client.OpenTab(timeout) if err != nil { return ToolResult{}, fmt.Errorf("failed to open new tab: %w", err) } s.currentTab = newTab s.tabHistory = append(s.tabHistory, newTab) tab = newTab } else if tab == "" { tab = s.currentTab } else { // Update current tab if specified s.currentTab = tab // Move to end of history if it exists, otherwise add it s.removeTabFromHistory(tab) s.tabHistory = append(s.tabHistory, tab) } // Navigate to URL err := s.client.LoadURL(tab, url, timeout) if err != nil { return ToolResult{}, fmt.Errorf("failed to navigate to %s: %w", url, err) } result := ToolResult{ Success: true, Data: map[string]string{ "url": url, "tab": tab, }, } // Take screenshot if requested if screenshot { screenshotPath := fmt.Sprintf("/tmp/navigate-%d.png", time.Now().Unix()) err = s.client.TakeScreenshot(tab, screenshotPath, false, timeout) if err != nil { // Don't fail the whole operation for screenshot errors result.Metadata = map[string]string{ "screenshot_error": err.Error(), } } else { result.Screenshot = screenshotPath s.screenshots = append(s.screenshots, screenshotPath) } } return result, nil } // handleInteract handles element interactions func (s *MCPServer) handleInteract(params map[string]interface{}) (ToolResult, error) { action := getStringParam(params, "action", "") selector := getStringParam(params, "selector", "") value := getStringParam(params, "value", "") tab := s.resolveTabID(getStringParam(params, "tab", "")) timeout := getIntParam(params, "timeout", 5) if action == "" { return ToolResult{}, fmt.Errorf("action parameter is required") } if selector == "" { return ToolResult{}, fmt.Errorf("selector parameter is required") } if tab == "" { return ToolResult{}, fmt.Errorf("no active tab available") } var err error result := ToolResult{Success: true} switch action { case "click": err = s.client.ClickElement(tab, selector, timeout, timeout) result.Data = map[string]string{"action": "clicked", "selector": selector} case "fill": if value == "" { return ToolResult{}, fmt.Errorf("value parameter is required for fill action") } err = s.client.FillFormField(tab, selector, value, timeout, timeout) result.Data = map[string]string{"action": "filled", "selector": selector, "value": value} case "submit": err = s.client.SubmitForm(tab, selector, timeout, timeout) result.Data = map[string]string{"action": "submitted", "selector": selector} case "upload": if value == "" { return ToolResult{}, fmt.Errorf("value parameter is required for upload action") } err = s.client.UploadFile(tab, selector, value, timeout, timeout) result.Data = map[string]string{"action": "uploaded", "selector": selector, "file": value} default: return ToolResult{}, fmt.Errorf("unknown action: %s", action) } if err != nil { return ToolResult{}, fmt.Errorf("failed to %s element: %w", action, err) } return result, nil } // handleExtract handles data extraction func (s *MCPServer) handleExtract(params map[string]interface{}) (ToolResult, error) { extractType := getStringParam(params, "type", "") selector := getStringParam(params, "selector", "") code := getStringParam(params, "code", "") tab := s.resolveTabID(getStringParam(params, "tab", "")) timeout := getIntParam(params, "timeout", 5) if extractType == "" { return ToolResult{}, fmt.Errorf("type parameter is required") } if tab == "" { return ToolResult{}, fmt.Errorf("no active tab available") } var data string var err error switch extractType { case "source": data, err = s.client.GetPageSource(tab, timeout) case "element": if selector == "" { return ToolResult{}, fmt.Errorf("selector parameter is required for element type") } data, err = s.client.GetElementHTML(tab, selector, timeout) case "javascript": if code == "" { return ToolResult{}, fmt.Errorf("code parameter is required for javascript type") } data, err = s.client.EvalJS(tab, code, timeout) default: return ToolResult{}, fmt.Errorf("unknown extract type: %s", extractType) } if err != nil { return ToolResult{}, fmt.Errorf("failed to extract %s: %w", extractType, err) } return ToolResult{ Success: true, Data: data, }, nil } // handleScreenshot handles screenshot capture func (s *MCPServer) handleScreenshot(params map[string]interface{}) (ToolResult, error) { output := getStringParam(params, "output", "") if output == "" { output = fmt.Sprintf("/tmp/screenshot-%d.png", time.Now().Unix()) } tab := s.resolveTabID(getStringParam(params, "tab", "")) fullPage := getBoolParam(params, "full_page", false) timeout := getIntParam(params, "timeout", 5) if tab == "" { return ToolResult{}, fmt.Errorf("no active tab available") } err := s.client.TakeScreenshot(tab, output, fullPage, timeout) if err != nil { return ToolResult{}, fmt.Errorf("failed to take screenshot: %w", err) } s.screenshots = append(s.screenshots, output) return ToolResult{ Success: true, Screenshot: output, Data: map[string]interface{}{ "output": output, "full_page": fullPage, "tab": tab, }, }, nil } // handleManageTabs handles tab management operations func (s *MCPServer) handleManageTabs(params map[string]interface{}) (ToolResult, error) { action := getStringParam(params, "action", "") tab := getStringParam(params, "tab", "") timeout := getIntParam(params, "timeout", 5) if action == "" { return ToolResult{}, fmt.Errorf("action parameter is required") } var data interface{} var err error switch action { case "open": newTab, err := s.client.OpenTab(timeout) if err != nil { return ToolResult{}, fmt.Errorf("failed to open tab: %w", err) } s.currentTab = newTab s.tabHistory = append(s.tabHistory, newTab) data = map[string]string{"tab": newTab, "action": "opened"} case "close": targetTab := s.resolveTabID(tab) if targetTab == "" { return ToolResult{}, fmt.Errorf("no tab to close") } err = s.client.CloseTab(targetTab, timeout) if err != nil { return ToolResult{}, fmt.Errorf("failed to close tab: %w", err) } s.removeTabFromHistory(targetTab) data = map[string]string{"tab": targetTab, "action": "closed"} case "list": tabs, err := s.client.ListTabs() if err != nil { return ToolResult{}, fmt.Errorf("failed to list tabs: %w", err) } data = tabs case "switch": if tab == "" { return ToolResult{}, fmt.Errorf("tab parameter is required for switch action") } s.currentTab = tab s.removeTabFromHistory(tab) s.tabHistory = append(s.tabHistory, tab) data = map[string]string{"tab": tab, "action": "switched"} default: return ToolResult{}, fmt.Errorf("unknown tab action: %s", action) } if err != nil { return ToolResult{}, err } return ToolResult{ Success: true, Data: data, }, nil } // handleIframe handles iframe context switching func (s *MCPServer) handleIframe(params map[string]interface{}) (ToolResult, error) { action := getStringParam(params, "action", "") selector := getStringParam(params, "selector", "") tab := s.resolveTabID(getStringParam(params, "tab", "")) if action == "" { return ToolResult{}, fmt.Errorf("action parameter is required") } if tab == "" { return ToolResult{}, fmt.Errorf("no active tab available") } var err error var data map[string]string switch action { case "enter": if selector == "" { return ToolResult{}, fmt.Errorf("selector parameter is required for enter action") } err = s.client.SwitchToIframe(tab, selector) s.iframeMode = true data = map[string]string{"action": "entered", "selector": selector} case "exit": err = s.client.SwitchToMain(tab) s.iframeMode = false data = map[string]string{"action": "exited"} default: return ToolResult{}, fmt.Errorf("unknown iframe action: %s", action) } if err != nil { return ToolResult{}, fmt.Errorf("failed to %s iframe: %w", action, err) } return ToolResult{ Success: true, Data: data, }, nil } // removeTabFromHistory removes a tab from history and updates current tab func (s *MCPServer) removeTabFromHistory(tabID string) { for i, id := range s.tabHistory { if id == tabID { s.tabHistory = append(s.tabHistory[:i], s.tabHistory[i+1:]...) break } } if s.currentTab == tabID { if len(s.tabHistory) > 0 { s.currentTab = s.tabHistory[len(s.tabHistory)-1] } else { s.currentTab = "" } } } func main() { // Initialize debug logger var err error debugLogger, err = NewDebugLogger() if err != nil { log.Printf("Warning: Failed to initialize debug logger: %v", err) // Continue without debug logging } else { defer debugLogger.Close() debugLogger.Log("MCP STDIO Server starting up") } // Set up signal handling for graceful shutdown sigChan := make(chan os.Signal, 1) signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM) host := os.Getenv("CREMOTE_HOST") if host == "" { host = "localhost" } portStr := os.Getenv("CREMOTE_PORT") port := 8989 if portStr != "" { if p, err := strconv.Atoi(portStr); err == nil { port = p } } debugLogger.Log("Connecting to cremote daemon at %s:%d", host, port) log.Printf("Starting MCP stdio server, connecting to cremote daemon at %s:%d", host, port) server := NewMCPServer(host, port) // Create a buffered reader for better EOF handling reader := bufio.NewReader(os.Stdin) log.Printf("MCP stdio server ready, waiting for requests...") // Channel to signal when to stop reading done := make(chan bool) // Goroutine to handle stdin reading go func() { defer close(done) for { // Read headers for Content-Length framing (use the same reader for headers and body) contentLength := -1 for { line, err := reader.ReadString('\n') if err != nil { if err == io.EOF { log.Printf("Input stream closed, shutting down MCP server") return } log.Printf("Error reading header: %v", err) return } line = strings.TrimRight(line, "\r\n") if line == "" { break // end of headers } const prefix = "content-length: " low := strings.ToLower(line) if strings.HasPrefix(low, prefix) { var err error contentLength, err = strconv.Atoi(strings.TrimSpace(low[len(prefix):])) if err != nil { log.Printf("Invalid Content-Length: %v", err) return } } } if contentLength < 0 { log.Printf("Missing Content-Length header") return } // Read body of specified length debugLogger.Log("Reading request body of length: %d", contentLength) body := make([]byte, contentLength) if _, err := io.ReadFull(reader, body); err != nil { debugLogger.Log("Error reading body: %v", err) log.Printf("Error reading body: %v", err) return } debugLogger.Log("Raw request body: %s", string(body)) var req MCPRequest if err := json.Unmarshal(body, &req); err != nil { debugLogger.Log("Error decoding request body: %v", err) log.Printf("Error decoding request body: %v", err) continue } debugLogger.Log("Successfully parsed request: %s (ID: %v)", req.Method, req.ID) log.Printf("Processing request: %s (ID: %v)", req.Method, req.ID) resp := server.HandleRequest(req) resp.JSONRPC = "2.0" // Write Content-Length framed response responseBytes, err := json.Marshal(resp) if err != nil { debugLogger.Log("Error marshaling response: %v", err) log.Printf("Error marshaling response: %v", err) continue } debugLogger.Log("Sending response with Content-Length: %d", len(responseBytes)) debugLogger.Log("Raw response: %s", string(responseBytes)) fmt.Fprintf(os.Stdout, "Content-Length: %d\r\n\r\n", len(responseBytes)) os.Stdout.Write(responseBytes) debugLogger.Log("Response sent successfully for request: %s", req.Method) log.Printf("Completed request: %s", req.Method) } }() // Wait for either completion or signal select { case <-done: log.Printf("Input processing completed") case sig := <-sigChan: log.Printf("Received signal %v, shutting down", sig) } log.Printf("MCP stdio server shutdown complete") }