package main import ( "context" "crypto/rand" "encoding/hex" "encoding/json" "fmt" "log" "net/http" "os" "strconv" "sync" "time" "git.teamworkapps.com/shortcut/cremote/client" "github.com/mark3labs/mcp-go/mcp" "github.com/mark3labs/mcp-go/server" ) const Version = "2.0.0" // ClientContext provides a unified interface for accessing client state // This allows the same tool handlers to work with both single-client and multi-client modes type ClientContext interface { GetClient() *client.Client GetCurrentTab() string SetCurrentTab(tabID string) AddToTabHistory(tabID string) GetTabHistory() []string SetIframeMode(mode bool) GetIframeMode() bool AddScreenshot(path string) GetScreenshots() []string } // ClientSession represents an isolated session for a single MCP client type ClientSession struct { ID string client *client.Client currentTab string tabHistory []string iframeMode bool screenshots []string createdAt time.Time lastUsed time.Time mu sync.RWMutex } // SessionManager manages multiple client sessions type SessionManager struct { sessions map[string]*ClientSession mu sync.RWMutex host string port int } // CremoteServer wraps the cremote client for MCP (legacy single-client mode) type CremoteServer struct { client *client.Client currentTab string tabHistory []string iframeMode bool screenshots []string } // NewSessionManager creates a new session manager func NewSessionManager(host string, port int) *SessionManager { return &SessionManager{ sessions: make(map[string]*ClientSession), host: host, port: port, } } // generateSessionID creates a cryptographically secure session ID func generateSessionID() string { bytes := make([]byte, 16) rand.Read(bytes) return hex.EncodeToString(bytes) } // CreateSession creates a new client session func (sm *SessionManager) CreateSession() *ClientSession { sm.mu.Lock() defer sm.mu.Unlock() session := &ClientSession{ ID: generateSessionID(), client: client.NewClient(sm.host, sm.port), tabHistory: make([]string, 0), screenshots: make([]string, 0), createdAt: time.Now(), lastUsed: time.Now(), } sm.sessions[session.ID] = session return session } // GetSession retrieves a session by ID func (sm *SessionManager) GetSession(sessionID string) (*ClientSession, bool) { sm.mu.RLock() defer sm.mu.RUnlock() session, exists := sm.sessions[sessionID] if exists { session.mu.Lock() session.lastUsed = time.Now() session.mu.Unlock() } return session, exists } // RemoveSession removes a session func (sm *SessionManager) RemoveSession(sessionID string) { sm.mu.Lock() defer sm.mu.Unlock() delete(sm.sessions, sessionID) } // CleanupExpiredSessions removes sessions that haven't been used recently func (sm *SessionManager) CleanupExpiredSessions(maxAge time.Duration) { sm.mu.Lock() defer sm.mu.Unlock() now := time.Now() for id, session := range sm.sessions { session.mu.RLock() if now.Sub(session.lastUsed) > maxAge { delete(sm.sessions, id) } session.mu.RUnlock() } } // Session-aware methods for ClientSession func (cs *ClientSession) GetCurrentTab() string { cs.mu.RLock() defer cs.mu.RUnlock() return cs.currentTab } func (cs *ClientSession) SetCurrentTab(tabID string) { cs.mu.Lock() defer cs.mu.Unlock() cs.currentTab = tabID cs.lastUsed = time.Now() } func (cs *ClientSession) AddToTabHistory(tabID string) { cs.mu.Lock() defer cs.mu.Unlock() cs.tabHistory = append(cs.tabHistory, tabID) cs.lastUsed = time.Now() } func (cs *ClientSession) GetTabHistory() []string { cs.mu.RLock() defer cs.mu.RUnlock() // Return a copy to prevent external modification history := make([]string, len(cs.tabHistory)) copy(history, cs.tabHistory) return history } func (cs *ClientSession) SetIframeMode(mode bool) { cs.mu.Lock() defer cs.mu.Unlock() cs.iframeMode = mode cs.lastUsed = time.Now() } func (cs *ClientSession) GetIframeMode() bool { cs.mu.RLock() defer cs.mu.RUnlock() return cs.iframeMode } func (cs *ClientSession) AddScreenshot(path string) { cs.mu.Lock() defer cs.mu.Unlock() cs.screenshots = append(cs.screenshots, path) cs.lastUsed = time.Now() } func (cs *ClientSession) GetScreenshots() []string { cs.mu.RLock() defer cs.mu.RUnlock() // Return a copy to prevent external modification screenshots := make([]string, len(cs.screenshots)) copy(screenshots, cs.screenshots) return screenshots } // Implement ClientContext interface for ClientSession func (cs *ClientSession) GetClient() *client.Client { return cs.client } // NewCremoteServer creates a new cremote MCP server (legacy single-client mode) func NewCremoteServer(host string, port int) *CremoteServer { return &CremoteServer{ client: client.NewClient(host, port), tabHistory: make([]string, 0), screenshots: make([]string, 0), } } // Implement ClientContext interface for CremoteServer (legacy mode) func (cs *CremoteServer) GetClient() *client.Client { return cs.client } func (cs *CremoteServer) GetCurrentTab() string { return cs.currentTab } func (cs *CremoteServer) SetCurrentTab(tabID string) { cs.currentTab = tabID } func (cs *CremoteServer) AddToTabHistory(tabID string) { cs.tabHistory = append(cs.tabHistory, tabID) } func (cs *CremoteServer) GetTabHistory() []string { // Return a copy to prevent external modification history := make([]string, len(cs.tabHistory)) copy(history, cs.tabHistory) return history } func (cs *CremoteServer) SetIframeMode(mode bool) { cs.iframeMode = mode } func (cs *CremoteServer) GetIframeMode() bool { return cs.iframeMode } func (cs *CremoteServer) AddScreenshot(path string) { cs.screenshots = append(cs.screenshots, path) } func (cs *CremoteServer) GetScreenshots() []string { // Return a copy to prevent external modification screenshots := make([]string, len(cs.screenshots)) copy(screenshots, cs.screenshots) return screenshots } // Helper functions for parameter extraction func getStringParam(params map[string]any, key, defaultValue string) string { if val, ok := params[key].(string); ok { return val } return defaultValue } func getBoolParam(params map[string]any, key string, defaultValue bool) bool { if val, ok := params[key].(bool); ok { return val } return defaultValue } func getIntParam(params map[string]any, key string, defaultValue int) int { if val, ok := params[key].(float64); ok { return int(val) } if val, ok := params[key].(int); ok { return val } return defaultValue } // createToolHandlers creates session-aware tool handlers that work with ClientContext func createToolHandlers(getContext func() ClientContext) map[string]func(context.Context, mcp.CallToolRequest) (*mcp.CallToolResult, error) { handlers := make(map[string]func(context.Context, mcp.CallToolRequest) (*mcp.CallToolResult, error)) // Version tool handler handlers["version_cremotemcp"] = func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { clientCtx := getContext() daemonVersion, err := clientCtx.GetClient().GetVersion() if err != nil { daemonVersion = fmt.Sprintf("Unable to connect: %v", err) } return &mcp.CallToolResult{ Content: []mcp.Content{ mcp.NewTextContent(fmt.Sprintf("MCP Server version: %s\nDaemon version: %s", Version, daemonVersion)), }, IsError: false, }, nil } // Navigation tool handler handlers["web_navigate_cremotemcp"] = func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { clientCtx := getContext() params, ok := request.Params.Arguments.(map[string]any) if !ok { return nil, fmt.Errorf("invalid arguments format") } url := getStringParam(params, "url", "") if url == "" { return nil, fmt.Errorf("url parameter is required") } tab := getStringParam(params, "tab", clientCtx.GetCurrentTab()) timeout := getIntParam(params, "timeout", 5) takeScreenshot := getBoolParam(params, "screenshot", false) // If no tab specified and no current tab, create a new one if tab == "" { newTab, err := clientCtx.GetClient().OpenTab(timeout) if err != nil { return nil, fmt.Errorf("failed to create new tab: %w", err) } tab = newTab clientCtx.SetCurrentTab(tab) clientCtx.AddToTabHistory(tab) } // Load the URL err := clientCtx.GetClient().LoadURL(tab, url, timeout) if err != nil { return nil, fmt.Errorf("failed to load URL: %w", err) } message := fmt.Sprintf("Successfully navigated to %s in tab %s", url, tab) // Take screenshot if requested if takeScreenshot { screenshotPath := fmt.Sprintf("/tmp/navigate-%d.png", time.Now().Unix()) err = clientCtx.GetClient().TakeScreenshot(tab, screenshotPath, false, timeout) if err == nil { clientCtx.AddScreenshot(screenshotPath) message += fmt.Sprintf(" (screenshot saved to %s)", screenshotPath) } } return &mcp.CallToolResult{ Content: []mcp.Content{ mcp.NewTextContent(message), }, IsError: false, }, nil } // Legacy navigation tool handler (for backward compatibility) handlers["web_navigate"] = handlers["web_navigate_cremotemcp"] // Web interaction tool handler handlers["web_interact"] = func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { clientCtx := getContext() params, ok := request.Params.Arguments.(map[string]any) if !ok { return nil, fmt.Errorf("invalid arguments format") } action := getStringParam(params, "action", "") selector := getStringParam(params, "selector", "") tab := getStringParam(params, "tab", clientCtx.GetCurrentTab()) timeout := getIntParam(params, "timeout", 5) if action == "" { return nil, fmt.Errorf("action parameter is required") } if selector == "" { return nil, fmt.Errorf("selector parameter is required") } var err error var message string switch action { case "click": err = clientCtx.GetClient().ClickElement(tab, selector, timeout) message = fmt.Sprintf("Clicked element: %s", selector) case "fill": value := getStringParam(params, "value", "") if value == "" { return nil, fmt.Errorf("value parameter is required for fill action") } err = clientCtx.GetClient().FillFormField(tab, selector, value, timeout) message = fmt.Sprintf("Filled element %s with value: %s", selector, value) case "submit": err = clientCtx.GetClient().SubmitForm(tab, selector, timeout) message = fmt.Sprintf("Submitted form: %s", selector) case "select": value := getStringParam(params, "value", "") if value == "" { return nil, fmt.Errorf("value parameter is required for select action") } err = clientCtx.GetClient().SelectElement(tab, selector, value, timeout) message = fmt.Sprintf("Selected option %s in element %s", value, selector) default: return nil, fmt.Errorf("unknown action: %s", action) } if err != nil { return nil, fmt.Errorf("failed to %s: %w", action, err) } return &mcp.CallToolResult{ Content: []mcp.Content{ mcp.NewTextContent(message), }, IsError: false, }, nil } // Web extract tool handler handlers["web_extract"] = func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { clientCtx := getContext() params, ok := request.Params.Arguments.(map[string]any) if !ok { return nil, fmt.Errorf("invalid arguments format") } extractType := getStringParam(params, "type", "") tab := getStringParam(params, "tab", clientCtx.GetCurrentTab()) timeout := getIntParam(params, "timeout", 5) if extractType == "" { return nil, fmt.Errorf("type parameter is required") } var result interface{} var err error switch extractType { case "source": result, err = clientCtx.GetClient().GetPageSource(tab, timeout) case "element": selector := getStringParam(params, "selector", "") if selector == "" { return nil, fmt.Errorf("selector parameter is required for element extraction") } result, err = clientCtx.GetClient().GetElementHTML(tab, selector, timeout) case "javascript": code := getStringParam(params, "code", "") if code == "" { return nil, fmt.Errorf("code parameter is required for javascript extraction") } result, err = clientCtx.GetClient().EvalJS(tab, code, timeout) default: return nil, fmt.Errorf("unknown extraction type: %s", extractType) } if err != nil { return nil, fmt.Errorf("failed to extract %s: %w", extractType, err) } resultJSON, err := json.MarshalIndent(result, "", " ") if err != nil { return nil, fmt.Errorf("failed to marshal result: %w", err) } return &mcp.CallToolResult{ Content: []mcp.Content{ mcp.NewTextContent(string(resultJSON)), }, IsError: false, }, nil } // Web screenshot tool handler handlers["web_screenshot"] = func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { clientCtx := getContext() params, ok := request.Params.Arguments.(map[string]any) if !ok { return nil, fmt.Errorf("invalid arguments format") } output := getStringParam(params, "output", "") if output == "" { return nil, fmt.Errorf("output parameter is required") } tab := getStringParam(params, "tab", clientCtx.GetCurrentTab()) timeout := getIntParam(params, "timeout", 5) fullPage := getBoolParam(params, "full_page", false) err := clientCtx.GetClient().TakeScreenshot(tab, output, fullPage, timeout) if err != nil { return nil, fmt.Errorf("failed to take screenshot: %w", err) } clientCtx.AddScreenshot(output) return &mcp.CallToolResult{ Content: []mcp.Content{ mcp.NewTextContent(fmt.Sprintf("Screenshot saved to: %s", output)), }, IsError: false, }, nil } // Web manage tabs tool handler handlers["web_manage_tabs"] = func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { clientCtx := getContext() params, ok := request.Params.Arguments.(map[string]any) if !ok { return nil, fmt.Errorf("invalid arguments format") } action := getStringParam(params, "action", "") tab := getStringParam(params, "tab", "") timeout := getIntParam(params, "timeout", 5) if action == "" { return nil, fmt.Errorf("action parameter is required") } var err error var message string switch action { case "open": newTab, err := clientCtx.GetClient().OpenTab(timeout) if err != nil { return nil, fmt.Errorf("failed to open new tab: %w", err) } clientCtx.SetCurrentTab(newTab) clientCtx.AddToTabHistory(newTab) message = fmt.Sprintf("Opened new tab: %s", newTab) case "close": if tab == "" { tab = clientCtx.GetCurrentTab() } if tab == "" { return nil, fmt.Errorf("no tab to close") } err = clientCtx.GetClient().CloseTab(tab, timeout) if err != nil { return nil, fmt.Errorf("failed to close tab: %w", err) } // Clear current tab if we closed it if tab == clientCtx.GetCurrentTab() { clientCtx.SetCurrentTab("") } message = fmt.Sprintf("Closed tab: %s", tab) case "list": tabs, err := clientCtx.GetClient().ListTabs() if err != nil { return nil, fmt.Errorf("failed to list tabs: %w", err) } message = fmt.Sprintf("Found %d tabs: %v", len(tabs), tabs) case "switch": if tab == "" { return nil, fmt.Errorf("tab parameter is required for switch action") } clientCtx.SetCurrentTab(tab) clientCtx.AddToTabHistory(tab) message = fmt.Sprintf("Switched to tab: %s", tab) default: return nil, fmt.Errorf("unknown tab action: %s", action) } if err != nil { return nil, fmt.Errorf("failed to %s tab: %w", action, err) } return &mcp.CallToolResult{ Content: []mcp.Content{ mcp.NewTextContent(message), }, IsError: false, }, nil } // Web iframe tool handler handlers["web_iframe"] = func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { clientCtx := getContext() params, ok := request.Params.Arguments.(map[string]any) if !ok { return nil, fmt.Errorf("invalid arguments format") } action := getStringParam(params, "action", "") tab := getStringParam(params, "tab", clientCtx.GetCurrentTab()) timeout := getIntParam(params, "timeout", 5) if action == "" { return nil, fmt.Errorf("action parameter is required") } var err error var message string switch action { case "enter": selector := getStringParam(params, "selector", "") if selector == "" { return nil, fmt.Errorf("selector parameter is required for enter action") } err = clientCtx.GetClient().SwitchToIframe(tab, selector, timeout) clientCtx.SetIframeMode(true) message = fmt.Sprintf("Entered iframe: %s", selector) case "exit": err = clientCtx.GetClient().SwitchToMain(tab, timeout) clientCtx.SetIframeMode(false) message = "Exited iframe context" default: return nil, fmt.Errorf("unknown iframe action: %s", action) } if err != nil { return nil, fmt.Errorf("failed to %s iframe: %w", action, err) } return &mcp.CallToolResult{ Content: []mcp.Content{ mcp.NewTextContent(message), }, IsError: false, }, nil } // Console logs tool handler handlers["console_logs"] = func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { clientCtx := getContext() params, ok := request.Params.Arguments.(map[string]any) if !ok { return nil, fmt.Errorf("invalid arguments format") } tab := getStringParam(params, "tab", clientCtx.GetCurrentTab()) clear := getBoolParam(params, "clear", false) logs, err := clientCtx.GetClient().GetConsoleLogs(tab, clear) if err != nil { return nil, fmt.Errorf("failed to get console logs: %w", err) } logsJSON, err := json.MarshalIndent(logs, "", " ") if err != nil { return nil, fmt.Errorf("failed to marshal logs: %w", err) } return &mcp.CallToolResult{ Content: []mcp.Content{ mcp.NewTextContent(string(logsJSON)), }, IsError: false, }, nil } // Console command tool handler handlers["console_command"] = func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { clientCtx := getContext() params, ok := request.Params.Arguments.(map[string]any) if !ok { return nil, fmt.Errorf("invalid arguments format") } command := getStringParam(params, "command", "") tab := getStringParam(params, "tab", clientCtx.GetCurrentTab()) timeout := getIntParam(params, "timeout", 5) if command == "" { return nil, fmt.Errorf("command parameter is required") } result, err := clientCtx.GetClient().EvalJS(tab, command, timeout) if err != nil { return nil, fmt.Errorf("failed to execute console command: %w", err) } resultJSON, err := json.MarshalIndent(result, "", " ") if err != nil { return nil, fmt.Errorf("failed to marshal result: %w", err) } return &mcp.CallToolResult{ Content: []mcp.Content{ mcp.NewTextContent(string(resultJSON)), }, IsError: false, }, nil } // File upload tool handler handlers["file_upload"] = func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { clientCtx := getContext() params, ok := request.Params.Arguments.(map[string]any) if !ok { return nil, fmt.Errorf("invalid arguments format") } localPath := getStringParam(params, "local_path", "") containerPath := getStringParam(params, "container_path", "") if localPath == "" { return nil, fmt.Errorf("local_path parameter is required") } result, err := clientCtx.GetClient().UploadFileToContainer(localPath, containerPath) if err != nil { return nil, fmt.Errorf("failed to upload file: %w", err) } return &mcp.CallToolResult{ Content: []mcp.Content{ mcp.NewTextContent(fmt.Sprintf("File uploaded successfully to: %s", result)), }, IsError: false, }, nil } // File download tool handler handlers["file_download"] = func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { clientCtx := getContext() params, ok := request.Params.Arguments.(map[string]any) if !ok { return nil, fmt.Errorf("invalid arguments format") } containerPath := getStringParam(params, "container_path", "") localPath := getStringParam(params, "local_path", "") if containerPath == "" { return nil, fmt.Errorf("container_path parameter is required") } if localPath == "" { return nil, fmt.Errorf("local_path parameter is required") } err := clientCtx.GetClient().DownloadFileFromContainer(containerPath, localPath) if err != nil { return nil, fmt.Errorf("failed to download file: %w", err) } return &mcp.CallToolResult{ Content: []mcp.Content{ mcp.NewTextContent(fmt.Sprintf("File downloaded successfully to: %s", localPath)), }, IsError: false, }, nil } // Enhanced tools with _cremotemcp suffix // Web element check tool handler handlers["web_element_check_cremotemcp"] = func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { clientCtx := getContext() params, ok := request.Params.Arguments.(map[string]any) if !ok { return nil, fmt.Errorf("invalid arguments format") } selector := getStringParam(params, "selector", "") if selector == "" { return nil, fmt.Errorf("selector parameter is required") } tab := getStringParam(params, "tab", clientCtx.GetCurrentTab()) timeout := getIntParam(params, "timeout", 5) checkType := getStringParam(params, "check_type", "exists") result, err := clientCtx.GetClient().CheckElement(tab, selector, checkType, timeout) if err != nil { return nil, fmt.Errorf("failed to check element: %w", err) } resultJSON, err := json.MarshalIndent(result, "", " ") if err != nil { return nil, fmt.Errorf("failed to marshal result: %w", err) } return &mcp.CallToolResult{ Content: []mcp.Content{ mcp.NewTextContent(string(resultJSON)), }, IsError: false, }, nil } // Web element attributes tool handler handlers["web_element_attributes_cremotemcp"] = func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { clientCtx := getContext() params, ok := request.Params.Arguments.(map[string]any) if !ok { return nil, fmt.Errorf("invalid arguments format") } selector := getStringParam(params, "selector", "") if selector == "" { return nil, fmt.Errorf("selector parameter is required") } tab := getStringParam(params, "tab", clientCtx.GetCurrentTab()) timeout := getIntParam(params, "timeout", 5) attributes := getStringParam(params, "attributes", "all") result, err := clientCtx.GetClient().GetElementAttributes(tab, selector, attributes, timeout) if err != nil { return nil, fmt.Errorf("failed to get element attributes: %w", err) } resultJSON, err := json.MarshalIndent(result, "", " ") if err != nil { return nil, fmt.Errorf("failed to marshal result: %w", err) } return &mcp.CallToolResult{ Content: []mcp.Content{ mcp.NewTextContent(string(resultJSON)), }, IsError: false, }, nil } // Web extract multiple tool handler handlers["web_extract_multiple_cremotemcp"] = func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { clientCtx := getContext() params, ok := request.Params.Arguments.(map[string]any) if !ok { return nil, fmt.Errorf("invalid arguments format") } selectors, ok := params["selectors"].(map[string]interface{}) if !ok { return nil, fmt.Errorf("selectors parameter is required and must be an object") } tab := getStringParam(params, "tab", clientCtx.GetCurrentTab()) timeout := getIntParam(params, "timeout", 5) // Convert selectors to map[string]string selectorsMap := make(map[string]string) for key, value := range selectors { if strValue, ok := value.(string); ok { selectorsMap[key] = strValue } } result, err := clientCtx.GetClient().ExtractMultiple(tab, selectorsMap, timeout) if err != nil { return nil, fmt.Errorf("failed to extract multiple elements: %w", err) } resultJSON, err := json.MarshalIndent(result, "", " ") if err != nil { return nil, fmt.Errorf("failed to marshal result: %w", err) } return &mcp.CallToolResult{ Content: []mcp.Content{ mcp.NewTextContent(string(resultJSON)), }, IsError: false, }, nil } // Add remaining key tools // Web extract links tool handler handlers["web_extract_links_cremotemcp"] = func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { clientCtx := getContext() params, ok := request.Params.Arguments.(map[string]any) if !ok { return nil, fmt.Errorf("invalid arguments format") } tab := getStringParam(params, "tab", clientCtx.GetCurrentTab()) timeout := getIntParam(params, "timeout", 5) containerSelector := getStringParam(params, "container_selector", "") hrefPattern := getStringParam(params, "href_pattern", "") textPattern := getStringParam(params, "text_pattern", "") result, err := clientCtx.GetClient().ExtractLinks(tab, containerSelector, hrefPattern, textPattern, timeout) if err != nil { return nil, fmt.Errorf("failed to extract links: %w", err) } resultJSON, err := json.MarshalIndent(result, "", " ") if err != nil { return nil, fmt.Errorf("failed to marshal result: %w", err) } return &mcp.CallToolResult{ Content: []mcp.Content{ mcp.NewTextContent(string(resultJSON)), }, IsError: false, }, nil } // Web extract table tool handler handlers["web_extract_table_cremotemcp"] = func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { clientCtx := getContext() params, ok := request.Params.Arguments.(map[string]any) if !ok { return nil, fmt.Errorf("invalid arguments format") } selector := getStringParam(params, "selector", "") if selector == "" { return nil, fmt.Errorf("selector parameter is required") } tab := getStringParam(params, "tab", clientCtx.GetCurrentTab()) timeout := getIntParam(params, "timeout", 5) includeHeaders := getBoolParam(params, "include_headers", true) result, err := clientCtx.GetClient().ExtractTable(tab, selector, includeHeaders, timeout) if err != nil { return nil, fmt.Errorf("failed to extract table: %w", err) } resultJSON, err := json.MarshalIndent(result, "", " ") if err != nil { return nil, fmt.Errorf("failed to marshal result: %w", err) } return &mcp.CallToolResult{ Content: []mcp.Content{ mcp.NewTextContent(string(resultJSON)), }, IsError: false, }, nil } // Web page info tool handler handlers["web_page_info_cremotemcp"] = func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { clientCtx := getContext() params, ok := request.Params.Arguments.(map[string]any) if !ok { return nil, fmt.Errorf("invalid arguments format") } tab := getStringParam(params, "tab", clientCtx.GetCurrentTab()) timeout := getIntParam(params, "timeout", 5) result, err := clientCtx.GetClient().GetPageInfo(tab, timeout) if err != nil { return nil, fmt.Errorf("failed to get page info: %w", err) } resultJSON, err := json.MarshalIndent(result, "", " ") if err != nil { return nil, fmt.Errorf("failed to marshal result: %w", err) } return &mcp.CallToolResult{ Content: []mcp.Content{ mcp.NewTextContent(string(resultJSON)), }, IsError: false, }, nil } // File management tool handler handlers["file_management_cremotemcp"] = func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { clientCtx := getContext() params, ok := request.Params.Arguments.(map[string]any) if !ok { return nil, fmt.Errorf("invalid arguments format") } operation := getStringParam(params, "operation", "") pattern := getStringParam(params, "pattern", "") maxAge := getStringParam(params, "max_age", "") if operation == "" { return nil, fmt.Errorf("operation parameter is required") } result, err := clientCtx.GetClient().ManageFiles(operation, pattern, maxAge) if err != nil { return nil, fmt.Errorf("failed to manage files: %w", err) } resultJSON, err := json.MarshalIndent(result, "", " ") if err != nil { return nil, fmt.Errorf("failed to marshal result: %w", err) } return &mcp.CallToolResult{ Content: []mcp.Content{ mcp.NewTextContent(string(resultJSON)), }, IsError: false, }, nil } // Accessibility tree tool handlers // Get full accessibility tree handlers["get_accessibility_tree_cremotemcp"] = func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { clientCtx := getContext() params, ok := request.Params.Arguments.(map[string]any) if !ok { return nil, fmt.Errorf("invalid arguments format") } tab := getStringParam(params, "tab", clientCtx.GetCurrentTab()) timeout := getIntParam(params, "timeout", 5) // Parse depth parameter var depth *int if depthParam := getIntParam(params, "depth", -1); depthParam >= 0 { depth = &depthParam } result, err := clientCtx.GetClient().GetAccessibilityTree(tab, depth, timeout) if err != nil { return nil, fmt.Errorf("failed to get accessibility tree: %w", err) } resultJSON, err := json.MarshalIndent(result, "", " ") if err != nil { return nil, fmt.Errorf("failed to marshal result: %w", err) } return &mcp.CallToolResult{ Content: []mcp.Content{ mcp.NewTextContent(string(resultJSON)), }, IsError: false, }, nil } // Get partial accessibility tree for specific element handlers["get_partial_accessibility_tree_cremotemcp"] = func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { clientCtx := getContext() params, ok := request.Params.Arguments.(map[string]any) if !ok { return nil, fmt.Errorf("invalid arguments format") } selector := getStringParam(params, "selector", "") if selector == "" { return nil, fmt.Errorf("selector parameter is required") } tab := getStringParam(params, "tab", clientCtx.GetCurrentTab()) timeout := getIntParam(params, "timeout", 5) fetchRelatives := getBoolParam(params, "fetch_relatives", true) result, err := clientCtx.GetClient().GetPartialAccessibilityTree(tab, selector, fetchRelatives, timeout) if err != nil { return nil, fmt.Errorf("failed to get partial accessibility tree: %w", err) } resultJSON, err := json.MarshalIndent(result, "", " ") if err != nil { return nil, fmt.Errorf("failed to marshal result: %w", err) } return &mcp.CallToolResult{ Content: []mcp.Content{ mcp.NewTextContent(string(resultJSON)), }, IsError: false, }, nil } // Query accessibility tree for nodes matching criteria handlers["query_accessibility_tree_cremotemcp"] = func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { clientCtx := getContext() params, ok := request.Params.Arguments.(map[string]any) if !ok { return nil, fmt.Errorf("invalid arguments format") } tab := getStringParam(params, "tab", clientCtx.GetCurrentTab()) timeout := getIntParam(params, "timeout", 5) selector := getStringParam(params, "selector", "") accessibleName := getStringParam(params, "accessible_name", "") role := getStringParam(params, "role", "") // At least one search criteria must be provided if selector == "" && accessibleName == "" && role == "" { return nil, fmt.Errorf("at least one search criteria (selector, accessible_name, or role) must be provided") } result, err := clientCtx.GetClient().QueryAccessibilityTree(tab, selector, accessibleName, role, timeout) if err != nil { return nil, fmt.Errorf("failed to query accessibility tree: %w", err) } resultJSON, err := json.MarshalIndent(result, "", " ") if err != nil { return nil, fmt.Errorf("failed to marshal result: %w", err) } return &mcp.CallToolResult{ Content: []mcp.Content{ mcp.NewTextContent(string(resultJSON)), }, IsError: false, }, nil } return handlers } // registerAllTools registers all tools with their schemas func registerAllTools(mcpServer *server.MCPServer, handlers map[string]func(context.Context, mcp.CallToolRequest) (*mcp.CallToolResult, error)) { // Version tool mcpServer.AddTool(mcp.Tool{ Name: "version_cremotemcp", Description: "Get version information for MCP server and daemon", InputSchema: mcp.ToolInputSchema{ Type: "object", Properties: map[string]any{}, Required: []string{}, }, }, handlers["version_cremotemcp"]) // Navigation tools mcpServer.AddTool(mcp.Tool{ Name: "web_navigate_cremotemcp", Description: "Navigate to a URL and optionally take a screenshot", InputSchema: mcp.ToolInputSchema{ Type: "object", Properties: map[string]any{ "url": map[string]any{ "type": "string", "description": "URL to navigate to", }, "tab": map[string]any{ "type": "string", "description": "Tab ID (optional, uses current tab)", }, "timeout": map[string]any{ "type": "integer", "description": "Timeout in seconds (default: 5)", "default": 5, }, "screenshot": map[string]any{ "type": "boolean", "description": "Take screenshot after navigation", }, }, Required: []string{"url"}, }, }, handlers["web_navigate_cremotemcp"]) // Legacy navigation tool mcpServer.AddTool(mcp.Tool{ Name: "web_navigate", Description: "Navigate to a URL and optionally take a screenshot", InputSchema: mcp.ToolInputSchema{ Type: "object", Properties: map[string]any{ "url": map[string]any{ "type": "string", "description": "URL to navigate to", }, "tab": map[string]any{ "type": "string", "description": "Tab ID (optional, uses current tab)", }, "timeout": map[string]any{ "type": "integer", "description": "Timeout in seconds (default: 5)", "default": 5, }, "screenshot": map[string]any{ "type": "boolean", "description": "Take screenshot after navigation", }, }, Required: []string{"url"}, }, }, handlers["web_navigate"]) // Interaction tool mcpServer.AddTool(mcp.Tool{ Name: "web_interact", Description: "Interact with web elements (click, fill, submit)", InputSchema: mcp.ToolInputSchema{ Type: "object", Properties: map[string]any{ "action": map[string]any{ "type": "string", "description": "Action to perform", "enum": []string{"click", "fill", "submit", "select"}, }, "selector": map[string]any{ "type": "string", "description": "CSS selector for the element", }, "tab": map[string]any{ "type": "string", "description": "Tab ID (optional)", }, "timeout": map[string]any{ "type": "integer", "description": "Timeout in seconds", "default": 5, }, "value": map[string]any{ "type": "string", "description": "Value to fill (for fill/select actions)", }, }, Required: []string{"action", "selector"}, }, }, handlers["web_interact"]) // Add more key tools if handler, exists := handlers["web_extract"]; exists { mcpServer.AddTool(mcp.Tool{ Name: "web_extract", Description: "Extract data from the page (source, element HTML, or execute JavaScript)", InputSchema: mcp.ToolInputSchema{ Type: "object", Properties: map[string]any{ "type": map[string]any{ "type": "string", "description": "Type of extraction", "enum": []string{"source", "element", "javascript"}, }, "selector": map[string]any{ "type": "string", "description": "CSS selector (for element type)", }, "code": map[string]any{ "type": "string", "description": "JavaScript code (for javascript type)", }, "tab": map[string]any{ "type": "string", "description": "Tab ID (optional)", }, "timeout": map[string]any{ "type": "integer", "description": "Timeout in seconds", "default": 5, }, }, Required: []string{"type"}, }, }, handler) } if handler, exists := handlers["web_screenshot"]; exists { mcpServer.AddTool(mcp.Tool{ Name: "web_screenshot", Description: "Take a screenshot of the current page", InputSchema: mcp.ToolInputSchema{ Type: "object", Properties: map[string]any{ "output": map[string]any{ "type": "string", "description": "Output file path", }, "tab": map[string]any{ "type": "string", "description": "Tab ID (optional)", }, "timeout": map[string]any{ "type": "integer", "description": "Timeout in seconds", "default": 5, }, "full_page": map[string]any{ "type": "boolean", "description": "Capture full page", "default": false, }, }, Required: []string{"output"}, }, }, handler) } // Accessibility tree tools if handler, exists := handlers["get_accessibility_tree_cremotemcp"]; exists { mcpServer.AddTool(mcp.Tool{ Name: "get_accessibility_tree_cremotemcp", Description: "Get the full accessibility tree for a page or with limited depth", InputSchema: mcp.ToolInputSchema{ Type: "object", Properties: map[string]any{ "tab": map[string]any{ "type": "string", "description": "Tab ID (optional, uses current tab)", }, "depth": map[string]any{ "type": "integer", "description": "Maximum depth to retrieve (optional, omit for full tree)", "minimum": 0, }, "timeout": map[string]any{ "type": "integer", "description": "Timeout in seconds", "default": 5, }, }, Required: []string{}, }, }, handler) } if handler, exists := handlers["get_partial_accessibility_tree_cremotemcp"]; exists { mcpServer.AddTool(mcp.Tool{ Name: "get_partial_accessibility_tree_cremotemcp", Description: "Get accessibility tree for a specific element and its relatives", InputSchema: mcp.ToolInputSchema{ Type: "object", Properties: map[string]any{ "selector": map[string]any{ "type": "string", "description": "CSS selector for the target element", }, "tab": map[string]any{ "type": "string", "description": "Tab ID (optional, uses current tab)", }, "fetch_relatives": map[string]any{ "type": "boolean", "description": "Whether to fetch ancestors, siblings, and children", "default": true, }, "timeout": map[string]any{ "type": "integer", "description": "Timeout in seconds", "default": 5, }, }, Required: []string{"selector"}, }, }, handler) } if handler, exists := handlers["query_accessibility_tree_cremotemcp"]; exists { mcpServer.AddTool(mcp.Tool{ Name: "query_accessibility_tree_cremotemcp", Description: "Query accessibility tree for nodes matching specific criteria", InputSchema: mcp.ToolInputSchema{ Type: "object", Properties: map[string]any{ "tab": map[string]any{ "type": "string", "description": "Tab ID (optional, uses current tab)", }, "selector": map[string]any{ "type": "string", "description": "CSS selector to limit search scope (optional)", }, "accessible_name": map[string]any{ "type": "string", "description": "Accessible name to match (optional)", }, "role": map[string]any{ "type": "string", "description": "ARIA role to match (optional)", }, "timeout": map[string]any{ "type": "integer", "description": "Timeout in seconds", "default": 5, }, }, Required: []string{}, }, }, handler) } } func main() { // Get cremote daemon connection settings cremoteHost := os.Getenv("CREMOTE_HOST") if cremoteHost == "" { cremoteHost = "localhost" } cremotePortStr := os.Getenv("CREMOTE_PORT") cremotePort := 8989 if cremotePortStr != "" { if p, err := strconv.Atoi(cremotePortStr); err == nil { cremotePort = p } } // Get transport mode (stdio or http) transportMode := os.Getenv("CREMOTE_TRANSPORT") if transportMode == "" { transportMode = "stdio" // Default to stdio for backward compatibility } log.Printf("Starting cremote MCP server, transport: %s, connecting to cremote daemon at %s:%d", transportMode, cremoteHost, cremotePort) if transportMode == "http" { // Multi-client HTTP mode sessionManager := NewSessionManager(cremoteHost, cremotePort) startHTTPServer(sessionManager) } else { // Single-client stdio mode (legacy) cremoteServer := NewCremoteServer(cremoteHost, cremotePort) startStdioServer(cremoteServer) } } func startStdioServer(cremoteServer *CremoteServer) { // Create MCP server mcpServer := server.NewMCPServer("cremote-mcp", "2.0.0") // Create session-aware tool handlers toolHandlers := createToolHandlers(func() ClientContext { return cremoteServer }) // Register all tools registerAllTools(mcpServer, toolHandlers) // Start the server log.Printf("Cremote MCP server ready (stdio mode)") if err := server.ServeStdio(mcpServer); err != nil { log.Fatalf("Server error: %v", err) } } func startHTTPServer(sessionManager *SessionManager) { // Start session cleanup routine go func() { ticker := time.NewTicker(5 * time.Minute) defer ticker.Stop() for { select { case <-ticker.C: sessionManager.CleanupExpiredSessions(30 * time.Minute) } } }() // Get HTTP server configuration httpHost := os.Getenv("CREMOTE_HTTP_HOST") if httpHost == "" { httpHost = "localhost" } httpPortStr := os.Getenv("CREMOTE_HTTP_PORT") httpPort := 8990 if httpPortStr != "" { if p, err := strconv.Atoi(httpPortStr); err == nil { httpPort = p } } // Create HTTP server mux := http.NewServeMux() mux.HandleFunc("/mcp", func(w http.ResponseWriter, r *http.Request) { handleMCPRequest(w, r, sessionManager) }) server := &http.Server{ Addr: fmt.Sprintf("%s:%d", httpHost, httpPort), Handler: mux, } log.Printf("Cremote MCP HTTP server ready on %s", server.Addr) if err := server.ListenAndServe(); err != nil { log.Fatalf("HTTP server error: %v", err) } } // handleMCPRequest handles HTTP MCP requests with session management func handleMCPRequest(w http.ResponseWriter, r *http.Request, sessionManager *SessionManager) { // Set CORS headers w.Header().Set("Access-Control-Allow-Origin", "*") w.Header().Set("Access-Control-Allow-Methods", "GET, POST, DELETE, OPTIONS") w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Accept, Mcp-Session-Id, MCP-Protocol-Version") if r.Method == "OPTIONS" { w.WriteHeader(http.StatusOK) return } // Get or create session sessionID := r.Header.Get("Mcp-Session-Id") var session *ClientSession var isNewSession bool if sessionID == "" { // Create new session for initialization session = sessionManager.CreateSession() isNewSession = true w.Header().Set("Mcp-Session-Id", session.ID) } else { // Get existing session var exists bool session, exists = sessionManager.GetSession(sessionID) if !exists { http.Error(w, "Session not found", http.StatusNotFound) return } } // Handle different HTTP methods according to MCP Streamable HTTP spec switch r.Method { case "POST": handleMCPPost(w, r, session, isNewSession) case "GET": handleMCPGet(w, r, session) case "DELETE": handleMCPDelete(w, r, sessionManager, sessionID) default: http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) } } // handleMCPPost handles POST requests (JSON-RPC messages) func handleMCPPost(w http.ResponseWriter, r *http.Request, session *ClientSession, isNewSession bool) { // Read request body var jsonRPCRequest map[string]interface{} if err := json.NewDecoder(r.Body).Decode(&jsonRPCRequest); err != nil { http.Error(w, "Invalid JSON", http.StatusBadRequest) return } // For now, implement a basic response for initialization // TODO: Implement full JSON-RPC handling with session-aware tools method, ok := jsonRPCRequest["method"].(string) if !ok { http.Error(w, "Missing method", http.StatusBadRequest) return } if method == "initialize" { // Handle initialization response := map[string]interface{}{ "jsonrpc": "2.0", "id": jsonRPCRequest["id"], "result": map[string]interface{}{ "protocolVersion": "2025-06-18", "capabilities": map[string]interface{}{ "tools": map[string]interface{}{}, }, "serverInfo": map[string]interface{}{ "name": "cremote-mcp", "version": Version, }, }, } w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(response) return } // For other methods, return not implemented for now w.WriteHeader(http.StatusNotImplemented) w.Write([]byte("Method not yet implemented in HTTP transport")) } // handleMCPGet handles GET requests (SSE streams) func handleMCPGet(w http.ResponseWriter, r *http.Request, session *ClientSession) { // For now, return method not allowed as we don't implement SSE yet http.Error(w, "SSE not yet implemented", http.StatusMethodNotAllowed) } // handleMCPDelete handles DELETE requests (session termination) func handleMCPDelete(w http.ResponseWriter, r *http.Request, sessionManager *SessionManager, sessionID string) { if sessionID == "" { http.Error(w, "No session to delete", http.StatusBadRequest) return } sessionManager.RemoveSession(sessionID) w.WriteHeader(http.StatusOK) w.Write([]byte("Session terminated")) }