// cremote/mcp/main.go
//
// 1582 lines
// 45 KiB
// Go

package main
import (
"context"
"crypto/rand"
"encoding/hex"
"encoding/json"
"fmt"
"log"
"net/http"
"os"
"strconv"
"sync"
"time"
"git.teamworkapps.com/shortcut/cremote/client"
"github.com/mark3labs/mcp-go/mcp"
"github.com/mark3labs/mcp-go/server"
)
// Version is the MCP server version string; the version_cremotemcp tool
// reports it alongside the daemon version.
const Version = "2.0.0"
// ClientContext provides a unified interface for accessing per-client state.
// It lets the same tool handlers work in both single-client mode
// (CremoteServer) and multi-client mode (ClientSession).
type ClientContext interface {
// GetClient returns the cremote client that tool handlers issue commands through.
GetClient() *client.Client
// GetCurrentTab returns the tab ID handlers fall back to when no "tab"
// argument is supplied; it is empty when no tab has been selected.
GetCurrentTab() string
// SetCurrentTab records tabID as the current tab.
SetCurrentTab(tabID string)
// AddToTabHistory appends tabID to the tab history.
AddToTabHistory(tabID string)
// GetTabHistory returns the recorded tab IDs in the order they were added.
GetTabHistory() []string
// SetIframeMode records whether the client is currently inside an iframe context.
SetIframeMode(mode bool)
// GetIframeMode reports the value last stored with SetIframeMode.
GetIframeMode() bool
// AddScreenshot records the path of a screenshot that was taken.
AddScreenshot(path string)
// GetScreenshots returns the recorded screenshot paths in the order they were added.
GetScreenshots() []string
}
// ClientSession represents an isolated session for a single MCP client.
// Its accessor methods take mu, so one session may be used from several
// goroutines.
type ClientSession struct {
// ID is the session identifier, assigned from generateSessionID in CreateSession.
ID string
// client is the cremote client owned by this session.
client *client.Client
// currentTab is the tab ID used when a tool call does not name a tab.
currentTab string
// tabHistory lists tab IDs in the order they were recorded.
tabHistory []string
// iframeMode records whether the session last entered an iframe context.
iframeMode bool
// screenshots lists the paths of screenshots recorded for this session.
screenshots []string
// createdAt is the time the session was created.
createdAt time.Time
// lastUsed is refreshed by GetSession and by the mutating setters;
// CleanupExpiredSessions compares it against the maximum age.
lastUsed time.Time
// mu guards the mutable fields above (GetClient reads client without locking).
mu sync.RWMutex
}
// SessionManager manages multiple client sessions.
type SessionManager struct {
// sessions maps a session ID to its session.
sessions map[string]*ClientSession
// mu guards sessions.
mu sync.RWMutex
// host and port are passed to client.NewClient for every new session.
host string
port int
}
// CremoteServer wraps the cremote client for MCP (legacy single-client mode).
// It holds the same state as ClientSession but has no mutex: its methods
// access the fields without synchronization.
type CremoteServer struct {
// client is the cremote client shared by all tool calls.
client *client.Client
// currentTab is the tab ID used when a tool call does not name a tab.
currentTab string
// tabHistory lists tab IDs in the order they were recorded.
tabHistory []string
// iframeMode records whether the server last entered an iframe context.
iframeMode bool
// screenshots lists the paths of screenshots recorded so far.
screenshots []string
}
// NewSessionManager returns a SessionManager with no sessions. The given host
// and port are stored and later handed to client.NewClient for each session
// that CreateSession builds.
func NewSessionManager(host string, port int) *SessionManager {
	manager := new(SessionManager)
	manager.sessions = map[string]*ClientSession{}
	manager.host = host
	manager.port = port
	return manager
}
// generateSessionID returns a session ID built from 16 bytes read from
// crypto/rand and encoded as 32 lowercase hexadecimal characters.
//
// It panics if the random source cannot be read: continuing would leave the
// buffer zeroed, so every session would receive the same, guessable ID.
func generateSessionID() string {
	var raw [16]byte
	if _, err := rand.Read(raw[:]); err != nil {
		panic(fmt.Errorf("generating session ID: %w", err))
	}
	return hex.EncodeToString(raw[:])
}
// CreateSession builds a session with a fresh ID and its own client, stores
// it in the manager under that ID, and returns it. The manager lock is held
// for the whole call.
func (sm *SessionManager) CreateSession() *ClientSession {
	sm.mu.Lock()
	defer sm.mu.Unlock()

	created := new(ClientSession)
	created.ID = generateSessionID()
	created.client = client.NewClient(sm.host, sm.port)
	created.tabHistory = []string{}
	created.screenshots = []string{}
	created.createdAt = time.Now()
	created.lastUsed = time.Now()

	sm.sessions[created.ID] = created
	return created
}
// GetSession looks up the session stored under sessionID. When one exists its
// lastUsed time is refreshed and it is returned with true; otherwise the
// result is nil and false.
func (sm *SessionManager) GetSession(sessionID string) (*ClientSession, bool) {
	sm.mu.RLock()
	defer sm.mu.RUnlock()

	found, ok := sm.sessions[sessionID]
	if !ok {
		return found, false
	}

	// Only the session's own lock is needed to update its timestamp.
	found.mu.Lock()
	found.lastUsed = time.Now()
	found.mu.Unlock()
	return found, true
}
// RemoveSession drops the session stored under sessionID. Removing an ID that
// is not present is a no-op.
func (sm *SessionManager) RemoveSession(sessionID string) {
	sm.mu.Lock()
	delete(sm.sessions, sessionID)
	sm.mu.Unlock()
}
// CleanupExpiredSessions removes every session whose lastUsed time is more
// than maxAge before the moment the call started.
func (sm *SessionManager) CleanupExpiredSessions(maxAge time.Duration) {
	sm.mu.Lock()
	defer sm.mu.Unlock()

	now := time.Now()
	for id, sess := range sm.sessions {
		sess.mu.RLock()
		idle := now.Sub(sess.lastUsed)
		sess.mu.RUnlock()

		if idle > maxAge {
			// Deleting the entry being visited is safe during a map range.
			delete(sm.sessions, id)
		}
	}
}
// Session-aware methods for ClientSession

// GetCurrentTab returns the session's current tab ID under a read lock.
func (cs *ClientSession) GetCurrentTab() string {
	cs.mu.RLock()
	tab := cs.currentTab
	cs.mu.RUnlock()
	return tab
}
// SetCurrentTab stores tabID as the session's current tab and refreshes
// lastUsed.
func (cs *ClientSession) SetCurrentTab(tabID string) {
	cs.mu.Lock()
	cs.currentTab, cs.lastUsed = tabID, time.Now()
	cs.mu.Unlock()
}
// AddToTabHistory appends tabID to the session's tab history and refreshes
// lastUsed.
func (cs *ClientSession) AddToTabHistory(tabID string) {
	cs.mu.Lock()
	cs.tabHistory, cs.lastUsed = append(cs.tabHistory, tabID), time.Now()
	cs.mu.Unlock()
}
// GetTabHistory returns a copy of the session's tab history, so callers
// cannot modify the slice held by the session.
func (cs *ClientSession) GetTabHistory() []string {
	cs.mu.RLock()
	defer cs.mu.RUnlock()

	snapshot := make([]string, 0, len(cs.tabHistory))
	return append(snapshot, cs.tabHistory...)
}
// SetIframeMode stores the iframe-mode flag and refreshes lastUsed.
func (cs *ClientSession) SetIframeMode(mode bool) {
	cs.mu.Lock()
	cs.iframeMode, cs.lastUsed = mode, time.Now()
	cs.mu.Unlock()
}
// GetIframeMode returns the session's iframe-mode flag under a read lock.
func (cs *ClientSession) GetIframeMode() bool {
	cs.mu.RLock()
	inIframe := cs.iframeMode
	cs.mu.RUnlock()
	return inIframe
}
// AddScreenshot appends path to the session's screenshot list and refreshes
// lastUsed.
func (cs *ClientSession) AddScreenshot(path string) {
	cs.mu.Lock()
	cs.screenshots, cs.lastUsed = append(cs.screenshots, path), time.Now()
	cs.mu.Unlock()
}
// GetScreenshots returns a copy of the session's screenshot paths, so callers
// cannot modify the slice held by the session.
func (cs *ClientSession) GetScreenshots() []string {
	cs.mu.RLock()
	defer cs.mu.RUnlock()

	snapshot := make([]string, 0, len(cs.screenshots))
	return append(snapshot, cs.screenshots...)
}
// Implement ClientContext interface for ClientSession

// GetClient returns the session's cremote client. The field is read without
// taking the session lock.
func (cs *ClientSession) GetClient() *client.Client {
	sessionClient := cs.client
	return sessionClient
}
// NewCremoteServer returns a CremoteServer (legacy single-client mode) whose
// client is created for the given host and port, with empty tab history and
// screenshot lists.
func NewCremoteServer(host string, port int) *CremoteServer {
	srv := new(CremoteServer)
	srv.client = client.NewClient(host, port)
	srv.tabHistory = []string{}
	srv.screenshots = []string{}
	return srv
}
// Implement ClientContext interface for CremoteServer (legacy mode)

// GetClient returns the server's cremote client.
func (cs *CremoteServer) GetClient() *client.Client {
	serverClient := cs.client
	return serverClient
}
// GetCurrentTab returns the server's current tab ID.
func (cs *CremoteServer) GetCurrentTab() string {
	tab := cs.currentTab
	return tab
}
// SetCurrentTab stores tabID as the server's current tab.
func (cs *CremoteServer) SetCurrentTab(tabID string) {
	selected := tabID
	cs.currentTab = selected
}
// AddToTabHistory appends tabID to the server's tab history.
func (cs *CremoteServer) AddToTabHistory(tabID string) {
	extended := append(cs.tabHistory, tabID)
	cs.tabHistory = extended
}
// GetTabHistory returns a copy of the server's tab history, so callers cannot
// modify the slice held by the server.
func (cs *CremoteServer) GetTabHistory() []string {
	snapshot := make([]string, 0, len(cs.tabHistory))
	return append(snapshot, cs.tabHistory...)
}
// SetIframeMode stores the iframe-mode flag.
func (cs *CremoteServer) SetIframeMode(mode bool) {
	inIframe := mode
	cs.iframeMode = inIframe
}
// GetIframeMode returns the server's iframe-mode flag.
func (cs *CremoteServer) GetIframeMode() bool {
	inIframe := cs.iframeMode
	return inIframe
}
// AddScreenshot appends path to the server's screenshot list.
func (cs *CremoteServer) AddScreenshot(path string) {
	extended := append(cs.screenshots, path)
	cs.screenshots = extended
}
// GetScreenshots returns a copy of the server's screenshot paths, so callers
// cannot modify the slice held by the server.
func (cs *CremoteServer) GetScreenshots() []string {
	snapshot := make([]string, 0, len(cs.screenshots))
	return append(snapshot, cs.screenshots...)
}
// Helper functions for parameter extraction

// getStringParam returns params[key] when it holds a string (including the
// empty string). A missing key or a value of any other type yields
// defaultValue.
func getStringParam(params map[string]any, key, defaultValue string) string {
	switch text := params[key].(type) {
	case string:
		return text
	default:
		return defaultValue
	}
}
// getBoolParam returns params[key] when it holds a bool. A missing key or a
// value of any other type yields defaultValue.
func getBoolParam(params map[string]any, key string, defaultValue bool) bool {
	flag, isBool := params[key].(bool)
	if !isBool {
		return defaultValue
	}
	return flag
}
// getIntParam returns params[key] as an int. A float64 value is converted
// with int(), which drops the fractional part; an int value is returned as
// is. A missing key or a value of any other type yields defaultValue.
func getIntParam(params map[string]any, key string, defaultValue int) int {
	switch number := params[key].(type) {
	case float64:
		return int(number)
	case int:
		return number
	}
	return defaultValue
}
// createToolHandlers builds the tool handlers, keyed by tool name. Every
// handler obtains its client state by calling getContext on each invocation,
// so the same handlers work with any ClientContext implementation.
//
// Conventions shared by the handlers:
//   - Failures are reported by returning a nil result and a non-nil error.
//   - A call without any arguments is treated as a call with an empty
//     argument object; arguments of any other non-object shape are rejected
//     with "invalid arguments format".
//   - Handlers that act on a tab use the "tab" argument when it is a string
//     and the context's current tab otherwise.
//   - "timeout" defaults to 5 and is passed through to the client unchanged.
//   - Structured results are returned as indented JSON text.
func createToolHandlers(getContext func() ClientContext) map[string]func(context.Context, mcp.CallToolRequest) (*mcp.CallToolResult, error) {
	handlers := make(map[string]func(context.Context, mcp.CallToolRequest) (*mcp.CallToolResult, error))

	// textResult wraps a message in a successful single-text tool result.
	textResult := func(text string) *mcp.CallToolResult {
		return &mcp.CallToolResult{
			Content: []mcp.Content{
				mcp.NewTextContent(text),
			},
			IsError: false,
		}
	}

	// jsonResult renders value as indented JSON text. what names the payload
	// in the error message produced when marshalling fails.
	jsonResult := func(value any, what string) (*mcp.CallToolResult, error) {
		encoded, err := json.MarshalIndent(value, "", " ")
		if err != nil {
			return nil, fmt.Errorf("failed to marshal %s: %w", what, err)
		}
		return textResult(string(encoded)), nil
	}

	// withArgs adapts run to the handler signature: it resolves the client
	// context, extracts the argument object and passes both to run.
	withArgs := func(run func(clientCtx ClientContext, params map[string]any) (*mcp.CallToolResult, error)) func(context.Context, mcp.CallToolRequest) (*mcp.CallToolResult, error) {
		return func(_ context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
			clientCtx := getContext()
			params, ok := request.Params.Arguments.(map[string]any)
			if !ok {
				if request.Params.Arguments != nil {
					return nil, fmt.Errorf("invalid arguments format")
				}
				// No arguments at all: let each handler apply its defaults
				// and report its own missing required parameters.
				params = map[string]any{}
			}
			return run(clientCtx, params)
		}
	}

	// Version tool handler. A daemon that cannot be reached is reported in
	// the text rather than as an error.
	handlers["version_cremotemcp"] = func(_ context.Context, _ mcp.CallToolRequest) (*mcp.CallToolResult, error) {
		clientCtx := getContext()
		daemonVersion, err := clientCtx.GetClient().GetVersion()
		if err != nil {
			daemonVersion = fmt.Sprintf("Unable to connect: %v", err)
		}
		return textResult(fmt.Sprintf("MCP Server version: %s\nDaemon version: %s", Version, daemonVersion)), nil
	}

	// Navigation tool handler
	handlers["web_navigate_cremotemcp"] = withArgs(func(clientCtx ClientContext, params map[string]any) (*mcp.CallToolResult, error) {
		url := getStringParam(params, "url", "")
		if url == "" {
			return nil, fmt.Errorf("url parameter is required")
		}
		tab := getStringParam(params, "tab", clientCtx.GetCurrentTab())
		timeout := getIntParam(params, "timeout", 5)
		takeScreenshot := getBoolParam(params, "screenshot", false)

		// If no tab specified and no current tab, create a new one
		if tab == "" {
			newTab, err := clientCtx.GetClient().OpenTab(timeout)
			if err != nil {
				return nil, fmt.Errorf("failed to create new tab: %w", err)
			}
			tab = newTab
			clientCtx.SetCurrentTab(tab)
			clientCtx.AddToTabHistory(tab)
		}

		// Load the URL
		if err := clientCtx.GetClient().LoadURL(tab, url, timeout); err != nil {
			return nil, fmt.Errorf("failed to load URL: %w", err)
		}
		message := fmt.Sprintf("Successfully navigated to %s in tab %s", url, tab)

		// Take screenshot if requested. The navigation itself succeeded, so a
		// screenshot failure is reported in the message instead of failing
		// the whole call.
		if takeScreenshot {
			screenshotPath := fmt.Sprintf("/tmp/navigate-%d.png", time.Now().Unix())
			if err := clientCtx.GetClient().TakeScreenshot(tab, screenshotPath, false, timeout); err != nil {
				message += fmt.Sprintf(" (screenshot failed: %v)", err)
			} else {
				clientCtx.AddScreenshot(screenshotPath)
				message += fmt.Sprintf(" (screenshot saved to %s)", screenshotPath)
			}
		}
		return textResult(message), nil
	})

	// Legacy navigation tool handler (for backward compatibility)
	handlers["web_navigate"] = handlers["web_navigate_cremotemcp"]

	// Web interaction tool handler
	handlers["web_interact"] = withArgs(func(clientCtx ClientContext, params map[string]any) (*mcp.CallToolResult, error) {
		action := getStringParam(params, "action", "")
		selector := getStringParam(params, "selector", "")
		tab := getStringParam(params, "tab", clientCtx.GetCurrentTab())
		timeout := getIntParam(params, "timeout", 5)
		if action == "" {
			return nil, fmt.Errorf("action parameter is required")
		}
		if selector == "" {
			return nil, fmt.Errorf("selector parameter is required")
		}

		var err error
		var message string
		switch action {
		case "click":
			err = clientCtx.GetClient().ClickElement(tab, selector, timeout)
			message = fmt.Sprintf("Clicked element: %s", selector)
		case "fill":
			value := getStringParam(params, "value", "")
			if value == "" {
				return nil, fmt.Errorf("value parameter is required for fill action")
			}
			err = clientCtx.GetClient().FillFormField(tab, selector, value, timeout)
			message = fmt.Sprintf("Filled element %s with value: %s", selector, value)
		case "submit":
			err = clientCtx.GetClient().SubmitForm(tab, selector, timeout)
			message = fmt.Sprintf("Submitted form: %s", selector)
		case "select":
			value := getStringParam(params, "value", "")
			if value == "" {
				return nil, fmt.Errorf("value parameter is required for select action")
			}
			err = clientCtx.GetClient().SelectElement(tab, selector, value, timeout)
			message = fmt.Sprintf("Selected option %s in element %s", value, selector)
		default:
			return nil, fmt.Errorf("unknown action: %s", action)
		}
		if err != nil {
			return nil, fmt.Errorf("failed to %s: %w", action, err)
		}
		return textResult(message), nil
	})

	// Web extract tool handler
	handlers["web_extract"] = withArgs(func(clientCtx ClientContext, params map[string]any) (*mcp.CallToolResult, error) {
		extractType := getStringParam(params, "type", "")
		tab := getStringParam(params, "tab", clientCtx.GetCurrentTab())
		timeout := getIntParam(params, "timeout", 5)
		if extractType == "" {
			return nil, fmt.Errorf("type parameter is required")
		}

		var result any
		var err error
		switch extractType {
		case "source":
			result, err = clientCtx.GetClient().GetPageSource(tab, timeout)
		case "element":
			selector := getStringParam(params, "selector", "")
			if selector == "" {
				return nil, fmt.Errorf("selector parameter is required for element extraction")
			}
			result, err = clientCtx.GetClient().GetElementHTML(tab, selector, timeout)
		case "javascript":
			code := getStringParam(params, "code", "")
			if code == "" {
				return nil, fmt.Errorf("code parameter is required for javascript extraction")
			}
			result, err = clientCtx.GetClient().EvalJS(tab, code, timeout)
		default:
			return nil, fmt.Errorf("unknown extraction type: %s", extractType)
		}
		if err != nil {
			return nil, fmt.Errorf("failed to extract %s: %w", extractType, err)
		}
		return jsonResult(result, "result")
	})

	// Web screenshot tool handler
	handlers["web_screenshot"] = withArgs(func(clientCtx ClientContext, params map[string]any) (*mcp.CallToolResult, error) {
		output := getStringParam(params, "output", "")
		if output == "" {
			return nil, fmt.Errorf("output parameter is required")
		}
		tab := getStringParam(params, "tab", clientCtx.GetCurrentTab())
		timeout := getIntParam(params, "timeout", 5)
		fullPage := getBoolParam(params, "full_page", false)

		if err := clientCtx.GetClient().TakeScreenshot(tab, output, fullPage, timeout); err != nil {
			return nil, fmt.Errorf("failed to take screenshot: %w", err)
		}
		clientCtx.AddScreenshot(output)
		return textResult(fmt.Sprintf("Screenshot saved to: %s", output)), nil
	})

	// Web manage tabs tool handler. Each action handles its own error, so
	// there is no shared error variable to shadow.
	handlers["web_manage_tabs"] = withArgs(func(clientCtx ClientContext, params map[string]any) (*mcp.CallToolResult, error) {
		action := getStringParam(params, "action", "")
		tab := getStringParam(params, "tab", "")
		timeout := getIntParam(params, "timeout", 5)
		if action == "" {
			return nil, fmt.Errorf("action parameter is required")
		}

		var message string
		switch action {
		case "open":
			newTab, err := clientCtx.GetClient().OpenTab(timeout)
			if err != nil {
				return nil, fmt.Errorf("failed to open new tab: %w", err)
			}
			clientCtx.SetCurrentTab(newTab)
			clientCtx.AddToTabHistory(newTab)
			message = fmt.Sprintf("Opened new tab: %s", newTab)
		case "close":
			if tab == "" {
				tab = clientCtx.GetCurrentTab()
			}
			if tab == "" {
				return nil, fmt.Errorf("no tab to close")
			}
			if err := clientCtx.GetClient().CloseTab(tab, timeout); err != nil {
				return nil, fmt.Errorf("failed to close tab: %w", err)
			}
			// Clear current tab if we closed it
			if tab == clientCtx.GetCurrentTab() {
				clientCtx.SetCurrentTab("")
			}
			message = fmt.Sprintf("Closed tab: %s", tab)
		case "list":
			tabs, err := clientCtx.GetClient().ListTabs()
			if err != nil {
				return nil, fmt.Errorf("failed to list tabs: %w", err)
			}
			message = fmt.Sprintf("Found %d tabs: %v", len(tabs), tabs)
		case "switch":
			if tab == "" {
				return nil, fmt.Errorf("tab parameter is required for switch action")
			}
			clientCtx.SetCurrentTab(tab)
			clientCtx.AddToTabHistory(tab)
			message = fmt.Sprintf("Switched to tab: %s", tab)
		default:
			return nil, fmt.Errorf("unknown tab action: %s", action)
		}
		return textResult(message), nil
	})

	// Web iframe tool handler. The recorded iframe mode is only changed after
	// the client call succeeds, so it is not altered by a failed switch.
	handlers["web_iframe"] = withArgs(func(clientCtx ClientContext, params map[string]any) (*mcp.CallToolResult, error) {
		action := getStringParam(params, "action", "")
		tab := getStringParam(params, "tab", clientCtx.GetCurrentTab())
		timeout := getIntParam(params, "timeout", 5)
		if action == "" {
			return nil, fmt.Errorf("action parameter is required")
		}

		var message string
		switch action {
		case "enter":
			selector := getStringParam(params, "selector", "")
			if selector == "" {
				return nil, fmt.Errorf("selector parameter is required for enter action")
			}
			if err := clientCtx.GetClient().SwitchToIframe(tab, selector, timeout); err != nil {
				return nil, fmt.Errorf("failed to %s iframe: %w", action, err)
			}
			clientCtx.SetIframeMode(true)
			message = fmt.Sprintf("Entered iframe: %s", selector)
		case "exit":
			if err := clientCtx.GetClient().SwitchToMain(tab, timeout); err != nil {
				return nil, fmt.Errorf("failed to %s iframe: %w", action, err)
			}
			clientCtx.SetIframeMode(false)
			message = "Exited iframe context"
		default:
			return nil, fmt.Errorf("unknown iframe action: %s", action)
		}
		return textResult(message), nil
	})

	// Console logs tool handler
	handlers["console_logs"] = withArgs(func(clientCtx ClientContext, params map[string]any) (*mcp.CallToolResult, error) {
		tab := getStringParam(params, "tab", clientCtx.GetCurrentTab())
		clearLogs := getBoolParam(params, "clear", false)

		logs, err := clientCtx.GetClient().GetConsoleLogs(tab, clearLogs)
		if err != nil {
			return nil, fmt.Errorf("failed to get console logs: %w", err)
		}
		return jsonResult(logs, "logs")
	})

	// Console command tool handler
	handlers["console_command"] = withArgs(func(clientCtx ClientContext, params map[string]any) (*mcp.CallToolResult, error) {
		command := getStringParam(params, "command", "")
		tab := getStringParam(params, "tab", clientCtx.GetCurrentTab())
		timeout := getIntParam(params, "timeout", 5)
		if command == "" {
			return nil, fmt.Errorf("command parameter is required")
		}

		result, err := clientCtx.GetClient().EvalJS(tab, command, timeout)
		if err != nil {
			return nil, fmt.Errorf("failed to execute console command: %w", err)
		}
		return jsonResult(result, "result")
	})

	// File upload tool handler
	handlers["file_upload"] = withArgs(func(clientCtx ClientContext, params map[string]any) (*mcp.CallToolResult, error) {
		localPath := getStringParam(params, "local_path", "")
		containerPath := getStringParam(params, "container_path", "")
		if localPath == "" {
			return nil, fmt.Errorf("local_path parameter is required")
		}

		result, err := clientCtx.GetClient().UploadFileToContainer(localPath, containerPath)
		if err != nil {
			return nil, fmt.Errorf("failed to upload file: %w", err)
		}
		return textResult(fmt.Sprintf("File uploaded successfully to: %s", result)), nil
	})

	// File download tool handler
	handlers["file_download"] = withArgs(func(clientCtx ClientContext, params map[string]any) (*mcp.CallToolResult, error) {
		containerPath := getStringParam(params, "container_path", "")
		localPath := getStringParam(params, "local_path", "")
		if containerPath == "" {
			return nil, fmt.Errorf("container_path parameter is required")
		}
		if localPath == "" {
			return nil, fmt.Errorf("local_path parameter is required")
		}

		if err := clientCtx.GetClient().DownloadFileFromContainer(containerPath, localPath); err != nil {
			return nil, fmt.Errorf("failed to download file: %w", err)
		}
		return textResult(fmt.Sprintf("File downloaded successfully to: %s", localPath)), nil
	})

	// Enhanced tools with _cremotemcp suffix

	// Web element check tool handler
	handlers["web_element_check_cremotemcp"] = withArgs(func(clientCtx ClientContext, params map[string]any) (*mcp.CallToolResult, error) {
		selector := getStringParam(params, "selector", "")
		if selector == "" {
			return nil, fmt.Errorf("selector parameter is required")
		}
		tab := getStringParam(params, "tab", clientCtx.GetCurrentTab())
		timeout := getIntParam(params, "timeout", 5)
		checkType := getStringParam(params, "check_type", "exists")

		result, err := clientCtx.GetClient().CheckElement(tab, selector, checkType, timeout)
		if err != nil {
			return nil, fmt.Errorf("failed to check element: %w", err)
		}
		return jsonResult(result, "result")
	})

	// Web element attributes tool handler
	handlers["web_element_attributes_cremotemcp"] = withArgs(func(clientCtx ClientContext, params map[string]any) (*mcp.CallToolResult, error) {
		selector := getStringParam(params, "selector", "")
		if selector == "" {
			return nil, fmt.Errorf("selector parameter is required")
		}
		tab := getStringParam(params, "tab", clientCtx.GetCurrentTab())
		timeout := getIntParam(params, "timeout", 5)
		attributes := getStringParam(params, "attributes", "all")

		result, err := clientCtx.GetClient().GetElementAttributes(tab, selector, attributes, timeout)
		if err != nil {
			return nil, fmt.Errorf("failed to get element attributes: %w", err)
		}
		return jsonResult(result, "result")
	})

	// Web extract multiple tool handler
	handlers["web_extract_multiple_cremotemcp"] = withArgs(func(clientCtx ClientContext, params map[string]any) (*mcp.CallToolResult, error) {
		selectors, ok := params["selectors"].(map[string]any)
		if !ok {
			return nil, fmt.Errorf("selectors parameter is required and must be an object")
		}
		tab := getStringParam(params, "tab", clientCtx.GetCurrentTab())
		timeout := getIntParam(params, "timeout", 5)

		// Convert selectors to map[string]string; entries whose value is not
		// a string are left out.
		selectorsMap := make(map[string]string)
		for key, value := range selectors {
			if strValue, ok := value.(string); ok {
				selectorsMap[key] = strValue
			}
		}

		result, err := clientCtx.GetClient().ExtractMultiple(tab, selectorsMap, timeout)
		if err != nil {
			return nil, fmt.Errorf("failed to extract multiple elements: %w", err)
		}
		return jsonResult(result, "result")
	})

	// Add remaining key tools

	// Web extract links tool handler
	handlers["web_extract_links_cremotemcp"] = withArgs(func(clientCtx ClientContext, params map[string]any) (*mcp.CallToolResult, error) {
		tab := getStringParam(params, "tab", clientCtx.GetCurrentTab())
		timeout := getIntParam(params, "timeout", 5)
		containerSelector := getStringParam(params, "container_selector", "")
		hrefPattern := getStringParam(params, "href_pattern", "")
		textPattern := getStringParam(params, "text_pattern", "")

		result, err := clientCtx.GetClient().ExtractLinks(tab, containerSelector, hrefPattern, textPattern, timeout)
		if err != nil {
			return nil, fmt.Errorf("failed to extract links: %w", err)
		}
		return jsonResult(result, "result")
	})

	// Web extract table tool handler
	handlers["web_extract_table_cremotemcp"] = withArgs(func(clientCtx ClientContext, params map[string]any) (*mcp.CallToolResult, error) {
		selector := getStringParam(params, "selector", "")
		if selector == "" {
			return nil, fmt.Errorf("selector parameter is required")
		}
		tab := getStringParam(params, "tab", clientCtx.GetCurrentTab())
		timeout := getIntParam(params, "timeout", 5)
		includeHeaders := getBoolParam(params, "include_headers", true)

		result, err := clientCtx.GetClient().ExtractTable(tab, selector, includeHeaders, timeout)
		if err != nil {
			return nil, fmt.Errorf("failed to extract table: %w", err)
		}
		return jsonResult(result, "result")
	})

	// Web page info tool handler
	handlers["web_page_info_cremotemcp"] = withArgs(func(clientCtx ClientContext, params map[string]any) (*mcp.CallToolResult, error) {
		tab := getStringParam(params, "tab", clientCtx.GetCurrentTab())
		timeout := getIntParam(params, "timeout", 5)

		result, err := clientCtx.GetClient().GetPageInfo(tab, timeout)
		if err != nil {
			return nil, fmt.Errorf("failed to get page info: %w", err)
		}
		return jsonResult(result, "result")
	})

	// File management tool handler
	handlers["file_management_cremotemcp"] = withArgs(func(clientCtx ClientContext, params map[string]any) (*mcp.CallToolResult, error) {
		operation := getStringParam(params, "operation", "")
		pattern := getStringParam(params, "pattern", "")
		maxAge := getStringParam(params, "max_age", "")
		if operation == "" {
			return nil, fmt.Errorf("operation parameter is required")
		}

		result, err := clientCtx.GetClient().ManageFiles(operation, pattern, maxAge)
		if err != nil {
			return nil, fmt.Errorf("failed to manage files: %w", err)
		}
		return jsonResult(result, "result")
	})

	// Accessibility tree tool handlers

	// Get full accessibility tree
	handlers["get_accessibility_tree_cremotemcp"] = withArgs(func(clientCtx ClientContext, params map[string]any) (*mcp.CallToolResult, error) {
		tab := getStringParam(params, "tab", clientCtx.GetCurrentTab())
		timeout := getIntParam(params, "timeout", 5)

		// A missing or negative depth is passed to the client as nil.
		var depth *int
		if depthParam := getIntParam(params, "depth", -1); depthParam >= 0 {
			depth = &depthParam
		}

		result, err := clientCtx.GetClient().GetAccessibilityTree(tab, depth, timeout)
		if err != nil {
			return nil, fmt.Errorf("failed to get accessibility tree: %w", err)
		}
		return jsonResult(result, "result")
	})

	// Get partial accessibility tree for specific element
	handlers["get_partial_accessibility_tree_cremotemcp"] = withArgs(func(clientCtx ClientContext, params map[string]any) (*mcp.CallToolResult, error) {
		selector := getStringParam(params, "selector", "")
		if selector == "" {
			return nil, fmt.Errorf("selector parameter is required")
		}
		tab := getStringParam(params, "tab", clientCtx.GetCurrentTab())
		timeout := getIntParam(params, "timeout", 5)
		fetchRelatives := getBoolParam(params, "fetch_relatives", true)

		result, err := clientCtx.GetClient().GetPartialAccessibilityTree(tab, selector, fetchRelatives, timeout)
		if err != nil {
			return nil, fmt.Errorf("failed to get partial accessibility tree: %w", err)
		}
		return jsonResult(result, "result")
	})

	// Query accessibility tree for nodes matching criteria
	handlers["query_accessibility_tree_cremotemcp"] = withArgs(func(clientCtx ClientContext, params map[string]any) (*mcp.CallToolResult, error) {
		tab := getStringParam(params, "tab", clientCtx.GetCurrentTab())
		timeout := getIntParam(params, "timeout", 5)
		selector := getStringParam(params, "selector", "")
		accessibleName := getStringParam(params, "accessible_name", "")
		role := getStringParam(params, "role", "")

		// At least one search criteria must be provided
		if selector == "" && accessibleName == "" && role == "" {
			return nil, fmt.Errorf("at least one search criteria (selector, accessible_name, or role) must be provided")
		}

		result, err := clientCtx.GetClient().QueryAccessibilityTree(tab, selector, accessibleName, role, timeout)
		if err != nil {
			return nil, fmt.Errorf("failed to query accessibility tree: %w", err)
		}
		return jsonResult(result, "result")
	})

	return handlers
}
// registerAllTools registers every supported tool, together with its JSON
// input schema, on mcpServer.
//
// Each tool is looked up in handlers under its own name. A tool whose handler
// is missing (or nil) is skipped and logged rather than registered, so the
// server never advertises a tool that cannot be invoked.
func registerAllTools(mcpServer *server.MCPServer, handlers map[string]func(context.Context, mcp.CallToolRequest) (*mcp.CallToolResult, error)) {
	// register adds tool only when a usable handler exists under tool.Name.
	register := func(tool mcp.Tool) {
		handler, ok := handlers[tool.Name]
		if !ok || handler == nil {
			log.Printf("Skipping tool %q: no handler available", tool.Name)
			return
		}
		mcpServer.AddTool(tool, handler)
	}

	// prop builds a fresh property schema with a type and a description.
	prop := func(typ, description string) map[string]any {
		return map[string]any{
			"type":        typ,
			"description": description,
		}
	}

	// propWith is prop plus one extra schema keyword (default, enum, minimum).
	propWith := func(typ, description, key string, value any) map[string]any {
		p := prop(typ, description)
		p[key] = value
		return p
	}

	// schema builds an object input schema from its properties and required keys.
	schema := func(required []string, properties map[string]any) mcp.ToolInputSchema {
		return mcp.ToolInputSchema{
			Type:       "object",
			Properties: properties,
			Required:   required,
		}
	}

	// navigateTool builds the navigation tool definition, which is published
	// under both its current and its legacy name. A fresh schema is built on
	// every call so the two registrations never share maps.
	navigateTool := func(name string) mcp.Tool {
		return mcp.Tool{
			Name:        name,
			Description: "Navigate to a URL and optionally take a screenshot",
			InputSchema: schema([]string{"url"}, map[string]any{
				"url":        prop("string", "URL to navigate to"),
				"tab":        prop("string", "Tab ID (optional, uses current tab)"),
				"timeout":    propWith("integer", "Timeout in seconds (default: 5)", "default", 5),
				"screenshot": prop("boolean", "Take screenshot after navigation"),
			}),
		}
	}

	// Version tool
	register(mcp.Tool{
		Name:        "version_cremotemcp",
		Description: "Get version information for MCP server and daemon",
		InputSchema: schema([]string{}, map[string]any{}),
	})

	// Navigation tools (current name, then legacy name)
	register(navigateTool("web_navigate_cremotemcp"))
	register(navigateTool("web_navigate"))

	// Interaction tool
	register(mcp.Tool{
		Name:        "web_interact",
		Description: "Interact with web elements (click, fill, submit)",
		InputSchema: schema([]string{"action", "selector"}, map[string]any{
			"action":   propWith("string", "Action to perform", "enum", []string{"click", "fill", "submit", "select"}),
			"selector": prop("string", "CSS selector for the element"),
			"tab":      prop("string", "Tab ID (optional)"),
			"timeout":  propWith("integer", "Timeout in seconds", "default", 5),
			"value":    prop("string", "Value to fill (for fill/select actions)"),
		}),
	})

	// Extraction and screenshot tools
	register(mcp.Tool{
		Name:        "web_extract",
		Description: "Extract data from the page (source, element HTML, or execute JavaScript)",
		InputSchema: schema([]string{"type"}, map[string]any{
			"type":     propWith("string", "Type of extraction", "enum", []string{"source", "element", "javascript"}),
			"selector": prop("string", "CSS selector (for element type)"),
			"code":     prop("string", "JavaScript code (for javascript type)"),
			"tab":      prop("string", "Tab ID (optional)"),
			"timeout":  propWith("integer", "Timeout in seconds", "default", 5),
		}),
	})
	register(mcp.Tool{
		Name:        "web_screenshot",
		Description: "Take a screenshot of the current page",
		InputSchema: schema([]string{"output"}, map[string]any{
			"output":    prop("string", "Output file path"),
			"tab":       prop("string", "Tab ID (optional)"),
			"timeout":   propWith("integer", "Timeout in seconds", "default", 5),
			"full_page": propWith("boolean", "Capture full page", "default", false),
		}),
	})

	// Accessibility tree tools
	register(mcp.Tool{
		Name:        "get_accessibility_tree_cremotemcp",
		Description: "Get the full accessibility tree for a page or with limited depth",
		InputSchema: schema([]string{}, map[string]any{
			"tab":     prop("string", "Tab ID (optional, uses current tab)"),
			"depth":   propWith("integer", "Maximum depth to retrieve (optional, omit for full tree)", "minimum", 0),
			"timeout": propWith("integer", "Timeout in seconds", "default", 5),
		}),
	})
	register(mcp.Tool{
		Name:        "get_partial_accessibility_tree_cremotemcp",
		Description: "Get accessibility tree for a specific element and its relatives",
		InputSchema: schema([]string{"selector"}, map[string]any{
			"selector":        prop("string", "CSS selector for the target element"),
			"tab":             prop("string", "Tab ID (optional, uses current tab)"),
			"fetch_relatives": propWith("boolean", "Whether to fetch ancestors, siblings, and children", "default", true),
			"timeout":         propWith("integer", "Timeout in seconds", "default", 5),
		}),
	})
	register(mcp.Tool{
		Name:        "query_accessibility_tree_cremotemcp",
		Description: "Query accessibility tree for nodes matching specific criteria",
		InputSchema: schema([]string{}, map[string]any{
			"tab":             prop("string", "Tab ID (optional, uses current tab)"),
			"selector":        prop("string", "CSS selector to limit search scope (optional)"),
			"accessible_name": prop("string", "Accessible name to match (optional)"),
			"role":            prop("string", "ARIA role to match (optional)"),
			"timeout":         propWith("integer", "Timeout in seconds", "default", 5),
		}),
	})
}
func main() {
// Get cremote daemon connection settings
cremoteHost := os.Getenv("CREMOTE_HOST")
if cremoteHost == "" {
cremoteHost = "localhost"
}
cremotePortStr := os.Getenv("CREMOTE_PORT")
cremotePort := 8989
if cremotePortStr != "" {
if p, err := strconv.Atoi(cremotePortStr); err == nil {
cremotePort = p
}
}
// Get transport mode (stdio or http)
transportMode := os.Getenv("CREMOTE_TRANSPORT")
if transportMode == "" {
transportMode = "stdio" // Default to stdio for backward compatibility
}
log.Printf("Starting cremote MCP server, transport: %s, connecting to cremote daemon at %s:%d", transportMode, cremoteHost, cremotePort)
if transportMode == "http" {
// Multi-client HTTP mode
sessionManager := NewSessionManager(cremoteHost, cremotePort)
startHTTPServer(sessionManager)
} else {
// Single-client stdio mode (legacy)
cremoteServer := NewCremoteServer(cremoteHost, cremotePort)
startStdioServer(cremoteServer)
}
}
// startStdioServer runs the MCP server over stdin/stdout for a single client,
// with all tool handlers bound to cremoteServer. It blocks until the stdio
// transport stops and terminates the process if serving fails.
func startStdioServer(cremoteServer *CremoteServer) {
	// Advertise the package-level Version so the stdio and HTTP transports
	// always report the same server version.
	mcpServer := server.NewMCPServer("cremote-mcp", Version)

	// Every tool call resolves to the one shared legacy client context.
	toolHandlers := createToolHandlers(func() ClientContext {
		return cremoteServer
	})

	// Register all tools
	registerAllTools(mcpServer, toolHandlers)

	// Start the server
	log.Printf("Cremote MCP server ready (stdio mode)")
	if err := server.ServeStdio(mcpServer); err != nil {
		log.Fatalf("Server error: %v", err)
	}
}
// startHTTPServer runs the multi-client HTTP transport on the /mcp endpoint.
//
// The listen address comes from CREMOTE_HTTP_HOST (default "localhost") and
// CREMOTE_HTTP_PORT (default 8990). A background goroutine calls
// CleanupExpiredSessions every five minutes with a 30 minute limit. The
// function blocks while serving and terminates the process if the listener
// fails.
func startHTTPServer(sessionManager *SessionManager) {
	// Start session cleanup routine. It runs for the lifetime of the process.
	go func() {
		ticker := time.NewTicker(5 * time.Minute)
		defer ticker.Stop()
		for range ticker.C {
			sessionManager.CleanupExpiredSessions(30 * time.Minute)
		}
	}()

	// Get HTTP server configuration
	httpHost := os.Getenv("CREMOTE_HTTP_HOST")
	if httpHost == "" {
		httpHost = "localhost"
	}

	httpPort := 8990
	if raw := os.Getenv("CREMOTE_HTTP_PORT"); raw != "" {
		if p, err := strconv.Atoi(raw); err != nil {
			log.Printf("Ignoring invalid CREMOTE_HTTP_PORT %q, using default %d: %v", raw, httpPort, err)
		} else {
			httpPort = p
		}
	}

	// Create HTTP server
	mux := http.NewServeMux()
	mux.HandleFunc("/mcp", func(w http.ResponseWriter, r *http.Request) {
		handleMCPRequest(w, r, sessionManager)
	})

	// Named httpServer so it does not shadow the imported mcp-go server package.
	httpServer := &http.Server{
		Addr:    fmt.Sprintf("%s:%d", httpHost, httpPort),
		Handler: mux,
		// Bound header reads and idle keep-alives so stalled clients cannot pin
		// connections forever. WriteTimeout is left unset on purpose: it would
		// cut off long-lived streaming responses once SSE is implemented.
		ReadHeaderTimeout: 10 * time.Second,
		IdleTimeout:       2 * time.Minute,
	}

	log.Printf("Cremote MCP HTTP server ready on %s", httpServer.Addr)
	if err := httpServer.ListenAndServe(); err != nil {
		log.Fatalf("HTTP server error: %v", err)
	}
}
// handleMCPRequest is the entry point for the /mcp endpoint. It applies the
// CORS headers, answers preflight requests, resolves the client session from
// the Mcp-Session-Id header, and dispatches to the per-method handler.
//
// Session rules:
//   - Only a POST without Mcp-Session-Id creates a session; its ID is returned
//     in the Mcp-Session-Id response header.
//   - A request carrying an unknown session ID gets 404.
//   - GET without a session ID gets 400; DELETE without one is passed on to
//     handleMCPDelete, which rejects it.
//   - Unsupported methods get 405 before any session is looked up or created,
//     so stray requests never allocate sessions.
func handleMCPRequest(w http.ResponseWriter, r *http.Request, sessionManager *SessionManager) {
	// Set CORS headers
	header := w.Header()
	header.Set("Access-Control-Allow-Origin", "*")
	header.Set("Access-Control-Allow-Methods", "GET, POST, DELETE, OPTIONS")
	header.Set("Access-Control-Allow-Headers", "Content-Type, Accept, Mcp-Session-Id, MCP-Protocol-Version")
	// Without this, browser clients cannot read the session ID we hand out.
	header.Set("Access-Control-Expose-Headers", "Mcp-Session-Id")

	switch r.Method {
	case http.MethodOptions:
		w.WriteHeader(http.StatusOK)
		return
	case http.MethodPost, http.MethodGet, http.MethodDelete:
		// Handled below.
	default:
		// A 405 response must name the methods that are accepted. GET is left
		// out because it is currently answered with 405 as well.
		header.Set("Allow", "POST, DELETE, OPTIONS")
		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
		return
	}

	// Get or create session
	sessionID := r.Header.Get("Mcp-Session-Id")
	var session *ClientSession
	isNewSession := false

	switch {
	case sessionID != "":
		// Get existing session
		existing, exists := sessionManager.GetSession(sessionID)
		if !exists {
			http.Error(w, "Session not found", http.StatusNotFound)
			return
		}
		session = existing
	case r.Method == http.MethodPost:
		// Create new session for initialization
		session = sessionManager.CreateSession()
		isNewSession = true
		header.Set("Mcp-Session-Id", session.ID)
	}

	// Handle different HTTP methods according to MCP Streamable HTTP spec
	switch r.Method {
	case http.MethodPost:
		handleMCPPost(w, r, session, isNewSession)
	case http.MethodGet:
		if session == nil {
			http.Error(w, "Missing Mcp-Session-Id header", http.StatusBadRequest)
			return
		}
		handleMCPGet(w, r, session)
	case http.MethodDelete:
		// An empty sessionID is reported by handleMCPDelete itself.
		handleMCPDelete(w, r, sessionManager, sessionID)
	}
}
// handleMCPPost handles POST requests carrying a single JSON-RPC message.
//
// Only the "initialize" method is implemented: it is answered with a JSON-RPC
// result holding the protocol version, the tool capability and the server
// name/version. Any other method gets 501 Not Implemented. A body that is not
// a JSON object, exceeds the size cap, or lacks a string "method" gets 400.
//
// session and isNewSession are not used yet; they are accepted so that full
// session-aware JSON-RPC handling can be added without changing callers.
func handleMCPPost(w http.ResponseWriter, r *http.Request, session *ClientSession, isNewSession bool) {
	// Cap the body so a client cannot make the server buffer unbounded input.
	const maxRequestBytes = 10 << 20 // 10 MiB
	r.Body = http.MaxBytesReader(w, r.Body, maxRequestBytes)

	// Read request body
	var jsonRPCRequest map[string]any
	if err := json.NewDecoder(r.Body).Decode(&jsonRPCRequest); err != nil {
		http.Error(w, "Invalid JSON", http.StatusBadRequest)
		return
	}

	// TODO: Implement full JSON-RPC handling with session-aware tools
	method, ok := jsonRPCRequest["method"].(string)
	if !ok {
		http.Error(w, "Missing method", http.StatusBadRequest)
		return
	}

	if method != "initialize" {
		// For other methods, return not implemented for now
		w.WriteHeader(http.StatusNotImplemented)
		if _, err := w.Write([]byte("Method not yet implemented in HTTP transport")); err != nil {
			log.Printf("Failed to write response for method %q: %v", method, err)
		}
		return
	}

	// Handle initialization; the request id is echoed back unchanged.
	response := map[string]any{
		"jsonrpc": "2.0",
		"id":      jsonRPCRequest["id"],
		"result": map[string]any{
			"protocolVersion": "2025-06-18",
			"capabilities": map[string]any{
				"tools": map[string]any{},
			},
			"serverInfo": map[string]any{
				"name":    "cremote-mcp",
				"version": Version,
			},
		},
	}

	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(response); err != nil {
		// The status line is already sent, so logging is all that is left.
		log.Printf("Failed to write initialize response: %v", err)
	}
}
// handleMCPGet handles GET requests, which the MCP Streamable HTTP transport
// uses to open an SSE stream. Streaming is not implemented, so every GET is
// answered with 405 Method Not Allowed. session is currently unused.
func handleMCPGet(w http.ResponseWriter, r *http.Request, session *ClientSession) {
	// A 405 response must list the methods the resource does accept.
	w.Header().Set("Allow", "POST, DELETE, OPTIONS")
	http.Error(w, "SSE not yet implemented", http.StatusMethodNotAllowed)
}
// handleMCPDelete handles DELETE requests, which a client sends to end its
// session. An empty sessionID is rejected with 400 Bad Request; otherwise the
// session is removed from sessionManager and 200 OK is returned with a short
// confirmation body.
func handleMCPDelete(w http.ResponseWriter, r *http.Request, sessionManager *SessionManager, sessionID string) {
	const confirmation = "Session terminated"

	// Nothing to terminate without an ID.
	if len(sessionID) == 0 {
		http.Error(w, "No session to delete", http.StatusBadRequest)
		return
	}

	sessionManager.RemoveSession(sessionID)

	w.WriteHeader(http.StatusOK)
	w.Write([]byte(confirmation))
}