accessibility

This commit is contained in:
Josh at WLTechBlog 2025-08-29 12:11:54 -05:00
parent 6bad614f9e
commit 7f4d8b8e84
12 changed files with 2708 additions and 1577 deletions

View File

@ -262,22 +262,18 @@ cremote close-tab --tab="$TAB1"
Many commands support timeout parameters for robust testing: Many commands support timeout parameters for robust testing:
```bash ```bash
# Wait up to 10 seconds for element to appear, then 5 seconds for action # Wait up to 10 seconds for operation to complete
cremote fill-form --selector="#slow-loading-field" --value="test" \ cremote fill-form --selector="#slow-loading-field" --value="test" --timeout=10
--selection-timeout=10 --action-timeout=5
# Wait for elements that load dynamically # Wait for elements that load dynamically
cremote click-element --selector=".ajax-button" \ cremote click-element --selector=".ajax-button" --timeout=15
--selection-timeout=15 --action-timeout=10
# Get elements that may take time to render # Get elements that may take time to render
cremote get-element --selector=".dynamic-content" --selection-timeout=20 cremote get-element --selector=".dynamic-content" --timeout=20
``` ```
**Timeout Parameters:** **Timeout Parameter:**
- `--selection-timeout`: Seconds to wait for element to appear in DOM (default: 5 seconds) - `--timeout`: Seconds to wait for operation to complete (default: 5 seconds)
- `--action-timeout`: Seconds to wait for action to complete (default: 5 seconds)
- `--timeout`: General timeout for operations (default: 5 seconds)
**Smart Navigation Waiting:** **Smart Navigation Waiting:**
The `wait-navigation` command intelligently detects if navigation is actually happening: The `wait-navigation` command intelligently detects if navigation is actually happening:
@ -328,7 +324,7 @@ cremote load-url --url="https://myapp.com/register"
cremote click-element --selector="#submit-btn" cremote click-element --selector="#submit-btn"
# Check for validation errors # Check for validation errors
ERROR_MSG=$(cremote get-element --selector=".error-message" --selection-timeout=5) ERROR_MSG=$(cremote get-element --selector=".error-message" --timeout=5)
if [ -n "$ERROR_MSG" ]; then if [ -n "$ERROR_MSG" ]; then
echo "✓ Validation working: $ERROR_MSG" echo "✓ Validation working: $ERROR_MSG"
else else
@ -340,7 +336,7 @@ cremote fill-form --selector="#email" --value="invalid-email"
cremote click-element --selector="#submit-btn" cremote click-element --selector="#submit-btn"
# Verify email validation # Verify email validation
EMAIL_ERROR=$(cremote get-element --selector="#email-error" --selection-timeout=5) EMAIL_ERROR=$(cremote get-element --selector="#email-error" --timeout=5)
if echo "$EMAIL_ERROR" | grep -q "valid email"; then if echo "$EMAIL_ERROR" | grep -q "valid email"; then
echo "✓ Email validation working" echo "✓ Email validation working"
fi fi
@ -408,7 +404,7 @@ echo "Error handling: $ERROR_RESPONSE"
# Test file upload limits # Test file upload limits
cremote upload-file --selector="#file-upload" --file="/path/to/large-file.zip" cremote upload-file --selector="#file-upload" --file="/path/to/large-file.zip"
UPLOAD_ERROR=$(cremote get-element --selector=".upload-error" --selection-timeout=10) UPLOAD_ERROR=$(cremote get-element --selector=".upload-error" --timeout=10)
# Test iframe interaction (e.g., payment form) # Test iframe interaction (e.g., payment form)
cremote switch-iframe --selector="iframe.payment-widget" cremote switch-iframe --selector="iframe.payment-widget"
@ -417,12 +413,12 @@ cremote fill-form --selector="#expiry" --value="12/25"
cremote click-element --selector="#pay-now" cremote click-element --selector="#pay-now"
# Check for payment processing within iframe # Check for payment processing within iframe
PAYMENT_STATUS=$(cremote get-element --selector=".payment-status" --selection-timeout=10) PAYMENT_STATUS=$(cremote get-element --selector=".payment-status" --timeout=10)
echo "Payment status: $PAYMENT_STATUS" echo "Payment status: $PAYMENT_STATUS"
# Switch back to main page to check results # Switch back to main page to check results
cremote switch-main cremote switch-main
MAIN_STATUS=$(cremote get-element --selector=".order-confirmation" --selection-timeout=10) MAIN_STATUS=$(cremote get-element --selector=".order-confirmation" --timeout=10)
``` ```
## Testing Best Practices ## Testing Best Practices
@ -448,10 +444,10 @@ Always use appropriate timeouts for dynamic content:
```bash ```bash
# Wait for AJAX content to load # Wait for AJAX content to load
cremote get-element --selector=".search-results" --selection-timeout=15 cremote get-element --selector=".search-results" --timeout=15
# Wait for form submission to complete # Wait for form submission to complete
cremote submit-form --selector="#payment-form" --action-timeout=30 cremote submit-form --selector="#payment-form" --timeout=30
cremote wait-navigation --timeout=20 cremote wait-navigation --timeout=20
``` ```
@ -470,7 +466,7 @@ fi
# After form submission, check for success message # After form submission, check for success message
cremote submit-form --selector="#contact-form" cremote submit-form --selector="#contact-form"
SUCCESS_MSG=$(cremote get-element --selector=".success-message" --selection-timeout=10) SUCCESS_MSG=$(cremote get-element --selector=".success-message" --timeout=10)
if echo "$SUCCESS_MSG" | grep -q "Thank you"; then if echo "$SUCCESS_MSG" | grep -q "Thank you"; then
echo "✓ Form submitted successfully" echo "✓ Form submitted successfully"
fi fi
@ -501,7 +497,7 @@ cremote fill-form --selector="#card-number" --value="4111111111111111"
cremote switch-main # Always switch back cremote switch-main # Always switch back
# Good - verify iframe exists before switching # Good - verify iframe exists before switching
IFRAME_EXISTS=$(cremote get-element --selector="iframe.payment-form" --selection-timeout=5) IFRAME_EXISTS=$(cremote get-element --selector="iframe.payment-form" --timeout=5)
if [ -n "$IFRAME_EXISTS" ]; then if [ -n "$IFRAME_EXISTS" ]; then
cremote switch-iframe --selector="iframe.payment-form" cremote switch-iframe --selector="iframe.payment-form"
# ... iframe operations ... # ... iframe operations ...
@ -534,7 +530,7 @@ cremote list-tabs
```bash ```bash
# Test if element exists before interacting # Test if element exists before interacting
ELEMENT=$(cremote get-element --selector="#target-button" --selection-timeout=5) ELEMENT=$(cremote get-element --selector="#target-button" --timeout=5)
if [ -n "$ELEMENT" ]; then if [ -n "$ELEMENT" ]; then
cremote click-element --selector="#target-button" cremote click-element --selector="#target-button"
else else
@ -546,8 +542,7 @@ fi
```bash ```bash
# For slow-loading applications # For slow-loading applications
cremote fill-form --selector="#username" --value="test" \ cremote fill-form --selector="#username" --value="test" --timeout=30
--selection-timeout=30 --action-timeout=15
cremote wait-navigation --timeout=60 cremote wait-navigation --timeout=60
``` ```

View File

@ -139,11 +139,10 @@ The `--timeout` parameter specifies how many seconds to wait for the URL to load
#### Fill a form field #### Fill a form field
```bash ```bash
cremote fill-form --tab="<tab-id>" --selector="#username" --value="user123" [--selection-timeout=5] [--action-timeout=5] cremote fill-form --tab="<tab-id>" --selector="#username" --value="user123" [--timeout=5]
``` ```
The `--selection-timeout` parameter specifies how many seconds to wait for the element to appear in the DOM (default: 5 seconds). The `--timeout` parameter specifies how many seconds to wait for the fill operation to complete (default: 5 seconds).
The `--action-timeout` parameter specifies how many seconds to wait for the fill action to complete (default: 5 seconds).
#### Check/uncheck a checkbox or select a radio button #### Check/uncheck a checkbox or select a radio button
@ -180,26 +179,24 @@ The command automatically detects dropdown elements and tries both option text a
#### Upload a file #### Upload a file
```bash ```bash
cremote upload-file --tab="<tab-id>" --selector="input[type=file]" --file="/path/to/file.jpg" [--selection-timeout=5] [--action-timeout=5] cremote upload-file --tab="<tab-id>" --selector="input[type=file]" --file="/path/to/file.jpg" [--timeout=5]
``` ```
This command automatically: This command automatically:
1. **Transfers the file** from your local machine to the daemon container (if running in a container) 1. **Transfers the file** from your local machine to the daemon container (if running in a container)
2. **Uploads the file** to the specified file input element on the web page 2. **Uploads the file** to the specified file input element on the web page
The `--selection-timeout` parameter specifies how many seconds to wait for the element to appear in the DOM (default: 5 seconds). The `--timeout` parameter specifies how many seconds to wait for the upload operation to complete (default: 5 seconds).
The `--action-timeout` parameter specifies how many seconds to wait for the upload action to complete (default: 5 seconds).
**Note**: The file path should be the local path on your machine. The command will handle transferring it to the daemon container automatically. **Note**: The file path should be the local path on your machine. The command will handle transferring it to the daemon container automatically.
#### Submit a form #### Submit a form
```bash ```bash
cremote submit-form --tab="<tab-id>" --selector="form#login" [--selection-timeout=5] [--action-timeout=5] cremote submit-form --tab="<tab-id>" --selector="form#login" [--timeout=5]
``` ```
The `--selection-timeout` parameter specifies how many seconds to wait for the element to appear in the DOM (default: 5 seconds). The `--timeout` parameter specifies how many seconds to wait for the form submission to complete (default: 5 seconds).
The `--action-timeout` parameter specifies how many seconds to wait for the form submission to complete (default: 5 seconds).
#### Get the source code of a page #### Get the source code of a page
@ -212,19 +209,18 @@ The `--timeout` parameter specifies how many seconds to wait for getting the pag
#### Get the HTML of an element #### Get the HTML of an element
```bash ```bash
cremote get-element --tab="<tab-id>" --selector=".content" [--selection-timeout=5] cremote get-element --tab="<tab-id>" --selector=".content" [--timeout=5]
``` ```
The `--selection-timeout` parameter specifies how many seconds to wait for the element to appear in the DOM (default: 5 seconds). The `--timeout` parameter specifies how many seconds to wait for the element to appear in the DOM (default: 5 seconds).
#### Click on an element #### Click on an element
```bash ```bash
cremote click-element --tab="<tab-id>" --selector="button.submit" [--selection-timeout=5] [--action-timeout=5] cremote click-element --tab="<tab-id>" --selector="button.submit" [--timeout=5]
``` ```
The `--selection-timeout` parameter specifies how many seconds to wait for the element to appear in the DOM (default: 5 seconds). The `--timeout` parameter specifies how many seconds to wait for the click operation to complete (default: 5 seconds).
The `--action-timeout` parameter specifies how many seconds to wait for the click action to complete (default: 5 seconds).
#### Close a tab #### Close a tab

View File

@ -257,8 +257,8 @@ func (c *Client) LoadURL(tabID, url string, timeout int) error {
// FillFormField fills a form field with a value // FillFormField fills a form field with a value
// If tabID is empty, the current tab will be used // If tabID is empty, the current tab will be used
// selectionTimeout and actionTimeout are in seconds, 0 means no timeout // timeout is in seconds, 0 means no timeout
func (c *Client) FillFormField(tabID, selector, value string, selectionTimeout, actionTimeout int) error { func (c *Client) FillFormField(tabID, selector, value string, timeout int) error {
params := map[string]string{ params := map[string]string{
"selector": selector, "selector": selector,
"value": value, "value": value,
@ -269,13 +269,9 @@ func (c *Client) FillFormField(tabID, selector, value string, selectionTimeout,
params["tab"] = tabID params["tab"] = tabID
} }
// Add timeouts if specified // Add timeout if specified
if selectionTimeout > 0 { if timeout > 0 {
params["selection-timeout"] = strconv.Itoa(selectionTimeout) params["timeout"] = strconv.Itoa(timeout)
}
if actionTimeout > 0 {
params["action-timeout"] = strconv.Itoa(actionTimeout)
} }
resp, err := c.SendCommand("fill-form", params) resp, err := c.SendCommand("fill-form", params)
@ -292,8 +288,8 @@ func (c *Client) FillFormField(tabID, selector, value string, selectionTimeout,
// UploadFile uploads a file to a file input // UploadFile uploads a file to a file input
// If tabID is empty, the current tab will be used // If tabID is empty, the current tab will be used
// selectionTimeout and actionTimeout are in seconds, 0 means no timeout // timeout is in seconds, 0 means no timeout
func (c *Client) UploadFile(tabID, selector, filePath string, selectionTimeout, actionTimeout int) error { func (c *Client) UploadFile(tabID, selector, filePath string, timeout int) error {
params := map[string]string{ params := map[string]string{
"selector": selector, "selector": selector,
"file": filePath, "file": filePath,
@ -304,13 +300,9 @@ func (c *Client) UploadFile(tabID, selector, filePath string, selectionTimeout,
params["tab"] = tabID params["tab"] = tabID
} }
// Add timeouts if specified // Add timeout if specified
if selectionTimeout > 0 { if timeout > 0 {
params["selection-timeout"] = strconv.Itoa(selectionTimeout) params["timeout"] = strconv.Itoa(timeout)
}
if actionTimeout > 0 {
params["action-timeout"] = strconv.Itoa(actionTimeout)
} }
resp, err := c.SendCommand("upload-file", params) resp, err := c.SendCommand("upload-file", params)
@ -327,8 +319,8 @@ func (c *Client) UploadFile(tabID, selector, filePath string, selectionTimeout,
// SelectElement selects an option in a select dropdown // SelectElement selects an option in a select dropdown
// If tabID is empty, the current tab will be used // If tabID is empty, the current tab will be used
// selectionTimeout and actionTimeout are in seconds, 0 means no timeout // timeout is in seconds, 0 means no timeout
func (c *Client) SelectElement(tabID, selector, value string, selectionTimeout, actionTimeout int) error { func (c *Client) SelectElement(tabID, selector, value string, timeout int) error {
params := map[string]string{ params := map[string]string{
"selector": selector, "selector": selector,
"value": value, "value": value,
@ -339,13 +331,9 @@ func (c *Client) SelectElement(tabID, selector, value string, selectionTimeout,
params["tab"] = tabID params["tab"] = tabID
} }
// Add timeouts if specified // Add timeout if specified
if selectionTimeout > 0 { if timeout > 0 {
params["selection-timeout"] = strconv.Itoa(selectionTimeout) params["timeout"] = strconv.Itoa(timeout)
}
if actionTimeout > 0 {
params["action-timeout"] = strconv.Itoa(actionTimeout)
} }
resp, err := c.SendCommand("select-element", params) resp, err := c.SendCommand("select-element", params)
@ -362,8 +350,8 @@ func (c *Client) SelectElement(tabID, selector, value string, selectionTimeout,
// SubmitForm submits a form // SubmitForm submits a form
// If tabID is empty, the current tab will be used // If tabID is empty, the current tab will be used
// selectionTimeout and actionTimeout are in seconds, 0 means no timeout // timeout is in seconds, 0 means no timeout
func (c *Client) SubmitForm(tabID, selector string, selectionTimeout, actionTimeout int) error { func (c *Client) SubmitForm(tabID, selector string, timeout int) error {
params := map[string]string{ params := map[string]string{
"selector": selector, "selector": selector,
} }
@ -373,13 +361,9 @@ func (c *Client) SubmitForm(tabID, selector string, selectionTimeout, actionTime
params["tab"] = tabID params["tab"] = tabID
} }
// Add timeouts if specified // Add timeout if specified
if selectionTimeout > 0 { if timeout > 0 {
params["selection-timeout"] = strconv.Itoa(selectionTimeout) params["timeout"] = strconv.Itoa(timeout)
}
if actionTimeout > 0 {
params["action-timeout"] = strconv.Itoa(actionTimeout)
} }
resp, err := c.SendCommand("submit-form", params) resp, err := c.SendCommand("submit-form", params)
@ -429,8 +413,8 @@ func (c *Client) GetPageSource(tabID string, timeout int) (string, error) {
// GetElementHTML gets the HTML of an element // GetElementHTML gets the HTML of an element
// If tabID is empty, the current tab will be used // If tabID is empty, the current tab will be used
// selectionTimeout is in seconds, 0 means no timeout // timeout is in seconds, 0 means no timeout
func (c *Client) GetElementHTML(tabID, selector string, selectionTimeout int) (string, error) { func (c *Client) GetElementHTML(tabID, selector string, timeout int) (string, error) {
params := map[string]string{ params := map[string]string{
"selector": selector, "selector": selector,
} }
@ -441,8 +425,8 @@ func (c *Client) GetElementHTML(tabID, selector string, selectionTimeout int) (s
} }
// Add timeout if specified // Add timeout if specified
if selectionTimeout > 0 { if timeout > 0 {
params["selection-timeout"] = strconv.Itoa(selectionTimeout) params["timeout"] = strconv.Itoa(timeout)
} }
resp, err := c.SendCommand("get-element", params) resp, err := c.SendCommand("get-element", params)
@ -644,8 +628,8 @@ func (c *Client) SwitchToMain(tabID string, timeout int) error {
// ClickElement clicks on an element // ClickElement clicks on an element
// If tabID is empty, the current tab will be used // If tabID is empty, the current tab will be used
// selectionTimeout and actionTimeout are in seconds, 0 means no timeout // timeout is in seconds, 0 means no timeout
func (c *Client) ClickElement(tabID, selector string, selectionTimeout, actionTimeout int) error { func (c *Client) ClickElement(tabID, selector string, timeout int) error {
params := map[string]string{ params := map[string]string{
"selector": selector, "selector": selector,
} }
@ -655,13 +639,9 @@ func (c *Client) ClickElement(tabID, selector string, selectionTimeout, actionTi
params["tab"] = tabID params["tab"] = tabID
} }
// Add timeouts if specified // Add timeout if specified
if selectionTimeout > 0 { if timeout > 0 {
params["selection-timeout"] = strconv.Itoa(selectionTimeout) params["timeout"] = strconv.Itoa(timeout)
}
if actionTimeout > 0 {
params["action-timeout"] = strconv.Itoa(actionTimeout)
} }
resp, err := c.SendCommand("click-element", params) resp, err := c.SendCommand("click-element", params)
@ -2137,6 +2117,214 @@ func (c *Client) ScreenshotElement(tabID, selector, outputPath string, timeout i
return nil return nil
} }
// Accessibility tree data structures (matching daemon types)
//
// These types are only used to decode the JSON payloads returned by the
// accessibility commands; field order and JSON tags mirror the daemon side.
type (
	// AXNode is a single node of the accessibility tree. Nodes reference
	// each other by ID (ParentID / ChildIDs) rather than by nesting.
	AXNode struct {
		NodeID           string       `json:"nodeId"`
		Ignored          bool         `json:"ignored"`
		IgnoredReasons   []AXProperty `json:"ignoredReasons,omitempty"`
		Role             *AXValue     `json:"role,omitempty"`
		ChromeRole       *AXValue     `json:"chromeRole,omitempty"`
		Name             *AXValue     `json:"name,omitempty"`
		Description      *AXValue     `json:"description,omitempty"`
		Value            *AXValue     `json:"value,omitempty"`
		Properties       []AXProperty `json:"properties,omitempty"`
		ParentID         string       `json:"parentId,omitempty"`
		ChildIDs         []string     `json:"childIds,omitempty"`
		BackendDOMNodeID int          `json:"backendDOMNodeId,omitempty"`
		FrameID          string       `json:"frameId,omitempty"`
	}

	// AXProperty is a named property attached to an accessibility node.
	AXProperty struct {
		Name  string   `json:"name"`
		Value *AXValue `json:"value"`
	}

	// AXValue is a computed accessibility value. Value holds whatever JSON
	// value was decoded, so its dynamic type depends on the payload.
	AXValue struct {
		Type         string          `json:"type"`
		Value        interface{}     `json:"value,omitempty"`
		RelatedNodes []AXRelatedNode `json:"relatedNodes,omitempty"`
		Sources      []AXValueSource `json:"sources,omitempty"`
	}

	// AXRelatedNode identifies a node related to an accessibility value.
	AXRelatedNode struct {
		BackendDOMNodeID int    `json:"backendDOMNodeId"`
		IDRef            string `json:"idref,omitempty"`
		Text             string `json:"text,omitempty"`
	}

	// AXValueSource describes one source that contributed to a computed
	// accessibility value.
	AXValueSource struct {
		Type              string   `json:"type"`
		Value             *AXValue `json:"value,omitempty"`
		Attribute         string   `json:"attribute,omitempty"`
		AttributeValue    *AXValue `json:"attributeValue,omitempty"`
		Superseded        bool     `json:"superseded,omitempty"`
		NativeSource      string   `json:"nativeSource,omitempty"`
		NativeSourceValue *AXValue `json:"nativeSourceValue,omitempty"`
		Invalid           bool     `json:"invalid,omitempty"`
		InvalidReason     string   `json:"invalidReason,omitempty"`
	}

	// AccessibilityTreeResult is the payload of the full and partial
	// accessibility tree commands.
	AccessibilityTreeResult struct {
		Nodes []AXNode `json:"nodes"`
	}

	// AccessibilityQueryResult is the payload of accessibility tree queries.
	AccessibilityQueryResult struct {
		Nodes []AXNode `json:"nodes"`
	}
)
// GetAccessibilityTree sends the "get-accessibility-tree" command and decodes
// the returned node list.
//
// tabID selects the tab; when empty, no "tab" parameter is sent and the
// daemon falls back to its current tab.
// depth limits the tree depth; pass nil to omit the parameter.
// timeout is in seconds; values <= 0 are not sent, leaving the daemon to
// apply its own default.
//
// An error is returned when the command cannot be sent, when the daemon
// reports failure, or when the payload cannot be decoded.
func (c *Client) GetAccessibilityTree(tabID string, depth *int, timeout int) (*AccessibilityTreeResult, error) {
	params := make(map[string]string, 3)
	if tabID != "" {
		params["tab"] = tabID
	}
	if depth != nil {
		params["depth"] = strconv.Itoa(*depth)
	}
	if timeout > 0 {
		params["timeout"] = strconv.Itoa(timeout)
	}

	resp, err := c.SendCommand("get-accessibility-tree", params)
	switch {
	case err != nil:
		return nil, err
	case !resp.Success:
		return nil, fmt.Errorf("failed to get accessibility tree: %s", resp.Error)
	}

	// resp.Data is loosely typed, so round-trip it through JSON to obtain
	// the concrete result structure.
	raw, err := json.Marshal(resp.Data)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal response data: %w", err)
	}

	tree := new(AccessibilityTreeResult)
	if err := json.Unmarshal(raw, tree); err != nil {
		return nil, fmt.Errorf("failed to unmarshal accessibility tree result: %w", err)
	}
	return tree, nil
}
// GetPartialAccessibilityTree sends the "get-partial-accessibility-tree"
// command for the element matched by selector and decodes the returned nodes.
//
// tabID selects the tab; when empty, no "tab" parameter is sent and the
// daemon falls back to its current tab.
// selector is the CSS selector of the element whose subtree is requested.
// fetchRelatives is always sent ("true"/"false") and asks the daemon to
// include ancestors, siblings, and children.
// timeout is in seconds; values <= 0 are not sent, leaving the daemon to
// apply its own default.
func (c *Client) GetPartialAccessibilityTree(tabID, selector string, fetchRelatives bool, timeout int) (*AccessibilityTreeResult, error) {
	params := make(map[string]string, 4)
	params["selector"] = selector
	params["fetch-relatives"] = strconv.FormatBool(fetchRelatives)
	if tabID != "" {
		params["tab"] = tabID
	}
	if timeout > 0 {
		params["timeout"] = strconv.Itoa(timeout)
	}

	resp, err := c.SendCommand("get-partial-accessibility-tree", params)
	switch {
	case err != nil:
		return nil, err
	case !resp.Success:
		return nil, fmt.Errorf("failed to get partial accessibility tree: %s", resp.Error)
	}

	// resp.Data is loosely typed, so round-trip it through JSON to obtain
	// the concrete result structure.
	raw, err := json.Marshal(resp.Data)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal response data: %w", err)
	}

	tree := new(AccessibilityTreeResult)
	if err := json.Unmarshal(raw, tree); err != nil {
		return nil, fmt.Errorf("failed to unmarshal accessibility tree result: %w", err)
	}
	return tree, nil
}
// QueryAccessibilityTree sends the "query-accessibility-tree" command and
// decodes the matching nodes.
//
// Every string argument is optional and is only sent when non-empty:
// tabID picks the tab (the daemon falls back to its current tab), selector
// narrows the search scope, accessibleName and role are the values to match.
// timeout is in seconds; values <= 0 are not sent, leaving the daemon to
// apply its own default.
func (c *Client) QueryAccessibilityTree(tabID, selector, accessibleName, role string, timeout int) (*AccessibilityQueryResult, error) {
	optional := [...]struct{ key, value string }{
		{"tab", tabID},
		{"selector", selector},
		{"accessible-name", accessibleName},
		{"role", role},
	}

	params := make(map[string]string, len(optional)+1)
	for _, opt := range optional {
		if opt.value != "" {
			params[opt.key] = opt.value
		}
	}
	if timeout > 0 {
		params["timeout"] = strconv.Itoa(timeout)
	}

	resp, err := c.SendCommand("query-accessibility-tree", params)
	switch {
	case err != nil:
		return nil, err
	case !resp.Success:
		return nil, fmt.Errorf("failed to query accessibility tree: %s", resp.Error)
	}

	// resp.Data is loosely typed, so round-trip it through JSON to obtain
	// the concrete result structure.
	raw, err := json.Marshal(resp.Data)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal response data: %w", err)
	}

	matches := new(AccessibilityQueryResult)
	if err := json.Unmarshal(raw, matches); err != nil {
		return nil, fmt.Errorf("failed to unmarshal accessibility query result: %w", err)
	}
	return matches, nil
}
// ScreenshotEnhanced takes a screenshot with metadata // ScreenshotEnhanced takes a screenshot with metadata
// If tabID is empty, the current tab will be used // If tabID is empty, the current tab will be used
// timeout is in seconds, 0 means no timeout // timeout is in seconds, 0 means no timeout

View File

@ -957,6 +957,84 @@ func (d *Daemon) handleCommand(w http.ResponseWriter, r *http.Request) {
response = Response{Success: true, Data: result} response = Response{Success: true, Data: result}
} }
// Accessibility tree commands
case "get-accessibility-tree":
tabID := cmd.Params["tab"]
depth := cmd.Params["depth"]
timeoutStr := cmd.Params["timeout"]
// Parse timeout (default to 5 seconds if not specified)
timeout := 5
if timeoutStr != "" {
if parsedTimeout, err := strconv.Atoi(timeoutStr); err == nil && parsedTimeout > 0 {
timeout = parsedTimeout
}
}
// Parse depth (optional)
var depthInt *int
if depth != "" {
if parsedDepth, err := strconv.Atoi(depth); err == nil && parsedDepth >= 0 {
depthInt = &parsedDepth
}
}
result, err := d.getAccessibilityTree(tabID, depthInt, timeout)
if err != nil {
response = Response{Success: false, Error: err.Error()}
} else {
response = Response{Success: true, Data: result}
}
case "get-partial-accessibility-tree":
tabID := cmd.Params["tab"]
selector := cmd.Params["selector"]
fetchRelatives := cmd.Params["fetch-relatives"] // "true" or "false"
timeoutStr := cmd.Params["timeout"]
// Parse timeout (default to 5 seconds if not specified)
timeout := 5
if timeoutStr != "" {
if parsedTimeout, err := strconv.Atoi(timeoutStr); err == nil && parsedTimeout > 0 {
timeout = parsedTimeout
}
}
// Parse fetchRelatives (default to true)
fetchRel := true
if fetchRelatives == "false" {
fetchRel = false
}
result, err := d.getPartialAccessibilityTree(tabID, selector, fetchRel, timeout)
if err != nil {
response = Response{Success: false, Error: err.Error()}
} else {
response = Response{Success: true, Data: result}
}
case "query-accessibility-tree":
tabID := cmd.Params["tab"]
selector := cmd.Params["selector"]
accessibleName := cmd.Params["accessible-name"]
role := cmd.Params["role"]
timeoutStr := cmd.Params["timeout"]
// Parse timeout (default to 5 seconds if not specified)
timeout := 5
if timeoutStr != "" {
if parsedTimeout, err := strconv.Atoi(timeoutStr); err == nil && parsedTimeout > 0 {
timeout = parsedTimeout
}
}
result, err := d.queryAccessibilityTree(tabID, selector, accessibleName, role, timeout)
if err != nil {
response = Response{Success: false, Error: err.Error()}
} else {
response = Response{Success: true, Data: result}
}
default: default:
d.debugLog("Unknown action: %s", cmd.Action) d.debugLog("Unknown action: %s", cmd.Action)
response = Response{Success: false, Error: "Unknown action"} response = Response{Success: false, Error: "Unknown action"}
@ -4828,3 +4906,368 @@ func (d *Daemon) getFileInfo(filePath string, result *FileManagementResult) (*Fi
d.debugLog("Retrieved info for file: %s", filePath) d.debugLog("Retrieved info for file: %s", filePath)
return result, nil return result, nil
} }
// Accessibility tree data structures
//
// These are the daemon's JSON-serialisable counterparts of the proto
// accessibility types; they are what gets returned in a Response.
type (
	// AXNode is a single node of the accessibility tree. Nodes reference
	// each other by ID (ParentID / ChildIDs) rather than by nesting.
	AXNode struct {
		NodeID           string       `json:"nodeId"`
		Ignored          bool         `json:"ignored"`
		IgnoredReasons   []AXProperty `json:"ignoredReasons,omitempty"`
		Role             *AXValue     `json:"role,omitempty"`
		ChromeRole       *AXValue     `json:"chromeRole,omitempty"`
		Name             *AXValue     `json:"name,omitempty"`
		Description      *AXValue     `json:"description,omitempty"`
		Value            *AXValue     `json:"value,omitempty"`
		Properties       []AXProperty `json:"properties,omitempty"`
		ParentID         string       `json:"parentId,omitempty"`
		ChildIDs         []string     `json:"childIds,omitempty"`
		BackendDOMNodeID int          `json:"backendDOMNodeId,omitempty"`
		FrameID          string       `json:"frameId,omitempty"`
	}

	// AXProperty is a named property attached to an accessibility node.
	AXProperty struct {
		Name  string   `json:"name"`
		Value *AXValue `json:"value"`
	}

	// AXValue is a computed accessibility value. Value carries the raw
	// value as delivered by the protocol layer.
	AXValue struct {
		Type         string          `json:"type"`
		Value        interface{}     `json:"value,omitempty"`
		RelatedNodes []AXRelatedNode `json:"relatedNodes,omitempty"`
		Sources      []AXValueSource `json:"sources,omitempty"`
	}

	// AXRelatedNode identifies a node related to an accessibility value.
	AXRelatedNode struct {
		BackendDOMNodeID int    `json:"backendDOMNodeId"`
		IDRef            string `json:"idref,omitempty"`
		Text             string `json:"text,omitempty"`
	}

	// AXValueSource describes one source that contributed to a computed
	// accessibility value.
	AXValueSource struct {
		Type              string   `json:"type"`
		Value             *AXValue `json:"value,omitempty"`
		Attribute         string   `json:"attribute,omitempty"`
		AttributeValue    *AXValue `json:"attributeValue,omitempty"`
		Superseded        bool     `json:"superseded,omitempty"`
		NativeSource      string   `json:"nativeSource,omitempty"`
		NativeSourceValue *AXValue `json:"nativeSourceValue,omitempty"`
		Invalid           bool     `json:"invalid,omitempty"`
		InvalidReason     string   `json:"invalidReason,omitempty"`
	}

	// AccessibilityTreeResult is the result of the full and partial
	// accessibility tree operations.
	AccessibilityTreeResult struct {
		Nodes []AXNode `json:"nodes"`
	}

	// AccessibilityQueryResult is the result of accessibility tree queries.
	AccessibilityQueryResult struct {
		Nodes []AXNode `json:"nodes"`
	}
)
// getAccessibilityTree retrieves the full accessibility tree for a tab.
//
// tabID selects the tab; when empty, d.currentTab is used, and an error is
// returned if that is empty as well.
// depth, when non-nil, is forwarded as the Depth field of the
// Accessibility.getFullAXTree request; nil requests the full tree.
// timeout is in seconds.
// NOTE(review): timeout is currently only logged here; it is not applied to
// the protocol calls below. Confirm whether the page should be wrapped with a
// deadline.
//
// The accessibility domain is enabled on the page before the tree is
// requested, and every returned proto node is converted to an AXNode.
func (d *Daemon) getAccessibilityTree(tabID string, depth *int, timeout int) (*AccessibilityTreeResult, error) {
	// Log the depth value itself; formatting the *int with %v would print a
	// pointer address.
	depthDesc := "<nil>"
	if depth != nil {
		depthDesc = fmt.Sprint(*depth)
	}
	d.debugLog("Getting accessibility tree for tab: %s with depth: %v, timeout: %d", tabID, depthDesc, timeout)

	// Use current tab if not specified
	if tabID == "" {
		tabID = d.currentTab
	}
	if tabID == "" {
		return nil, fmt.Errorf("no tab specified and no current tab available")
	}

	page, err := d.getTab(tabID)
	if err != nil {
		return nil, fmt.Errorf("failed to get page: %w", err)
	}

	// Enable accessibility domain
	err = proto.AccessibilityEnable{}.Call(page)
	if err != nil {
		return nil, fmt.Errorf("failed to enable accessibility domain: %w", err)
	}

	// Build the request and issue the call on that same value, so the
	// requested depth actually reaches the browser.
	req := proto.AccessibilityGetFullAXTree{Depth: depth}
	result, err := req.Call(page)
	if err != nil {
		return nil, fmt.Errorf("failed to get accessibility tree: %w", err)
	}

	// Convert the protocol nodes into the daemon's serialisable form
	var axResult AccessibilityTreeResult
	for _, node := range result.Nodes {
		axResult.Nodes = append(axResult.Nodes, d.convertProtoAXNode(node))
	}

	d.debugLog("Successfully retrieved accessibility tree with %d nodes for tab: %s", len(axResult.Nodes), tabID)
	return &axResult, nil
}
// convertProtoAXNode copies a proto.AccessibilityAXNode into the daemon's
// own AXNode representation.
//
// Optional value fields are passed straight to convertProtoAXValue, which
// yields nil for nil input, so absent values stay nil in the result. String
// IDs are copied unconditionally; an empty source simply leaves the field at
// its zero value. Slice fields remain nil when the source slice is empty.
func (d *Daemon) convertProtoAXNode(protoNode *proto.AccessibilityAXNode) AXNode {
	converted := AXNode{
		NodeID:           string(protoNode.NodeID),
		Ignored:          protoNode.Ignored,
		Role:             d.convertProtoAXValue(protoNode.Role),
		ChromeRole:       d.convertProtoAXValue(protoNode.ChromeRole),
		Name:             d.convertProtoAXValue(protoNode.Name),
		Description:      d.convertProtoAXValue(protoNode.Description),
		Value:            d.convertProtoAXValue(protoNode.Value),
		ParentID:         string(protoNode.ParentID),
		BackendDOMNodeID: int(protoNode.BackendDOMNodeID),
		FrameID:          string(protoNode.FrameID),
	}

	// Properties
	for _, p := range protoNode.Properties {
		entry := AXProperty{Name: string(p.Name), Value: d.convertProtoAXValue(p.Value)}
		converted.Properties = append(converted.Properties, entry)
	}

	// Ignored reasons
	for _, r := range protoNode.IgnoredReasons {
		entry := AXProperty{Name: string(r.Name), Value: d.convertProtoAXValue(r.Value)}
		converted.IgnoredReasons = append(converted.IgnoredReasons, entry)
	}

	// Child IDs
	for _, id := range protoNode.ChildIDs {
		converted.ChildIDs = append(converted.ChildIDs, string(id))
	}

	return converted
}
// convertProtoAXValue maps a CDP accessibility value onto the daemon's AXValue.
//
// A nil input yields nil. Otherwise the type and raw value are copied, related
// nodes are flattened into AXRelatedNode entries, and each value source is
// converted, recursing into the nested values a source may carry.
func (d *Daemon) convertProtoAXValue(protoValue *proto.AccessibilityAXValue) *AXValue {
	if protoValue == nil {
		return nil
	}

	converted := &AXValue{
		Type:  string(protoValue.Type),
		Value: protoValue.Value,
	}

	// Related nodes
	for i := range protoValue.RelatedNodes {
		rel := protoValue.RelatedNodes[i]
		converted.RelatedNodes = append(converted.RelatedNodes, AXRelatedNode{
			BackendDOMNodeID: int(rel.BackendDOMNodeID),
			IDRef:            rel.Idref,
			Text:             rel.Text,
		})
	}

	// Value sources; optional members are only set when the source has them.
	for i := range protoValue.Sources {
		src := protoValue.Sources[i]

		entry := AXValueSource{
			Type:          string(src.Type),
			Superseded:    src.Superseded,
			Invalid:       src.Invalid,
			InvalidReason: src.InvalidReason,
		}

		if src.Value != nil {
			entry.Value = d.convertProtoAXValue(src.Value)
		}
		if src.Attribute != "" {
			entry.Attribute = src.Attribute
		}
		if src.AttributeValue != nil {
			entry.AttributeValue = d.convertProtoAXValue(src.AttributeValue)
		}
		if src.NativeSource != "" {
			entry.NativeSource = string(src.NativeSource)
		}
		if src.NativeSourceValue != nil {
			entry.NativeSourceValue = d.convertProtoAXValue(src.NativeSourceValue)
		}

		converted.Sources = append(converted.Sources, entry)
	}

	return converted
}
// getPartialAccessibilityTree fetches the accessibility subtree around the
// element matched by selector, using the CDP method
// Accessibility.getPartialAXTree.
//
// tabID falls back to d.currentTab when empty. fetchRelatives is forwarded to
// the CDP request. timeout is in seconds and, when positive, bounds only the
// element lookup; the CDP calls themselves run on the page's own context.
//
// NOTE(review): CDP documents fetchRelatives as defaulting to true. If the
// proto request struct omits false values when serializing, passing
// fetchRelatives=false may not reach Chrome — confirm against the proto
// package.
func (d *Daemon) getPartialAccessibilityTree(tabID, selector string, fetchRelatives bool, timeout int) (*AccessibilityTreeResult, error) {
	d.debugLog("Getting partial accessibility tree for tab: %s, selector: %s, fetchRelatives: %v, timeout: %d", tabID, selector, fetchRelatives, timeout)

	// Resolve the target tab, falling back to the current one.
	if tabID == "" {
		if tabID = d.currentTab; tabID == "" {
			return nil, fmt.Errorf("no tab specified and no current tab available")
		}
	}

	page, err := d.getTab(tabID)
	if err != nil {
		return nil, fmt.Errorf("failed to get page: %v", err)
	}

	// The accessibility domain must be enabled before querying it.
	if err = (proto.AccessibilityEnable{}).Call(page); err != nil {
		return nil, fmt.Errorf("failed to enable accessibility domain: %v", err)
	}

	// Locate the DOM element, applying the timeout to the lookup only.
	lookup := page
	if timeout > 0 {
		lookup = page.Timeout(time.Duration(timeout) * time.Second)
	}
	element, err := lookup.Element(selector)
	if err != nil {
		return nil, fmt.Errorf("failed to find element: %w", err)
	}

	// The CDP request is keyed by the element's backend node ID.
	nodeInfo, err := element.Describe(1, false)
	if err != nil {
		return nil, fmt.Errorf("failed to describe element: %w", err)
	}

	request := proto.AccessibilityGetPartialAXTree{
		BackendNodeID:  nodeInfo.BackendNodeID,
		FetchRelatives: fetchRelatives,
	}
	result, err := request.Call(page)
	if err != nil {
		return nil, fmt.Errorf("failed to get partial accessibility tree: %v", err)
	}

	// Convert the CDP nodes into the daemon's representation.
	tree := &AccessibilityTreeResult{}
	for _, cdpNode := range result.Nodes {
		tree.Nodes = append(tree.Nodes, d.convertProtoAXNode(cdpNode))
	}

	d.debugLog("Successfully retrieved partial accessibility tree with %d nodes for tab: %s", len(tree.Nodes), tabID)
	return tree, nil
}
// queryAccessibilityTree searches the accessibility tree with the CDP method
// Accessibility.queryAXTree and returns the matching nodes.
//
// tabID falls back to d.currentTab when empty. accessibleName and role are
// copied onto the request as given. When selector is non-empty, the matching
// element is looked up first and its backend node ID is set on the request;
// timeout (seconds, when positive) bounds that lookup only.
func (d *Daemon) queryAccessibilityTree(tabID, selector, accessibleName, role string, timeout int) (*AccessibilityQueryResult, error) {
	d.debugLog("Querying accessibility tree for tab: %s, selector: %s, name: %s, role: %s, timeout: %d", tabID, selector, accessibleName, role, timeout)

	// Resolve the target tab, falling back to the current one.
	if tabID == "" {
		if tabID = d.currentTab; tabID == "" {
			return nil, fmt.Errorf("no tab specified and no current tab available")
		}
	}

	page, err := d.getTab(tabID)
	if err != nil {
		return nil, fmt.Errorf("failed to get page: %v", err)
	}

	// The accessibility domain must be enabled before querying it.
	if err = (proto.AccessibilityEnable{}).Call(page); err != nil {
		return nil, fmt.Errorf("failed to enable accessibility domain: %v", err)
	}

	// Empty name/role strings are the zero values, so they can be set directly.
	query := proto.AccessibilityQueryAXTree{
		AccessibleName: accessibleName,
		Role:           role,
	}

	// Optional scoping: resolve the selector to a backend node ID.
	if selector != "" {
		lookup := page
		if timeout > 0 {
			lookup = page.Timeout(time.Duration(timeout) * time.Second)
		}
		element, err := lookup.Element(selector)
		if err != nil {
			return nil, fmt.Errorf("failed to find element: %w", err)
		}

		nodeInfo, err := element.Describe(1, false)
		if err != nil {
			return nil, fmt.Errorf("failed to describe element: %w", err)
		}
		query.BackendNodeID = nodeInfo.BackendNodeID
	}

	result, err := query.Call(page)
	if err != nil {
		return nil, fmt.Errorf("failed to query accessibility tree: %v", err)
	}

	// Convert the CDP nodes into the daemon's representation.
	matches := &AccessibilityQueryResult{}
	for _, cdpNode := range result.Nodes {
		matches.Nodes = append(matches.Nodes, d.convertProtoAXNode(cdpNode))
	}

	d.debugLog("Successfully queried accessibility tree with %d matching nodes for tab: %s", len(matches.Nodes), tabID)
	return matches, nil
}

230
docs/accessibility_tree.md Normal file
View File

@ -0,0 +1,230 @@
# Accessibility Tree Support in Cremote
Cremote now supports interfacing with Chrome's accessibility tree through the Chrome DevTools Protocol. This enables AI agents and automation tools to understand and interact with web pages using accessibility information, which is crucial for building inclusive and robust web automation.
## Overview
The accessibility tree is a representation of the web page structure that assistive technologies (like screen readers) use to understand and navigate content. It provides semantic information about elements including their roles, names, descriptions, states, and relationships.
## Features
### 1. Full Accessibility Tree Retrieval
Get the complete accessibility tree for a page or limit the depth for performance.
### 2. Partial Accessibility Tree
Retrieve accessibility information for a specific element and its relatives (ancestors, siblings, children).
### 3. Accessibility Tree Queries
Search for elements by accessible name, ARIA role, or within a specific scope.
## API Reference
### Daemon Commands
#### `get-accessibility-tree`
Retrieves the full accessibility tree for a tab.
**Parameters:**
- `tab` (optional): Tab ID, uses current tab if not specified
- `depth` (optional): Maximum depth to retrieve, omit for full tree
- `timeout` (optional): Timeout in seconds, default 5
**Example:**
```bash
curl -X POST http://localhost:8989/command \
-H "Content-Type: application/json" \
-d '{"action": "get-accessibility-tree", "params": {"depth": "3"}}'
```
#### `get-partial-accessibility-tree`
Retrieves a partial accessibility tree for a specific element and, optionally, its relatives (ancestors, siblings, children).
**Parameters:**
- `selector`: CSS selector for the target element
- `tab` (optional): Tab ID, uses current tab if not specified
- `fetch-relatives` (optional): Whether to fetch relatives, default "true"
- `timeout` (optional): Timeout in seconds, default 5
**Example:**
```bash
curl -X POST http://localhost:8989/command \
-H "Content-Type: application/json" \
-d '{"action": "get-partial-accessibility-tree", "params": {"selector": "form", "fetch-relatives": "true"}}'
```
#### `query-accessibility-tree`
Queries the accessibility tree for nodes matching criteria.
**Parameters:**
- `tab` (optional): Tab ID, uses current tab if not specified
- `selector` (optional): CSS selector to limit search scope
- `accessible-name` (optional): Accessible name to match
- `role` (optional): ARIA role to match
- `timeout` (optional): Timeout in seconds, default 5
**Example:**
```bash
curl -X POST http://localhost:8989/command \
-H "Content-Type: application/json" \
-d '{"action": "query-accessibility-tree", "params": {"role": "button", "accessible-name": "Submit"}}'
```
### Client API
#### `GetAccessibilityTree(tabID string, depth *int, timeout int) (*AccessibilityTreeResult, error)`
Retrieves the full accessibility tree.
```go
// Get full tree
result, err := client.GetAccessibilityTree("", nil, 10)
// Get tree with limited depth
depth := 2
result, err := client.GetAccessibilityTree("tab123", &depth, 10)
```
#### `GetPartialAccessibilityTree(tabID, selector string, fetchRelatives bool, timeout int) (*AccessibilityTreeResult, error)`
Retrieves a partial accessibility tree for an element.
```go
result, err := client.GetPartialAccessibilityTree("", "form", true, 10)
```
#### `QueryAccessibilityTree(tabID, selector, accessibleName, role string, timeout int) (*AccessibilityQueryResult, error)`
Queries the accessibility tree by accessible name, ARIA role, and/or a CSS selector scope.
```go
// Find all buttons
result, err := client.QueryAccessibilityTree("", "", "", "button", 10)
// Find element by accessible name
result, err := client.QueryAccessibilityTree("", "", "Submit", "", 10)
// Find buttons within a form
result, err := client.QueryAccessibilityTree("", "form", "", "button", 10)
```
### MCP Tools
#### `get_accessibility_tree_cremotemcp`
MCP tool for getting the full accessibility tree.
**Parameters:**
- `tab` (optional): Tab ID
- `depth` (optional): Maximum depth
- `timeout` (optional): Timeout in seconds
#### `get_partial_accessibility_tree_cremotemcp`
MCP tool for getting partial accessibility tree.
**Parameters:**
- `selector`: CSS selector for target element
- `tab` (optional): Tab ID
- `fetch_relatives` (optional): Include relatives, default true
- `timeout` (optional): Timeout in seconds
#### `query_accessibility_tree_cremotemcp`
MCP tool for querying accessibility tree.
**Parameters:**
- `tab` (optional): Tab ID
- `selector` (optional): CSS selector scope
- `accessible_name` (optional): Accessible name to match
- `role` (optional): ARIA role to match
- `timeout` (optional): Timeout in seconds
## Data Structures
### AXNode
Represents a node in the accessibility tree.
```go
type AXNode struct {
NodeID string `json:"nodeId"`
Ignored bool `json:"ignored"`
IgnoredReasons []AXProperty `json:"ignoredReasons,omitempty"`
Role *AXValue `json:"role,omitempty"`
ChromeRole *AXValue `json:"chromeRole,omitempty"`
Name *AXValue `json:"name,omitempty"`
Description *AXValue `json:"description,omitempty"`
Value *AXValue `json:"value,omitempty"`
Properties []AXProperty `json:"properties,omitempty"`
ParentID string `json:"parentId,omitempty"`
ChildIDs []string `json:"childIds,omitempty"`
BackendDOMNodeID int `json:"backendDOMNodeId,omitempty"`
FrameID string `json:"frameId,omitempty"`
}
```
### AXValue
Represents a computed accessibility value.
```go
type AXValue struct {
Type string `json:"type"`
Value interface{} `json:"value,omitempty"`
RelatedNodes []AXRelatedNode `json:"relatedNodes,omitempty"`
Sources []AXValueSource `json:"sources,omitempty"`
}
```
## Use Cases
### 1. Accessibility Testing
Verify that web pages have proper accessibility attributes and structure.
### 2. Screen Reader Simulation
Understand how assistive technologies would interpret the page.
### 3. Semantic Web Automation
Use semantic information for more robust element selection and interaction.
### 4. Form Analysis
Analyze form structure and labeling for accessibility compliance.
### 5. Content Analysis
Extract structured content based on semantic roles and relationships.
## Testing
Run the accessibility tree tests:
```bash
# Make the test script executable
chmod +x test_accessibility.sh
# Run the tests
./test_accessibility.sh
```
The test suite will:
1. Verify daemon connectivity
2. Test full accessibility tree retrieval
3. Test partial accessibility tree retrieval
4. Test accessibility tree queries by role and name
5. Test scoped queries
**Note**: The test files are located in the `tests/` directory to avoid conflicts with the main application build.
## Best Practices
1. **Use Appropriate Depth**: For performance, limit tree depth when full tree isn't needed
2. **Scope Queries**: Use CSS selectors to limit query scope for better performance
3. **Handle Ignored Nodes**: Check the `Ignored` field to filter out non-accessible elements
4. **Combine Criteria**: Use multiple search criteria for more precise queries
5. **Error Handling**: Always handle cases where elements might not have accessibility information
## Troubleshooting
### Common Issues
1. **Empty Results**: Some elements may not have accessibility information if they're decorative or improperly marked up
2. **Performance**: Large pages may have extensive accessibility trees; use depth limits or scoped queries
3. **Dynamic Content**: Accessibility tree may change as page content updates; re-query as needed
### Debug Tips
1. Use browser DevTools Accessibility panel to compare results
2. Check element roles and names in the browser first
3. Verify that accessibility features are enabled in Chrome
4. Test with simple pages first before complex applications

24
main.go
View File

@ -53,8 +53,7 @@ func main() {
fillFormTabID := fillFormCmd.String("tab", "", "Tab ID to fill form in (optional, uses current tab if not specified)") fillFormTabID := fillFormCmd.String("tab", "", "Tab ID to fill form in (optional, uses current tab if not specified)")
fillFormSelector := fillFormCmd.String("selector", "", "CSS selector for the input field") fillFormSelector := fillFormCmd.String("selector", "", "CSS selector for the input field")
fillFormValue := fillFormCmd.String("value", "", "Value to fill in the form field") fillFormValue := fillFormCmd.String("value", "", "Value to fill in the form field")
fillFormSelectionTimeout := fillFormCmd.Int("selection-timeout", 5, "Timeout in seconds for finding the element") fillFormTimeout := fillFormCmd.Int("timeout", 5, "Timeout in seconds for the fill operation")
fillFormActionTimeout := fillFormCmd.Int("action-timeout", 5, "Timeout in seconds for the fill action")
fillFormHost := fillFormCmd.String("host", "localhost", "Daemon host") fillFormHost := fillFormCmd.String("host", "localhost", "Daemon host")
fillFormPort := fillFormCmd.Int("port", 8989, "Daemon port") fillFormPort := fillFormCmd.Int("port", 8989, "Daemon port")
@ -62,16 +61,14 @@ func main() {
uploadFileTabID := uploadFileCmd.String("tab", "", "Tab ID to upload file in (optional, uses current tab if not specified)") uploadFileTabID := uploadFileCmd.String("tab", "", "Tab ID to upload file in (optional, uses current tab if not specified)")
uploadFileSelector := uploadFileCmd.String("selector", "", "CSS selector for the file input") uploadFileSelector := uploadFileCmd.String("selector", "", "CSS selector for the file input")
uploadFilePath := uploadFileCmd.String("file", "", "Path to the file to upload") uploadFilePath := uploadFileCmd.String("file", "", "Path to the file to upload")
uploadFileSelectionTimeout := uploadFileCmd.Int("selection-timeout", 5, "Timeout in seconds for finding the element") uploadFileTimeout := uploadFileCmd.Int("timeout", 5, "Timeout in seconds for the upload operation")
uploadFileActionTimeout := uploadFileCmd.Int("action-timeout", 5, "Timeout in seconds for the upload action")
uploadFileHost := uploadFileCmd.String("host", "localhost", "Daemon host") uploadFileHost := uploadFileCmd.String("host", "localhost", "Daemon host")
uploadFilePort := uploadFileCmd.Int("port", 8989, "Daemon port") uploadFilePort := uploadFileCmd.Int("port", 8989, "Daemon port")
// submit-form flags // submit-form flags
submitFormTabID := submitFormCmd.String("tab", "", "Tab ID to submit form in (optional, uses current tab if not specified)") submitFormTabID := submitFormCmd.String("tab", "", "Tab ID to submit form in (optional, uses current tab if not specified)")
submitFormSelector := submitFormCmd.String("selector", "", "CSS selector for the form") submitFormSelector := submitFormCmd.String("selector", "", "CSS selector for the form")
submitFormSelectionTimeout := submitFormCmd.Int("selection-timeout", 5, "Timeout in seconds for finding the element") submitFormTimeout := submitFormCmd.Int("timeout", 5, "Timeout in seconds for the submit operation")
submitFormActionTimeout := submitFormCmd.Int("action-timeout", 5, "Timeout in seconds for the submit action")
submitFormHost := submitFormCmd.String("host", "localhost", "Daemon host") submitFormHost := submitFormCmd.String("host", "localhost", "Daemon host")
submitFormPort := submitFormCmd.Int("port", 8989, "Daemon port") submitFormPort := submitFormCmd.Int("port", 8989, "Daemon port")
@ -84,15 +81,14 @@ func main() {
// get-element flags // get-element flags
getElementTabID := getElementCmd.String("tab", "", "Tab ID to get element from (optional, uses current tab if not specified)") getElementTabID := getElementCmd.String("tab", "", "Tab ID to get element from (optional, uses current tab if not specified)")
getElementSelector := getElementCmd.String("selector", "", "CSS selector for the element") getElementSelector := getElementCmd.String("selector", "", "CSS selector for the element")
getElementSelectionTimeout := getElementCmd.Int("selection-timeout", 5, "Timeout in seconds for finding the element") getElementTimeout := getElementCmd.Int("timeout", 5, "Timeout in seconds for finding the element")
getElementHost := getElementCmd.String("host", "localhost", "Daemon host") getElementHost := getElementCmd.String("host", "localhost", "Daemon host")
getElementPort := getElementCmd.Int("port", 8989, "Daemon port") getElementPort := getElementCmd.Int("port", 8989, "Daemon port")
// click-element flags // click-element flags
clickElementTabID := clickElementCmd.String("tab", "", "Tab ID to click element in (optional, uses current tab if not specified)") clickElementTabID := clickElementCmd.String("tab", "", "Tab ID to click element in (optional, uses current tab if not specified)")
clickElementSelector := clickElementCmd.String("selector", "", "CSS selector for the element to click") clickElementSelector := clickElementCmd.String("selector", "", "CSS selector for the element to click")
clickElementSelectionTimeout := clickElementCmd.Int("selection-timeout", 5, "Timeout in seconds for finding the element") clickElementTimeout := clickElementCmd.Int("timeout", 5, "Timeout in seconds for the click operation")
clickElementActionTimeout := clickElementCmd.Int("action-timeout", 5, "Timeout in seconds for the click action")
clickElementHost := clickElementCmd.String("host", "localhost", "Daemon host") clickElementHost := clickElementCmd.String("host", "localhost", "Daemon host")
clickElementPort := clickElementCmd.Int("port", 8989, "Daemon port") clickElementPort := clickElementCmd.Int("port", 8989, "Daemon port")
@ -213,7 +209,7 @@ func main() {
c := client.NewClient(*fillFormHost, *fillFormPort) c := client.NewClient(*fillFormHost, *fillFormPort)
// Fill the form field // Fill the form field
err := c.FillFormField(*fillFormTabID, *fillFormSelector, *fillFormValue, *fillFormSelectionTimeout, *fillFormActionTimeout) err := c.FillFormField(*fillFormTabID, *fillFormSelector, *fillFormValue, *fillFormTimeout)
if err != nil { if err != nil {
fmt.Fprintf(os.Stderr, "Error: %v\n", err) fmt.Fprintf(os.Stderr, "Error: %v\n", err)
os.Exit(1) os.Exit(1)
@ -248,7 +244,7 @@ func main() {
fmt.Printf("File transferred to container: %s\n", containerPath) fmt.Printf("File transferred to container: %s\n", containerPath)
// Then upload the file to the web form using the container path // Then upload the file to the web form using the container path
err = c.UploadFile(*uploadFileTabID, *uploadFileSelector, containerPath, *uploadFileSelectionTimeout, *uploadFileActionTimeout) err = c.UploadFile(*uploadFileTabID, *uploadFileSelector, containerPath, *uploadFileTimeout)
if err != nil { if err != nil {
fmt.Fprintf(os.Stderr, "Error uploading file to web form: %v\n", err) fmt.Fprintf(os.Stderr, "Error uploading file to web form: %v\n", err)
os.Exit(1) os.Exit(1)
@ -268,7 +264,7 @@ func main() {
c := client.NewClient(*submitFormHost, *submitFormPort) c := client.NewClient(*submitFormHost, *submitFormPort)
// Submit the form // Submit the form
err := c.SubmitForm(*submitFormTabID, *submitFormSelector, *submitFormSelectionTimeout, *submitFormActionTimeout) err := c.SubmitForm(*submitFormTabID, *submitFormSelector, *submitFormTimeout)
if err != nil { if err != nil {
fmt.Fprintf(os.Stderr, "Error: %v\n", err) fmt.Fprintf(os.Stderr, "Error: %v\n", err)
os.Exit(1) os.Exit(1)
@ -304,7 +300,7 @@ func main() {
c := client.NewClient(*getElementHost, *getElementPort) c := client.NewClient(*getElementHost, *getElementPort)
// Get the element HTML // Get the element HTML
html, err := c.GetElementHTML(*getElementTabID, *getElementSelector, *getElementSelectionTimeout) html, err := c.GetElementHTML(*getElementTabID, *getElementSelector, *getElementTimeout)
if err != nil { if err != nil {
fmt.Fprintf(os.Stderr, "Error: %v\n", err) fmt.Fprintf(os.Stderr, "Error: %v\n", err)
os.Exit(1) os.Exit(1)
@ -325,7 +321,7 @@ func main() {
c := client.NewClient(*clickElementHost, *clickElementPort) c := client.NewClient(*clickElementHost, *clickElementPort)
// Click the element // Click the element
err := c.ClickElement(*clickElementTabID, *clickElementSelector, *clickElementSelectionTimeout, *clickElementActionTimeout) err := c.ClickElement(*clickElementTabID, *clickElementSelector, *clickElementTimeout)
if err != nil { if err != nil {
fmt.Fprintf(os.Stderr, "Error: %v\n", err) fmt.Fprintf(os.Stderr, "Error: %v\n", err)
os.Exit(1) os.Exit(1)

View File

@ -4,7 +4,7 @@ This guide explains how LLMs can use the cremote MCP (Model Context Protocol) to
## 🎉 Complete Web Automation Platform ## 🎉 Complete Web Automation Platform
The cremote MCP server provides **27 comprehensive web automation tools** organized across 5 enhancement phases: The cremote MCP server provides **30 comprehensive web automation tools** organized across 6 enhancement phases:
- **Core Tools (10)**: Essential web automation capabilities - **Core Tools (10)**: Essential web automation capabilities
- **Phase 1 (2)**: Element state checking and conditional logic - **Phase 1 (2)**: Element state checking and conditional logic
@ -12,8 +12,9 @@ The cremote MCP server provides **27 comprehensive web automation tools** organi
- **Phase 3 (3)**: Form analysis and bulk operations - **Phase 3 (3)**: Form analysis and bulk operations
- **Phase 4 (4)**: Page state and metadata tools - **Phase 4 (4)**: Page state and metadata tools
- **Phase 5 (4)**: Enhanced screenshots and file management - **Phase 5 (4)**: Enhanced screenshots and file management
- **Phase 6 (3)**: Accessibility tree support for semantic understanding
## Available Tools (27 Total) ## Available Tools (30 Total)
### 1. `web_navigate_cremotemcp` ### 1. `web_navigate_cremotemcp`
Navigate to URLs and optionally take screenshots. Navigate to URLs and optionally take screenshots.
@ -1337,12 +1338,89 @@ web_extract_multiple_cremotemcp:
footer_text: "footer" footer_text: "footer"
``` ```
### Phase 6: Accessibility Tree Support (3 Tools)
#### `get_accessibility_tree_cremotemcp`
Get the full accessibility tree for a page or with limited depth for semantic understanding.
**Parameters:**
- `tab` (optional): Tab ID, uses current tab if not specified
- `depth` (optional): Maximum depth to retrieve, omit for full tree
- `timeout` (optional): Timeout in seconds, default 5
**Use Cases:**
- Accessibility testing and compliance verification
- Understanding page structure for screen readers
- Semantic element discovery and analysis
**Example:**
```
get_accessibility_tree_cremotemcp:
depth: 3
timeout: 10
```
#### `get_partial_accessibility_tree_cremotemcp`
Get accessibility tree for a specific element and its relatives (ancestors, siblings, children).
**Parameters:**
- `selector`: CSS selector for the target element (required)
- `tab` (optional): Tab ID, uses current tab if not specified
- `fetch_relatives` (optional): Include relatives, default true
- `timeout` (optional): Timeout in seconds, default 5
**Use Cases:**
- Focused accessibility analysis of specific components
- Form accessibility structure analysis
- Widget accessibility verification
**Example:**
```
get_partial_accessibility_tree_cremotemcp:
selector: "form.login-form"
fetch_relatives: true
timeout: 10
```
#### `query_accessibility_tree_cremotemcp`
Query accessibility tree for nodes matching specific criteria (accessible name, role, or scope).
**Parameters:**
- `tab` (optional): Tab ID, uses current tab if not specified
- `selector` (optional): CSS selector to limit search scope
- `accessible_name` (optional): Accessible name to match
- `role` (optional): ARIA role to match (e.g., "button", "textbox", "link")
- `timeout` (optional): Timeout in seconds, default 5
**Use Cases:**
- Find elements by their accessible names (what screen readers announce)
- Locate elements by ARIA roles for semantic interaction
- Accessibility-aware element discovery and testing
**Examples:**
```
# Find all buttons on the page
query_accessibility_tree_cremotemcp:
role: "button"
# Find submit button by accessible name
query_accessibility_tree_cremotemcp:
accessible_name: "Submit"
role: "button"
# Find form controls within a specific form
query_accessibility_tree_cremotemcp:
selector: "form.checkout"
role: "textbox"
```
## Integration Notes ## Integration Notes
- Tools use the `_cremotemcp` suffix to avoid naming conflicts - Tools use the `_cremotemcp` suffix to avoid naming conflicts
- Responses include success status and descriptive messages - Responses include success status and descriptive messages
- Screenshots are saved to `/tmp/` directory with timestamps - Screenshots are saved to `/tmp/` directory with timestamps
- The underlying cremote daemon handles browser management - The underlying cremote daemon handles browser management
- Accessibility tree tools provide semantic understanding of page structure
## Advanced Usage Examples ## Advanced Usage Examples
@ -1483,6 +1561,40 @@ web_screenshot_enhanced_cremotemcp:
full_page: true full_page: true
``` ```
### Accessibility Testing and Semantic Automation
```
# Navigate to page for accessibility testing
web_navigate_cremotemcp:
url: "https://myapp.com/form"
screenshot: true
# Get full accessibility tree to analyze structure
get_accessibility_tree_cremotemcp:
depth: 3
timeout: 10
# Find form elements by accessible names (more robust than CSS selectors)
query_accessibility_tree_cremotemcp:
accessible_name: "Email Address"
role: "textbox"
# Fill form using accessibility-aware approach
web_interact_cremotemcp:
action: "fill"
selector: "[aria-label='Email Address']"
value: "user@example.com"
# Find and click submit button by accessible name
query_accessibility_tree_cremotemcp:
accessible_name: "Submit Form"
role: "button"
# Verify form accessibility structure
get_partial_accessibility_tree_cremotemcp:
selector: "form"
fetch_relatives: true
```
## 🎯 Best Practices for LLM Agents ## 🎯 Best Practices for LLM Agents
### 1. **Use Batch Operations** ### 1. **Use Batch Operations**
@ -1505,10 +1617,16 @@ web_screenshot_enhanced_cremotemcp:
- Leverage `console_logs_cremotemcp` for JavaScript error detection - Leverage `console_logs_cremotemcp` for JavaScript error detection
- Take `web_screenshot_enhanced_cremotemcp` with metadata for comprehensive documentation - Take `web_screenshot_enhanced_cremotemcp` with metadata for comprehensive documentation
### 5. **Accessibility-Aware Automation**
- Use `query_accessibility_tree_cremotemcp` to find elements by accessible names instead of fragile selectors
- Verify accessibility compliance with `get_accessibility_tree_cremotemcp`
- Test screen reader compatibility by analyzing semantic structure
- Build more robust automation using ARIA roles and accessible names
## 🎉 Production Ready ## 🎉 Production Ready
This comprehensive web automation platform provides **27 tools** across 5 enhancement phases, optimized specifically for LLM agents and production workflows. All tools include proper error handling, timeout management, and structured responses for reliable automation. This comprehensive web automation platform provides **30 tools** across 6 enhancement phases, optimized specifically for LLM agents and production workflows. All tools include proper error handling, timeout management, and structured responses for reliable automation.
--- ---
**Ready for Production**: Complete web automation platform with 27 tools, designed for maximum efficiency and reliability in LLM-driven workflows. **Ready for Production**: Complete web automation platform with 30 tools, designed for maximum efficiency and reliability in LLM-driven workflows.

278
mcp/MULTI_CLIENT_GUIDE.md Normal file
View File

@ -0,0 +1,278 @@
# Cremote MCP Multi-Client Support
The Cremote MCP server now supports multiple concurrent clients with isolated browser sessions. This allows multiple AI agents or applications to use the same cremote daemon simultaneously without interfering with each other's browser state.
## Architecture Overview
```
┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐
│ Client A │ │ Client B │ │ Client N │
│ (Agent 1) │ │ (Agent 2) │ │ (Agent N) │
└─────────┬───────┘ └─────────┬───────┘ └─────────┬───────┘
│ │ │
│ HTTP/Session │ HTTP/Session │ HTTP/Session
│ │ │
└──────────────────────┼──────────────────────┘
┌─────────────┴─────────────┐
│ Cremote MCP Server │
│ (Session Manager) │
└─────────────┬─────────────┘
│ TCP
┌─────────────┴─────────────┐
│ Cremote Daemon │
│ (Shared Browser) │
└─────────────┬─────────────┘
│ DevTools Protocol
┌─────────────┴─────────────┐
│ Chrome/Chromium │
│ (All tabs accessible) │
└───────────────────────────┘
```
## Transport Modes
### 1. stdio Transport (Single Client - Legacy)
**Default mode** - Maintains backward compatibility with existing clients.
```bash
# Default mode (stdio)
./cremote-mcp
# Or explicitly set
CREMOTE_TRANSPORT=stdio ./cremote-mcp
```
- **Clients**: 1 concurrent client
- **Communication**: stdin/stdout JSON-RPC
- **Session Management**: Single global state
- **Use Case**: Existing integrations, single-agent scenarios
### 2. HTTP Transport (Multiple Clients - New)
**Multi-client mode** - Supports concurrent clients with session isolation.
```bash
# Enable HTTP transport
CREMOTE_TRANSPORT=http ./cremote-mcp
```
- **Clients**: Multiple concurrent clients
- **Communication**: HTTP POST/GET with JSON-RPC
- **Session Management**: Per-client isolated sessions
- **Use Case**: Multiple agents, concurrent automation
## Configuration
### Environment Variables
| Variable | Default | Description |
|----------|---------|-------------|
| `CREMOTE_TRANSPORT` | `stdio` | Transport mode: `stdio` or `http` |
| `CREMOTE_HOST` | `localhost` | Cremote daemon host |
| `CREMOTE_PORT` | `8989` | Cremote daemon port |
| `CREMOTE_HTTP_HOST` | `localhost` | HTTP server host (HTTP mode only) |
| `CREMOTE_HTTP_PORT` | `8990` | HTTP server port (HTTP mode only) |
### Example Configurations
#### Single Client (Legacy)
```bash
export CREMOTE_HOST=localhost
export CREMOTE_PORT=8989
export CREMOTE_TRANSPORT=stdio
./cremote-mcp
```
#### Multiple Clients
```bash
export CREMOTE_HOST=localhost
export CREMOTE_PORT=8989
export CREMOTE_TRANSPORT=http
export CREMOTE_HTTP_HOST=localhost
export CREMOTE_HTTP_PORT=8990
./cremote-mcp
```
## Session Management
### Session Lifecycle
1. **Initialization**: Client sends `initialize` request, receives `Mcp-Session-Id` header
2. **Operations**: All subsequent requests include the session ID header
3. **Isolation**: Each session maintains independent browser state
4. **Cleanup**: Sessions auto-expire after 30 minutes of inactivity
5. **Termination**: Clients can explicitly terminate a session by sending a DELETE request
### Session State
Each client session maintains isolated state:
- **Current Tab**: Independent active tab tracking
- **Tab History**: Per-client tab navigation history
- **Iframe Context**: Independent iframe mode state
- **Screenshots**: Per-client screenshot collection
### Session Headers
HTTP clients must include session headers:
```http
POST /mcp HTTP/1.1
Content-Type: application/json
Accept: application/json
Mcp-Session-Id: a1b2c3d4e5f6g7h8
MCP-Protocol-Version: 2025-06-18
{
"jsonrpc": "2.0",
"id": 1,
"method": "tools/call",
"params": {
"name": "web_navigate_cremotemcp",
"arguments": {
"url": "https://example.com"
}
}
}
```
## Testing Multi-Client Setup
### Prerequisites
1. **Start Cremote Daemon**:
```bash
cremotedaemon
```
2. **Start Chrome with Remote Debugging**:
```bash
chromium --remote-debugging-port=9222 --user-data-dir=/tmp/chromium-debug
```
### Run Multi-Client Test
```bash
cd mcp/
./test_multiclient.sh
```
This test:
- Starts the MCP server in HTTP mode
- Creates 3 concurrent test clients
- Verifies each gets a unique session ID
- Tests session isolation
- Cleans up all sessions
### Manual Testing
1. **Start HTTP Server**:
```bash
CREMOTE_TRANSPORT=http ./cremote-mcp
```
2. **Test with curl**:
```bash
# Initialize session (-i prints the response headers, which include Mcp-Session-Id)
curl -i -X POST http://localhost:8990/mcp \
-H "Content-Type: application/json" \
-H "Accept: application/json" \
-d '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion":"2025-06-18"}}'
# Use returned Mcp-Session-Id for subsequent requests
```
## Benefits
### For AI Agents
- **Concurrent Operations**: Multiple agents can browse simultaneously
- **State Isolation**: No interference between agent sessions
- **Resource Sharing**: Shared browser instance reduces memory usage
- **Session Recovery**: Automatic cleanup prevents resource leaks
### For Developers
- **Scalability**: Support multiple concurrent automations
- **Debugging**: Isolated sessions simplify troubleshooting
- **Flexibility**: Choose transport mode based on use case
- **Compatibility**: Backward compatible with existing stdio clients
## Limitations
### Current Implementation
- **Tool Coverage**: Not all 30 tools are session-aware yet (work in progress)
- **SSE Streaming**: Server-Sent Events not implemented yet
- **Advanced Features**: Some MCP protocol features pending
### Planned Improvements
- Complete tool migration to session-aware handlers
- SSE support for real-time notifications
- Enhanced session management features
- Performance optimizations
## Migration Guide
### From Single Client to Multi-Client
1. **Update Environment**:
```bash
# Old
./cremote-mcp
# New
CREMOTE_TRANSPORT=http ./cremote-mcp
```
2. **Update Client Code**:
- Switch from stdio to HTTP transport
- Handle session ID headers
- Implement proper session cleanup
3. **Test Thoroughly**:
- Verify session isolation
- Test concurrent operations
- Monitor resource usage
### Backward Compatibility
Existing stdio clients continue to work unchanged:
- No code changes required
- Same tool interface
- Same behavior and performance
## Troubleshooting
### Common Issues
1. **Session Not Found (404)**:
- Session expired (30-minute inactivity timeout)
- Invalid session ID
- Server restart cleared sessions
2. **Port Conflicts**:
- Change `CREMOTE_HTTP_PORT` if 8990 is in use
- Ensure cremote daemon port (8989) is available
3. **CORS Issues**:
- Server includes CORS headers for web clients
- Use proper Accept headers in requests
### Debug Mode
Capture the server's output (stdout and stderr) in a log file for debugging:
```bash
CREMOTE_TRANSPORT=http ./cremote-mcp 2>&1 | tee mcp-debug.log
```
## Next Steps
1. **Complete Tool Migration**: Migrate remaining tools to session-aware handlers
2. **Add SSE Support**: Implement Server-Sent Events for streaming
3. **Enhanced Testing**: Add comprehensive integration tests
4. **Performance Tuning**: Optimize session management and cleanup
5. **Documentation**: Complete API documentation and examples

View File

@ -4,7 +4,18 @@ This is a Model Context Protocol (MCP) server that exposes cremote's web automat
## 🎉 Complete Web Automation Platform ## 🎉 Complete Web Automation Platform
**27 comprehensive tools** across 5 enhancement phases, providing a complete web automation toolkit for LLM agents: **30 comprehensive tools** across 6 enhancement phases, providing a complete web automation toolkit for LLM agents:
### 🚀 **NEW: Multi-Client Support**
The Cremote MCP server now supports **multiple concurrent clients** with isolated browser sessions:
- **Concurrent Agents**: Multiple AI agents can use the same browser simultaneously
- **Session Isolation**: Each client maintains independent browser state (tabs, history, iframe context)
- **Transport Flexibility**: Choose between stdio (single client) or HTTP (multiple clients)
- **Backward Compatible**: Existing stdio clients continue to work unchanged
See the [Multi-Client Guide](MULTI_CLIENT_GUIDE.md) for detailed setup and usage instructions.
- **Phase 1**: Element state checking and conditional logic (2 tools) - **Phase 1**: Element state checking and conditional logic (2 tools)
- **Phase 2**: Enhanced data extraction and batch operations (4 tools) - **Phase 2**: Enhanced data extraction and batch operations (4 tools)
@ -22,6 +33,7 @@ This is a Model Context Protocol (MCP) server that exposes cremote's web automat
- **Rich Context**: Page metadata, performance metrics, and content verification - **Rich Context**: Page metadata, performance metrics, and content verification
- **Enhanced Screenshots**: Element-specific and metadata-rich screenshot capture - **Enhanced Screenshots**: Element-specific and metadata-rich screenshot capture
- **File Management**: Bulk file operations and automated cleanup - **File Management**: Bulk file operations and automated cleanup
- **Accessibility Tree**: Chrome accessibility tree interface for semantic understanding
- **Automatic Screenshots**: Optional screenshot capture for debugging and documentation - **Automatic Screenshots**: Optional screenshot capture for debugging and documentation
- **Error Recovery**: Better error handling and context for LLMs - **Error Recovery**: Better error handling and context for LLMs
- **Resource Management**: Automatic cleanup and connection management - **Resource Management**: Automatic cleanup and connection management
@ -30,7 +42,7 @@ This is a Model Context Protocol (MCP) server that exposes cremote's web automat
**For LLM agents**: See the comprehensive [LLM Usage Guide](LLM_USAGE_GUIDE.md) for detailed usage instructions, examples, and best practices. **For LLM agents**: See the comprehensive [LLM Usage Guide](LLM_USAGE_GUIDE.md) for detailed usage instructions, examples, and best practices.
## Available Tools (27 Total) ## Available Tools (30 Total)
### Version Information ### Version Information
@ -608,13 +620,38 @@ go build -o cremote-mcp .
### Configuration ### Configuration
#### Basic Configuration (Single Client - stdio)
Set environment variables to configure the cremote connection: Set environment variables to configure the cremote connection:
```bash ```bash
export CREMOTE_HOST=localhost export CREMOTE_HOST=localhost
export CREMOTE_PORT=8989 export CREMOTE_PORT=8989
export CREMOTE_TRANSPORT=stdio # Default
``` ```
#### Multi-Client Configuration (HTTP Transport)
For multiple concurrent clients:
```bash
export CREMOTE_HOST=localhost
export CREMOTE_PORT=8989
export CREMOTE_TRANSPORT=http
export CREMOTE_HTTP_HOST=localhost
export CREMOTE_HTTP_PORT=8990
```
#### Environment Variables
| Variable | Default | Description |
|----------|---------|-------------|
| `CREMOTE_TRANSPORT` | `stdio` | Transport mode: `stdio` or `http` |
| `CREMOTE_HOST` | `localhost` | Cremote daemon host |
| `CREMOTE_PORT` | `8989` | Cremote daemon port |
| `CREMOTE_HTTP_HOST` | `localhost` | HTTP server host (HTTP mode only) |
| `CREMOTE_HTTP_PORT` | `8990` | HTTP server port (HTTP mode only) |
### Running with Claude Desktop ### Running with Claude Desktop
Add to your Claude Desktop configuration (`~/Library/Application Support/Claude/claude_desktop_config.json` on macOS): Add to your Claude Desktop configuration (`~/Library/Application Support/Claude/claude_desktop_config.json` on macOS):
@ -772,6 +809,53 @@ All tool responses include:
} }
``` ```
### Phase 6: Accessibility Tree Support (3 Tools)
#### `get_accessibility_tree_cremotemcp`
Get the accessibility tree for a page, either in full or limited to a given depth.
```json
{
"name": "get_accessibility_tree_cremotemcp",
"arguments": {
"tab": "optional-tab-id",
"depth": 3,
"timeout": 10
}
}
```
#### `get_partial_accessibility_tree_cremotemcp`
Get accessibility tree for a specific element and its relatives.
```json
{
"name": "get_partial_accessibility_tree_cremotemcp",
"arguments": {
"selector": "form",
"tab": "optional-tab-id",
"fetch_relatives": true,
"timeout": 10
}
}
```
#### `query_accessibility_tree_cremotemcp`
Query accessibility tree for nodes matching specific criteria.
```json
{
"name": "query_accessibility_tree_cremotemcp",
"arguments": {
"tab": "optional-tab-id",
"selector": "form",
"accessible_name": "Submit",
"role": "button",
"timeout": 10
}
}
```
## Benefits Over CLI ## Benefits Over CLI
### 🎯 **Enhanced Efficiency** ### 🎯 **Enhanced Efficiency**
@ -796,8 +880,8 @@ All tool responses include:
This comprehensive web automation platform is **production ready** with: This comprehensive web automation platform is **production ready** with:
- **27 Tools**: Complete coverage of web automation needs - **30 Tools**: Complete coverage of web automation needs
- **5 Enhancement Phases**: Systematic capability building from basic to advanced - **6 Enhancement Phases**: Systematic capability building from basic to advanced
- **Extensive Testing**: All tools validated and documented - **Extensive Testing**: All tools validated and documented
- **LLM Optimized**: Designed specifically for AI agent workflows - **LLM Optimized**: Designed specifically for AI agent workflows
- **Backward Compatible**: All existing tools continue to work unchanged - **Backward Compatible**: All existing tools continue to work unchanged
@ -811,6 +895,7 @@ This comprehensive web automation platform is **production ready** with:
| **Form Automation** | 3 tools | Form analysis, bulk filling, batch interactions | | **Form Automation** | 3 tools | Form analysis, bulk filling, batch interactions |
| **Page Intelligence** | 4 tools | Page state, performance metrics, content verification, viewport info | | **Page Intelligence** | 4 tools | Page state, performance metrics, content verification, viewport info |
| **Enhanced Capabilities** | 4 tools | Element screenshots, enhanced metadata, bulk file ops, file management | | **Enhanced Capabilities** | 4 tools | Element screenshots, enhanced metadata, bulk file ops, file management |
| **Accessibility Tree** | 3 tools | Semantic understanding, accessibility testing, screen reader simulation |
## Development ## Development
@ -825,4 +910,4 @@ The server is designed to be easily extensible while maintaining consistency wit
--- ---
**🚀 Ready for Production**: Complete web automation platform with 27 tools across 5 enhancement phases, optimized for LLM agents and production workflows. **🚀 Ready for Production**: Complete web automation platform with 30 tools across 6 enhancement phases, optimized for LLM agents and production workflows.

File diff suppressed because it is too large Load Diff

39
test_accessibility.sh Executable file
View File

@ -0,0 +1,39 @@
#!/bin/bash
# Builds the accessibility tree test program in ./tests, runs it, and
# removes the built binary afterwards.
#
# Exit status: 1 if cremotedaemon is not running, the tests directory is
# missing, or the build fails; otherwise the exit status of the test program.

echo "=== Cremote Accessibility Tree Test Suite ==="
echo

# Check if cremotedaemon is running
echo "Checking if cremotedaemon is running..."
if ! pgrep -f cremotedaemon > /dev/null; then
    echo "ERROR: cremotedaemon is not running"
    echo "Please start it first: cremotedaemon"
    exit 1
fi
echo "✓ cremotedaemon is running"

# Build the test
echo "Building accessibility test..."
# Stop here if the directory is missing; otherwise the build and the later
# rm would run in whatever directory the script was started from.
cd tests || { echo "ERROR: tests directory not found"; exit 1; }
if ! go build -o test_accessibility test_accessibility.go; then
    echo "ERROR: Failed to build test"
    exit 1
fi
echo "✓ Test built successfully"

# Run the test
echo
echo "Running accessibility tree tests..."
echo "=================================="
./test_accessibility
# Save the result now so cleanup can run before it is reported to the caller.
test_status=$?

# Cleanup
echo
echo "Cleaning up..."
rm -f test_accessibility
cd ..
echo "✓ Cleanup completed"

echo
echo "=== Test Suite Completed ==="
exit "$test_status"

176
tests/test_accessibility.go Normal file
View File

@ -0,0 +1,176 @@
package main
import (
"fmt"
"log"
"time"
"git.teamworkapps.com/shortcut/cremote/client"
)
// main runs the accessibility tree checks against a cremote daemon on
// localhost:8989.
//
// It opens a tab, loads a form test page, exercises the full, depth-limited,
// partial, and query accessibility tree calls, and finally closes the tab.
// Setup failures (daemon unreachable, tab cannot be opened, page cannot be
// loaded) abort immediately. Failures of individual checks are logged and
// counted; after cleanup the process exits non-zero if any were recorded.
func main() {
	fmt.Println("=== Cremote Accessibility Tree Test ===")
	fmt.Println()

	// Create client
	c := client.NewClient("localhost", 8989)

	// Check if daemon is running
	fmt.Println("Checking daemon status...")
	running, err := c.CheckStatus()
	if err != nil || !running {
		log.Fatalf("Daemon is not running or not accessible: %v", err)
	}
	fmt.Println("✓ Daemon is running")

	// Open a new tab
	fmt.Println("Opening new tab...")
	tabID, err := c.OpenTab(5)
	if err != nil {
		log.Fatalf("Failed to open tab: %v", err)
	}
	fmt.Printf("✓ Opened tab: %s\n", tabID)

	// Navigate to a test page with accessibility content
	fmt.Println("Navigating to test page...")
	testURL := "https://brokedown.net/formtest.php"
	if err = c.LoadURL(tabID, testURL, 10); err != nil {
		// log.Fatalf exits without running any further code, so close the
		// tab opened above first instead of leaving it behind in the browser.
		if closeErr := c.CloseTab(tabID, 5); closeErr != nil {
			log.Printf("❌ Failed to close tab: %v", closeErr)
		}
		log.Fatalf("Failed to load URL: %v", err)
	}
	fmt.Printf("✓ Loaded: %s\n", testURL)

	// Wait for page to fully load
	time.Sleep(2 * time.Second)

	// Number of checks that reported an error; decides the exit status.
	failures := 0

	// Test 1: Get full accessibility tree
	fmt.Println("\n--- Test 1: Full Accessibility Tree ---")
	fullTree, err := c.GetAccessibilityTree(tabID, nil, 10)
	if err != nil {
		failures++
		log.Printf("❌ Failed to get full accessibility tree: %v", err)
	} else {
		fmt.Printf("✓ Retrieved full accessibility tree with %d nodes\n", len(fullTree.Nodes))

		// Print first few nodes for inspection
		fmt.Println("First 3 nodes:")
		for i, node := range fullTree.Nodes {
			if i >= 3 {
				break
			}
			fmt.Printf(" Node %d: ID=%s, Role=%s, Name=%s, Ignored=%v\n",
				i+1, node.NodeID, getRoleValue(node.Role), getNameValue(node.Name), node.Ignored)
		}
	}

	// Test 2: Get accessibility tree with limited depth
	fmt.Println("\n--- Test 2: Limited Depth Accessibility Tree ---")
	depth := 2
	limitedTree, err := c.GetAccessibilityTree(tabID, &depth, 10)
	if err != nil {
		failures++
		log.Printf("❌ Failed to get limited depth accessibility tree: %v", err)
	} else {
		fmt.Printf("✓ Retrieved accessibility tree (depth=%d) with %d nodes\n", depth, len(limitedTree.Nodes))
	}

	// Test 3: Get partial accessibility tree for a specific element
	fmt.Println("\n--- Test 3: Partial Accessibility Tree ---")
	selector := "form"
	partialTree, err := c.GetPartialAccessibilityTree(tabID, selector, true, 10)
	if err != nil {
		failures++
		log.Printf("❌ Failed to get partial accessibility tree: %v", err)
	} else {
		fmt.Printf("✓ Retrieved partial accessibility tree for '%s' with %d nodes\n", selector, len(partialTree.Nodes))

		// Print details of found nodes
		fmt.Println("Form-related accessibility nodes:")
		for i, node := range partialTree.Nodes {
			if i >= 5 { // Limit output
				break
			}
			fmt.Printf(" Node %d: Role=%s, Name=%s, BackendNodeID=%d\n",
				i+1, getRoleValue(node.Role), getNameValue(node.Name), node.BackendDOMNodeID)
		}
	}

	// Test 4: Query accessibility tree by role
	fmt.Println("\n--- Test 4: Query by Role ---")
	roleQuery, err := c.QueryAccessibilityTree(tabID, "", "", "textbox", 10)
	if err != nil {
		failures++
		log.Printf("❌ Failed to query accessibility tree by role: %v", err)
	} else {
		fmt.Printf("✓ Found %d textbox elements\n", len(roleQuery.Nodes))
		for i, node := range roleQuery.Nodes {
			if i >= 3 { // Limit output
				break
			}
			fmt.Printf(" Textbox %d: Name=%s, Value=%s\n",
				i+1, getNameValue(node.Name), getValueValue(node.Value))
		}
	}

	// Test 5: Query accessibility tree by accessible name
	fmt.Println("\n--- Test 5: Query by Accessible Name ---")
	nameQuery, err := c.QueryAccessibilityTree(tabID, "", "Submit", "", 10)
	if err != nil {
		failures++
		log.Printf("❌ Failed to query accessibility tree by name: %v", err)
	} else {
		fmt.Printf("✓ Found %d elements with accessible name 'Submit'\n", len(nameQuery.Nodes))
		for i, node := range nameQuery.Nodes {
			fmt.Printf(" Element %d: Role=%s, Name=%s\n",
				i+1, getRoleValue(node.Role), getNameValue(node.Name))
		}
	}

	// Test 6: Query with selector scope
	fmt.Println("\n--- Test 6: Query with Selector Scope ---")
	scopedQuery, err := c.QueryAccessibilityTree(tabID, "form", "", "button", 10)
	if err != nil {
		failures++
		log.Printf("❌ Failed to query accessibility tree with selector scope: %v", err)
	} else {
		fmt.Printf("✓ Found %d button elements within form\n", len(scopedQuery.Nodes))
	}

	// Clean up
	fmt.Println("\n--- Cleanup ---")
	if err = c.CloseTab(tabID, 5); err != nil {
		failures++
		log.Printf("❌ Failed to close tab: %v", err)
	} else {
		fmt.Println("✓ Tab closed")
	}

	fmt.Println("\n=== Accessibility Tree Test Completed ===")

	// Report failures through the exit status so callers and scripts can
	// tell a failed run from a clean one; log.Fatalf exits with status 1.
	if failures > 0 {
		log.Fatalf("%d accessibility check(s) failed", failures)
	}
}
// Helper functions to extract values from AXValue structs
// getRoleValue renders the Value carried by axValue as a string.
// It returns "unknown" when axValue or its Value is nil, the string itself
// when Value holds a string, and the %v formatting of Value otherwise.
func getRoleValue(axValue *client.AXValue) string {
	if axValue == nil || axValue.Value == nil {
		return "unknown"
	}
	switch v := axValue.Value.(type) {
	case string:
		return v
	default:
		return fmt.Sprintf("%v", v)
	}
}
// getNameValue renders the Value carried by axValue as a string.
// It returns "" when axValue or its Value is nil, the string itself when
// Value holds a string, and the %v formatting of Value otherwise.
func getNameValue(axValue *client.AXValue) string {
	if axValue == nil {
		return ""
	}
	raw := axValue.Value
	if raw == nil {
		return ""
	}
	if text, isString := raw.(string); isString {
		return text
	}
	return fmt.Sprintf("%v", raw)
}
// getValueValue renders the Value carried by axValue as a string.
// A nil axValue or nil Value yields ""; a string Value is returned as is;
// any other Value is formatted with %v.
func getValueValue(axValue *client.AXValue) string {
	if axValue != nil && axValue.Value != nil {
		if s, isString := axValue.Value.(string); isString {
			return s
		}
		return fmt.Sprintf("%v", axValue.Value)
	}
	return ""
}