docs+skills: add main UI/UX visual-truth PRD and skill links
This commit is contained in:
parent
1c36223e7f
commit
14a50ad4ae
289 changed files with 54463 additions and 0 deletions
297
.agents/skills/agent-browser/references/authentication.md
Normal file
297
.agents/skills/agent-browser/references/authentication.md
Normal file
|
|
@ -0,0 +1,297 @@
|
|||
# Authentication Patterns
|
||||
|
||||
Login flows, OAuth, 2FA, and authenticated browsing.
|
||||
|
||||
**Related**: [session-management.md](session-management.md) for session details, [SKILL.md](../SKILL.md) for quick start.
|
||||
|
||||
## Contents
|
||||
|
||||
- [Basic Login Flow](#basic-login-flow)
|
||||
- [OAuth / SSO Flows](#oauth--sso-flows)
|
||||
- [Two-Factor Authentication](#two-factor-authentication)
|
||||
- [Session Reuse Patterns](#session-reuse-patterns)
|
||||
- [Cookie Extraction](#cookie-extraction)
|
||||
- [Security Best Practices](#security-best-practices)
|
||||
|
||||
## Basic Login Flow
|
||||
|
||||
Standard username/password login:
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
|
||||
# Start session
|
||||
SESSION=$(infsh app run agent-browser --function open --session new --input '{
|
||||
"url": "https://app.example.com/login"
|
||||
}' | jq -r '.session_id')
|
||||
|
||||
# Get form elements
|
||||
# Expected: @e1 [input type="email"], @e2 [input type="password"], @e3 [button] "Sign In"
|
||||
|
||||
# Fill credentials
|
||||
infsh app run agent-browser --function interact --session $SESSION --input '{
|
||||
"action": "fill", "ref": "@e1", "text": "user@example.com"
|
||||
}'
|
||||
|
||||
infsh app run agent-browser --function interact --session $SESSION --input '{
|
||||
"action": "fill", "ref": "@e2", "text": "'"$PASSWORD"'"
|
||||
}'
|
||||
|
||||
# Submit
|
||||
infsh app run agent-browser --function interact --session $SESSION --input '{
|
||||
"action": "click", "ref": "@e3"
|
||||
}'
|
||||
|
||||
# Wait for redirect
|
||||
infsh app run agent-browser --function interact --session $SESSION --input '{
|
||||
"action": "wait", "wait_ms": 2000
|
||||
}'
|
||||
|
||||
# Verify login succeeded
|
||||
RESULT=$(infsh app run agent-browser --function snapshot --session $SESSION --input '{}')
|
||||
URL=$(echo $RESULT | jq -r '.url')
|
||||
|
||||
if [[ "$URL" == *"/login"* ]]; then
|
||||
echo "Login failed - still on login page"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "Login successful"
|
||||
# Continue with authenticated actions...
|
||||
```
|
||||
|
||||
## OAuth / SSO Flows
|
||||
|
||||
For OAuth redirects (Google, GitHub, etc.):
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
|
||||
SESSION=$(infsh app run agent-browser --function open --session new --input '{
|
||||
"url": "https://app.example.com/auth/google"
|
||||
}' | jq -r '.session_id')
|
||||
|
||||
# Wait for redirect to Google
|
||||
infsh app run agent-browser --function interact --session $SESSION --input '{
|
||||
"action": "wait", "wait_ms": 3000
|
||||
}'
|
||||
|
||||
# Snapshot to see Google login form
|
||||
RESULT=$(infsh app run agent-browser --function snapshot --session $SESSION --input '{}')
|
||||
echo $RESULT | jq '.elements_text'
|
||||
|
||||
# Fill Google email
|
||||
infsh app run agent-browser --function interact --session $SESSION --input '{
|
||||
"action": "fill", "ref": "@e1", "text": "user@gmail.com"
|
||||
}'
|
||||
|
||||
# Click Next
|
||||
infsh app run agent-browser --function interact --session $SESSION --input '{
|
||||
"action": "click", "ref": "@e2"
|
||||
}'
|
||||
|
||||
# Wait and snapshot for password field
|
||||
infsh app run agent-browser --function interact --session $SESSION --input '{
|
||||
"action": "wait", "wait_ms": 2000
|
||||
}'
|
||||
RESULT=$(infsh app run agent-browser --function snapshot --session $SESSION --input '{}')
|
||||
|
||||
# Fill password
|
||||
infsh app run agent-browser --function interact --session $SESSION --input '{
|
||||
"action": "fill", "ref": "@e1", "text": "'"$GOOGLE_PASSWORD"'"
|
||||
}'
|
||||
|
||||
# Click Sign in
|
||||
infsh app run agent-browser --function interact --session $SESSION --input '{
|
||||
"action": "click", "ref": "@e2"
|
||||
}'
|
||||
|
||||
# Wait for redirect back to app
|
||||
infsh app run agent-browser --function interact --session $SESSION --input '{
|
||||
"action": "wait", "wait_ms": 5000
|
||||
}'
|
||||
|
||||
# Verify we're back on the app
|
||||
RESULT=$(infsh app run agent-browser --function snapshot --session $SESSION --input '{}')
|
||||
URL=$(echo $RESULT | jq -r '.url')
|
||||
echo "Final URL: $URL"
|
||||
```
|
||||
|
||||
## Two-Factor Authentication
|
||||
|
||||
For 2FA, you may need human intervention or TOTP generation:
|
||||
|
||||
### With TOTP Code
|
||||
|
||||
```bash
|
||||
# After password, check for 2FA prompt
|
||||
RESULT=$(infsh app run agent-browser --function snapshot --session $SESSION --input '{}')
|
||||
ELEMENTS=$(echo $RESULT | jq -r '.elements_text')
|
||||
|
||||
if echo "$ELEMENTS" | grep -qi "verification\|2fa\|authenticator"; then
|
||||
# Generate TOTP code (requires oathtool)
|
||||
TOTP_CODE=$(oathtool --totp -b "$TOTP_SECRET")
|
||||
|
||||
# Fill 2FA code
|
||||
infsh app run agent-browser --function interact --session $SESSION --input '{
|
||||
"action": "fill", "ref": "@e1", "text": "'"$TOTP_CODE"'"
|
||||
}'
|
||||
|
||||
# Submit
|
||||
infsh app run agent-browser --function interact --session $SESSION --input '{
|
||||
"action": "click", "ref": "@e2"
|
||||
}'
|
||||
fi
|
||||
```
|
||||
|
||||
### With Manual Intervention
|
||||
|
||||
For SMS or hardware token 2FA:
|
||||
|
||||
```bash
|
||||
# Record video so user can see the 2FA prompt
|
||||
SESSION=$(infsh app run agent-browser --function open --session new --input '{
|
||||
"url": "https://app.example.com/login",
|
||||
"record_video": true
|
||||
}' | jq -r '.session_id')
|
||||
|
||||
# ... login flow ...
|
||||
|
||||
# At 2FA step, prompt user
|
||||
echo "2FA code sent. Enter the code:"
|
||||
read -r CODE
|
||||
|
||||
infsh app run agent-browser --function interact --session $SESSION --input '{
|
||||
"action": "fill", "ref": "@e1", "text": "'"$CODE"'"
|
||||
}'
|
||||
```
|
||||
|
||||
## Session Reuse Patterns
|
||||
|
||||
Since sessions maintain cookies, you can reuse authenticated sessions:
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
# login-and-work.sh
|
||||
|
||||
# Login once
|
||||
login() {
|
||||
SESSION=$(infsh app run agent-browser --function open --session new --input '{
|
||||
"url": "https://app.example.com/login"
|
||||
}' | jq -r '.session_id')
|
||||
|
||||
# ... login steps ...
|
||||
|
||||
echo $SESSION
|
||||
}
|
||||
|
||||
# Do work with authenticated session
|
||||
do_work() {
|
||||
local SESSION=$1
|
||||
|
||||
# Navigate to protected page
|
||||
infsh app run agent-browser --function interact --session $SESSION --input '{
|
||||
"action": "goto", "url": "https://app.example.com/dashboard"
|
||||
}'
|
||||
|
||||
# Extract data
|
||||
infsh app run agent-browser --function snapshot --session $SESSION --input '{}'
|
||||
}
|
||||
|
||||
# Main
|
||||
SESSION=$(login)
|
||||
do_work $SESSION
|
||||
|
||||
# Don't close if you want to reuse!
|
||||
# infsh app run agent-browser --function close --session $SESSION --input '{}'
|
||||
```
|
||||
|
||||
## Cookie Extraction
|
||||
|
||||
Extract cookies for use in other tools:
|
||||
|
||||
```bash
|
||||
# Get cookies via JavaScript
|
||||
RESULT=$(infsh app run agent-browser --function execute --session $SESSION --input '{
|
||||
"code": "document.cookie"
|
||||
}')
|
||||
COOKIES=$(echo $RESULT | jq -r '.result')
|
||||
echo "Cookies: $COOKIES"
|
||||
|
||||
# List resource URLs loaded by the page (note: this does NOT return cookies; HttpOnly cookies cannot be read from page JavaScript)
|
||||
RESULT=$(infsh app run agent-browser --function execute --session $SESSION --input '{
|
||||
"code": "JSON.stringify(performance.getEntriesByType(\"resource\").map(r => r.name))"
|
||||
}')
|
||||
```
|
||||
|
||||
## Security Best Practices
|
||||
|
||||
### 1. Never Hardcode Credentials
|
||||
|
||||
```bash
|
||||
# Good: Use environment variables
|
||||
'{"action": "fill", "ref": "@e2", "text": "'"$PASSWORD"'"}'
|
||||
|
||||
# Bad: Hardcoded
|
||||
'{"action": "fill", "ref": "@e2", "text": "mypassword123"}'
|
||||
```
|
||||
|
||||
### 2. Use Secure Environment Variables
|
||||
|
||||
```bash
|
||||
# Set securely
|
||||
export PASSWORD=$(cat /path/to/secure/password)
|
||||
|
||||
# Or use a secrets manager
|
||||
export PASSWORD=$(vault read -field=password secret/app)
|
||||
```
|
||||
|
||||
### 3. Don't Log Sensitive Data
|
||||
|
||||
```bash
|
||||
# Good: Redact sensitive info
|
||||
echo "Logging in as $USERNAME"
|
||||
|
||||
# Bad: Logging passwords
|
||||
echo "Password: $PASSWORD" # Never do this!
|
||||
```
|
||||
|
||||
### 4. Close Sessions After Use
|
||||
|
||||
```bash
|
||||
# Always clean up
|
||||
trap 'infsh app run agent-browser --function close --session $SESSION --input "{}" 2>/dev/null' EXIT
|
||||
```
|
||||
|
||||
### 5. Use Video Recording for Debugging Only
|
||||
|
||||
Video may capture sensitive information:
|
||||
|
||||
```bash
|
||||
# Only enable when debugging
|
||||
if [ "$DEBUG" = "true" ]; then
|
||||
RECORD_VIDEO="true"
|
||||
else
|
||||
RECORD_VIDEO="false"
|
||||
fi
|
||||
```
|
||||
|
||||
### 6. Verify Login Success
|
||||
|
||||
Always confirm authentication worked:
|
||||
|
||||
```bash
|
||||
# Check URL changed from login page
|
||||
URL=$(echo $RESULT | jq -r '.url')
|
||||
if [[ "$URL" == *"/login"* ]] || [[ "$URL" == *"/signin"* ]]; then
|
||||
echo "ERROR: Login failed"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Or check for specific element on authenticated page
|
||||
ELEMENTS=$(echo $RESULT | jq -r '.elements_text')
|
||||
if ! echo "$ELEMENTS" | grep -q "Logout\|Dashboard\|Welcome"; then
|
||||
echo "ERROR: Not authenticated"
|
||||
exit 1
|
||||
fi
|
||||
```
|
||||
272
.agents/skills/agent-browser/references/commands.md
Normal file
272
.agents/skills/agent-browser/references/commands.md
Normal file
|
|
@ -0,0 +1,272 @@
|
|||
# Command Reference
|
||||
|
||||
Complete reference for all agent-browser functions. For quick start, see [SKILL.md](../SKILL.md).
|
||||
|
||||
## Base Command
|
||||
|
||||
All commands follow this pattern:
|
||||
|
||||
```bash
|
||||
infsh app run agent-browser --function <function> --session <session_id|new> --input '<json>'
|
||||
```
|
||||
|
||||
- `--function`: Function to call (open, snapshot, interact, screenshot, execute, close)
|
||||
- `--session`: Session ID from previous call, or `new` to start fresh
|
||||
- `--input`: JSON input for the function
|
||||
|
||||
## Functions
|
||||
|
||||
### open
|
||||
|
||||
Navigate to URL and configure browser. This is the entry point for all sessions.
|
||||
|
||||
```bash
|
||||
infsh app run agent-browser --function open --session new --input '{
|
||||
"url": "https://example.com",
|
||||
"width": 1280,
|
||||
"height": 720,
|
||||
"user_agent": "Mozilla/5.0...",
|
||||
"record_video": false,
|
||||
"show_cursor": false,
|
||||
"proxy_url": null,
|
||||
"proxy_username": null,
|
||||
"proxy_password": null
|
||||
}'
|
||||
```
|
||||
|
||||
**Input Fields:**
|
||||
|
||||
| Field | Type | Default | Description |
|
||||
|-------|------|---------|-------------|
|
||||
| `url` | string | required | URL to navigate to |
|
||||
| `width` | int | 1280 | Viewport width in pixels |
|
||||
| `height` | int | 720 | Viewport height in pixels |
|
||||
| `user_agent` | string | null | Custom user agent string |
|
||||
| `record_video` | bool | false | Record video (returned on close) |
|
||||
| `show_cursor` | bool | false | Show cursor indicator in screenshots/video |
|
||||
| `proxy_url` | string | null | Proxy server URL |
|
||||
| `proxy_username` | string | null | Proxy auth username |
|
||||
| `proxy_password` | string | null | Proxy auth password |
|
||||
|
||||
**Output:**
|
||||
|
||||
```json
|
||||
{
|
||||
"session_id": "abc123",
|
||||
"url": "https://example.com",
|
||||
"title": "Example Domain",
|
||||
"elements": [...],
|
||||
"elements_text": "@e1 [a] \"More information...\" href=\"...\"\n...",
|
||||
"screenshot": "<File>"
|
||||
}
|
||||
```
|
||||
|
||||
### snapshot
|
||||
|
||||
Re-fetch page state with `@e` refs. Call after navigation or DOM changes.
|
||||
|
||||
```bash
|
||||
infsh app run agent-browser --function snapshot --session $SESSION_ID --input '{}'
|
||||
```
|
||||
|
||||
**Output:** Same as `open` (url, title, elements, elements_text, screenshot)
|
||||
|
||||
### interact
|
||||
|
||||
Perform actions on the page using `@e` refs.
|
||||
|
||||
```bash
|
||||
infsh app run agent-browser --function interact --session $SESSION_ID --input '{
|
||||
"action": "click",
|
||||
"ref": "@e1"
|
||||
}'
|
||||
```
|
||||
|
||||
**Input Fields:**
|
||||
|
||||
| Field | Type | Description |
|
||||
|-------|------|-------------|
|
||||
| `action` | string | Action to perform (see Actions table) |
|
||||
| `ref` | string | Element ref (e.g., `@e1`) |
|
||||
| `text` | string | Text for fill/type/press/select |
|
||||
| `direction` | string | Scroll direction: up, down, left, right |
|
||||
| `scroll_amount` | int | Scroll pixels (default 400) |
|
||||
| `wait_ms` | int | Wait duration in milliseconds |
|
||||
| `url` | string | URL for goto action |
|
||||
| `target_ref` | string | Target ref for drag action |
|
||||
| `file_paths` | array | File paths for upload action |
|
||||
|
||||
**Actions:**
|
||||
|
||||
| Action | Required Fields | Description |
|
||||
|--------|-----------------|-------------|
|
||||
| `click` | `ref` | Single click |
|
||||
| `dblclick` | `ref` | Double click |
|
||||
| `fill` | `ref`, `text` | Clear input and type text |
|
||||
| `type` | `text` | Type text without clearing |
|
||||
| `press` | `text` | Press key (Enter, Tab, Escape, etc.) |
|
||||
| `select` | `ref`, `text` | Select dropdown option by label |
|
||||
| `hover` | `ref` | Hover over element |
|
||||
| `check` | `ref` | Check checkbox |
|
||||
| `uncheck` | `ref` | Uncheck checkbox |
|
||||
| `drag` | `ref`, `target_ref` | Drag from ref to target_ref |
|
||||
| `upload` | `ref`, `file_paths` | Upload files to file input |
|
||||
| `scroll` | `direction` | Scroll page (optional: `scroll_amount`) |
|
||||
| `back` | - | Go back in browser history |
|
||||
| `wait` | `wait_ms` | Wait for specified milliseconds |
|
||||
| `goto` | `url` | Navigate to different URL |
|
||||
|
||||
**Output:**
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"action": "click",
|
||||
"message": null,
|
||||
"screenshot": "<File>",
|
||||
"snapshot": {
|
||||
"url": "...",
|
||||
"title": "...",
|
||||
"elements": [...],
|
||||
"elements_text": "..."
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### screenshot
|
||||
|
||||
Take a screenshot of the current page.
|
||||
|
||||
```bash
|
||||
infsh app run agent-browser --function screenshot --session $SESSION_ID --input '{
|
||||
"full_page": true
|
||||
}'
|
||||
```
|
||||
|
||||
**Input Fields:**
|
||||
|
||||
| Field | Type | Default | Description |
|
||||
|-------|------|---------|-------------|
|
||||
| `full_page` | bool | false | Capture full scrollable page |
|
||||
|
||||
**Output:**
|
||||
|
||||
```json
|
||||
{
|
||||
"screenshot": "<File>",
|
||||
"width": 1280,
|
||||
"height": 720
|
||||
}
|
||||
```
|
||||
|
||||
### execute
|
||||
|
||||
Run JavaScript code on the page.
|
||||
|
||||
```bash
|
||||
infsh app run agent-browser --function execute --session $SESSION_ID --input '{
|
||||
"code": "document.title"
|
||||
}'
|
||||
```
|
||||
|
||||
**Input Fields:**
|
||||
|
||||
| Field | Type | Description |
|
||||
|-------|------|-------------|
|
||||
| `code` | string | JavaScript code to execute |
|
||||
|
||||
**Output:**
|
||||
|
||||
```json
|
||||
{
|
||||
"result": "Example Domain",
|
||||
"error": null,
|
||||
"screenshot": "<File>"
|
||||
}
|
||||
```
|
||||
|
||||
**Examples:**
|
||||
|
||||
```bash
|
||||
# Get page title
|
||||
'{"code": "document.title"}'
|
||||
|
||||
# Count elements
|
||||
'{"code": "document.querySelectorAll(\"a\").length"}'
|
||||
|
||||
# Extract text
|
||||
'{"code": "document.querySelector(\"h1\").textContent"}'
|
||||
|
||||
# Get all links
|
||||
'{"code": "Array.from(document.querySelectorAll(\"a\")).map(a => a.href)"}'
|
||||
|
||||
# Scroll to bottom
|
||||
'{"code": "window.scrollTo(0, document.body.scrollHeight)"}'
|
||||
|
||||
# Get computed style
|
||||
'{"code": "getComputedStyle(document.body).backgroundColor"}'
|
||||
```
|
||||
|
||||
### close
|
||||
|
||||
Close the browser session. Returns video if recording was enabled.
|
||||
|
||||
```bash
|
||||
infsh app run agent-browser --function close --session $SESSION_ID --input '{}'
|
||||
```
|
||||
|
||||
**Output:**
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"video": "<File or null>"
|
||||
}
|
||||
```
|
||||
|
||||
## Key Combinations
|
||||
|
||||
For the `press` action, use these key names:
|
||||
|
||||
| Key | Name |
|
||||
|-----|------|
|
||||
| Enter | `Enter` |
|
||||
| Tab | `Tab` |
|
||||
| Escape | `Escape` |
|
||||
| Backspace | `Backspace` |
|
||||
| Delete | `Delete` |
|
||||
| Arrow keys | `ArrowUp`, `ArrowDown`, `ArrowLeft`, `ArrowRight` |
|
||||
| Modifiers | `Control`, `Shift`, `Alt`, `Meta` |
|
||||
|
||||
**Key combinations:**
|
||||
|
||||
```bash
|
||||
# Ctrl+A (select all)
|
||||
'{"action": "press", "text": "Control+a"}'
|
||||
|
||||
# Ctrl+C (copy)
|
||||
'{"action": "press", "text": "Control+c"}'
|
||||
|
||||
# Shift+Tab (focus previous)
|
||||
'{"action": "press", "text": "Shift+Tab"}'
|
||||
```
|
||||
|
||||
## Error Handling
|
||||
|
||||
When an action fails, `success` is `false` and `message` contains the error:
|
||||
|
||||
```json
|
||||
{
|
||||
"success": false,
|
||||
"action": "click",
|
||||
"message": "Unknown ref: @e99. Run 'snapshot' to get current elements.",
|
||||
"screenshot": "<File>",
|
||||
"snapshot": {...}
|
||||
}
|
||||
```
|
||||
|
||||
Common errors:
|
||||
- `Unknown ref: @eN` - Ref doesn't exist, re-snapshot needed
|
||||
- `'text' required for fill action` - Missing required field
|
||||
- `'target_ref' required for drag action` - Missing drag target
|
||||
- `Timeout 5000ms exceeded` - Element not found or not clickable
|
||||
295
.agents/skills/agent-browser/references/proxy-support.md
Normal file
295
.agents/skills/agent-browser/references/proxy-support.md
Normal file
|
|
@ -0,0 +1,295 @@
|
|||
# Proxy Support
|
||||
|
||||
Proxy configuration for geo-testing, privacy, and corporate environments.
|
||||
|
||||
**Related**: [commands.md](commands.md) for full function reference, [SKILL.md](../SKILL.md) for quick start.
|
||||
|
||||
## Contents
|
||||
|
||||
- [Basic Proxy Configuration](#basic-proxy-configuration)
|
||||
- [Authenticated Proxy](#authenticated-proxy)
|
||||
- [Common Use Cases](#common-use-cases)
|
||||
- [Proxy Types](#proxy-types)
|
||||
- [Verifying Proxy Connection](#verifying-proxy-connection)
|
||||
- [Troubleshooting](#troubleshooting)
|
||||
- [Best Practices](#best-practices)
|
||||
|
||||
## Basic Proxy Configuration
|
||||
|
||||
Set proxy when opening a session:
|
||||
|
||||
```bash
|
||||
SESSION=$(infsh app run agent-browser --function open --session new --input '{
|
||||
"url": "https://example.com",
|
||||
"proxy_url": "http://proxy.example.com:8080"
|
||||
}' | jq -r '.session_id')
|
||||
```
|
||||
|
||||
All traffic for this session routes through the proxy.
|
||||
|
||||
## Authenticated Proxy
|
||||
|
||||
For proxies requiring username/password:
|
||||
|
||||
```bash
|
||||
SESSION=$(infsh app run agent-browser --function open --session new --input '{
|
||||
"url": "https://example.com",
|
||||
"proxy_url": "http://proxy.example.com:8080",
|
||||
"proxy_username": "myuser",
|
||||
"proxy_password": "mypassword"
|
||||
}' | jq -r '.session_id')
|
||||
```
|
||||
|
||||
## Common Use Cases
|
||||
|
||||
### Geo-Location Testing
|
||||
|
||||
Test how your site appears from different regions:
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
# Test from multiple regions
|
||||
|
||||
PROXIES=(
|
||||
"us|http://us-proxy.example.com:8080"
|
||||
"eu|http://eu-proxy.example.com:8080"
|
||||
"asia|http://asia-proxy.example.com:8080"
|
||||
)
|
||||
|
||||
for entry in "${PROXIES[@]}"; do
|
||||
REGION="${entry%%|*}"
|
||||
PROXY="${entry##*|}"
|
||||
|
||||
echo "Testing from: $REGION"
|
||||
|
||||
SESSION=$(infsh app run agent-browser --function open --session new --input '{
|
||||
"url": "https://mysite.com",
|
||||
"proxy_url": "'"$PROXY"'"
|
||||
}' | jq -r '.session_id')
|
||||
|
||||
# Take screenshot
|
||||
infsh app run agent-browser --function screenshot --session $SESSION --input '{
|
||||
"full_page": true
|
||||
}' > "${REGION}-screenshot.json"
|
||||
|
||||
# Get page content
|
||||
RESULT=$(infsh app run agent-browser --function snapshot --session $SESSION --input '{}')
|
||||
echo $RESULT | jq '.elements_text' > "${REGION}-elements.txt"
|
||||
|
||||
infsh app run agent-browser --function close --session $SESSION --input '{}'
|
||||
done
|
||||
|
||||
echo "Geo-testing complete"
|
||||
```
|
||||
|
||||
### Rate Limit Avoidance
|
||||
|
||||
Rotate proxies for web scraping:
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
# Rotate through proxy list
|
||||
|
||||
PROXIES=(
|
||||
"http://proxy1.example.com:8080"
|
||||
"http://proxy2.example.com:8080"
|
||||
"http://proxy3.example.com:8080"
|
||||
)
|
||||
|
||||
URLS=(
|
||||
"https://site.com/page1"
|
||||
"https://site.com/page2"
|
||||
"https://site.com/page3"
|
||||
)
|
||||
|
||||
for i in "${!URLS[@]}"; do
|
||||
# Rotate proxy
|
||||
PROXY_INDEX=$((i % ${#PROXIES[@]}))
|
||||
PROXY="${PROXIES[$PROXY_INDEX]}"
|
||||
URL="${URLS[$i]}"
|
||||
|
||||
echo "Fetching $URL via proxy $((PROXY_INDEX + 1))"
|
||||
|
||||
SESSION=$(infsh app run agent-browser --function open --session new --input '{
|
||||
"url": "'"$URL"'",
|
||||
"proxy_url": "'"$PROXY"'"
|
||||
}' | jq -r '.session_id')
|
||||
|
||||
# Extract data
|
||||
RESULT=$(infsh app run agent-browser --function execute --session $SESSION --input '{
|
||||
"code": "document.body.innerText"
|
||||
}')
|
||||
echo $RESULT | jq -r '.result' > "page-$i.txt"
|
||||
|
||||
infsh app run agent-browser --function close --session $SESSION --input '{}'
|
||||
|
||||
# Polite delay
|
||||
sleep 1
|
||||
done
|
||||
```
|
||||
|
||||
### Corporate Network Access
|
||||
|
||||
Access sites through corporate proxy:
|
||||
|
||||
```bash
|
||||
# Use corporate proxy for external sites
|
||||
SESSION=$(infsh app run agent-browser --function open --session new --input '{
|
||||
"url": "https://external-vendor.com",
|
||||
"proxy_url": "http://corpproxy.company.com:8080",
|
||||
"proxy_username": "'"$CORP_USER"'",
|
||||
"proxy_password": "'"$CORP_PASS"'"
|
||||
}' | jq -r '.session_id')
|
||||
```
|
||||
|
||||
### Privacy and Anonymity
|
||||
|
||||
Route through privacy-focused proxy:
|
||||
|
||||
```bash
|
||||
SESSION=$(infsh app run agent-browser --function open --session new --input '{
|
||||
"url": "https://whatismyip.com",
|
||||
"proxy_url": "socks5://privacy-proxy.example.com:1080"
|
||||
}' | jq -r '.session_id')
|
||||
```
|
||||
|
||||
## Proxy Types
|
||||
|
||||
### HTTP/HTTPS Proxy
|
||||
|
||||
```json
|
||||
{"proxy_url": "http://proxy.example.com:8080"}
|
||||
{"proxy_url": "https://proxy.example.com:8080"}
|
||||
```
|
||||
|
||||
### SOCKS5 Proxy
|
||||
|
||||
```json
|
||||
{"proxy_url": "socks5://proxy.example.com:1080"}
|
||||
```
|
||||
|
||||
### With Authentication
|
||||
|
||||
```json
|
||||
{
|
||||
"proxy_url": "http://proxy.example.com:8080",
|
||||
"proxy_username": "user",
|
||||
"proxy_password": "pass"
|
||||
}
|
||||
```
|
||||
|
||||
## Verifying Proxy Connection
|
||||
|
||||
Check that traffic routes through proxy:
|
||||
|
||||
```bash
|
||||
SESSION=$(infsh app run agent-browser --function open --session new --input '{
|
||||
"url": "https://httpbin.org/ip",
|
||||
"proxy_url": "http://proxy.example.com:8080"
|
||||
}' | jq -r '.session_id')
|
||||
|
||||
# Get the IP shown
|
||||
RESULT=$(infsh app run agent-browser --function execute --session $SESSION --input '{
|
||||
"code": "document.body.innerText"
|
||||
}')
|
||||
echo "IP via proxy: $(echo $RESULT | jq -r '.result')"
|
||||
|
||||
infsh app run agent-browser --function close --session $SESSION --input '{}'
|
||||
```
|
||||
|
||||
The IP should be the proxy's IP, not your real IP.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Connection Failed
|
||||
|
||||
```
|
||||
Error: Failed to open URL: net::ERR_PROXY_CONNECTION_FAILED
|
||||
```
|
||||
|
||||
**Solutions:**
|
||||
1. Verify proxy URL is correct
|
||||
2. Check proxy is running and accessible
|
||||
3. Confirm port is correct
|
||||
4. Test proxy with curl: `curl -x http://proxy:8080 https://example.com`
|
||||
|
||||
### Authentication Failed
|
||||
|
||||
```
|
||||
Error: 407 Proxy Authentication Required
|
||||
```
|
||||
|
||||
**Solutions:**
|
||||
1. Verify username/password are correct
|
||||
2. Check if proxy requires different auth method
|
||||
3. If the credentials contain special characters (e.g. quotes or backslashes), make sure they are properly escaped in the JSON input
|
||||
|
||||
### SSL Errors
|
||||
|
||||
Some proxies perform SSL inspection. If you see certificate errors:
|
||||
|
||||
```bash
|
||||
# The browser should handle most SSL proxies automatically
|
||||
# If issues persist, verify proxy SSL certificate is valid
|
||||
```
|
||||
|
||||
### Slow Performance
|
||||
|
||||
**Solutions:**
|
||||
1. Choose proxy closer to target site
|
||||
2. Use faster proxy provider
|
||||
3. Reduce number of requests per session
|
||||
|
||||
## Best Practices
|
||||
|
||||
### 1. Use Environment Variables
|
||||
|
||||
```bash
|
||||
# Good: Credentials in env vars
|
||||
'{"proxy_url": "'"$PROXY_URL"'", "proxy_username": "'"$PROXY_USER"'"}'
|
||||
|
||||
# Bad: Hardcoded
|
||||
'{"proxy_url": "http://user:pass@proxy.com:8080"}'
|
||||
```
|
||||
|
||||
### 2. Test Proxy Before Automation
|
||||
|
||||
```bash
|
||||
# Verify proxy works
|
||||
curl -x "$PROXY_URL" https://httpbin.org/ip
|
||||
```
|
||||
|
||||
### 3. Handle Proxy Failures
|
||||
|
||||
```bash
|
||||
# Retry with different proxy on failure
|
||||
for PROXY in "${PROXIES[@]}"; do
|
||||
SESSION=$(infsh app run agent-browser --function open --session new --input '{
|
||||
"url": "'"$URL"'",
|
||||
"proxy_url": "'"$PROXY"'"
|
||||
}' 2>&1)
|
||||
|
||||
if echo "$SESSION" | jq -e '.session_id' > /dev/null 2>&1; then
|
||||
SESSION_ID=$(echo $SESSION | jq -r '.session_id')
|
||||
break
|
||||
fi
|
||||
echo "Proxy $PROXY failed, trying next..."
|
||||
done
|
||||
```
|
||||
|
||||
### 4. Respect Rate Limits
|
||||
|
||||
Even with proxies, be a good citizen:
|
||||
|
||||
```bash
|
||||
# Add delays between requests
|
||||
'{"action": "wait", "wait_ms": 1000}'
|
||||
```
|
||||
|
||||
### 5. Log Proxy Usage
|
||||
|
||||
For debugging, log which proxy was used:
|
||||
|
||||
```bash
|
||||
echo "$(date): Using proxy $PROXY for $URL" >> proxy.log
|
||||
```
|
||||
204
.agents/skills/agent-browser/references/session-management.md
Normal file
204
.agents/skills/agent-browser/references/session-management.md
Normal file
|
|
@ -0,0 +1,204 @@
|
|||
# Session Management
|
||||
|
||||
Browser sessions for state persistence and parallel browsing.
|
||||
|
||||
**Related**: [authentication.md](authentication.md) for login patterns, [SKILL.md](../SKILL.md) for quick start.
|
||||
|
||||
## Contents
|
||||
|
||||
- [How Sessions Work](#how-sessions-work)
|
||||
- [Starting a Session](#starting-a-session)
|
||||
- [Using Session IDs](#using-session-ids)
|
||||
- [Session State](#session-state)
|
||||
- [Parallel Sessions](#parallel-sessions)
|
||||
- [Session Cleanup](#session-cleanup)
|
||||
- [Best Practices](#best-practices)
|
||||
|
||||
## How Sessions Work
|
||||
|
||||
Each session maintains an isolated browser context with:
|
||||
- Cookies
|
||||
- LocalStorage / SessionStorage
|
||||
- Browser history
|
||||
- Page state
|
||||
- Video recording (if enabled)
|
||||
|
||||
Sessions persist across function calls, allowing multi-step workflows.
|
||||
|
||||
## Starting a Session
|
||||
|
||||
Use `--session new` to create a fresh session:
|
||||
|
||||
```bash
|
||||
RESULT=$(infsh app run agent-browser --function open --session new --input '{
|
||||
"url": "https://example.com"
|
||||
}')
|
||||
SESSION_ID=$(echo $RESULT | jq -r '.session_id')
|
||||
echo "Session: $SESSION_ID"
|
||||
```
|
||||
|
||||
## Using Session IDs
|
||||
|
||||
All subsequent calls use the session ID:
|
||||
|
||||
```bash
|
||||
# Navigate
|
||||
infsh app run agent-browser --function open --session $SESSION_ID --input '{
|
||||
"url": "https://example.com/page2"
|
||||
}'
|
||||
|
||||
# Interact
|
||||
infsh app run agent-browser --function interact --session $SESSION_ID --input '{
|
||||
"action": "click", "ref": "@e1"
|
||||
}'
|
||||
|
||||
# Screenshot
|
||||
infsh app run agent-browser --function screenshot --session $SESSION_ID --input '{}'
|
||||
|
||||
# Close
|
||||
infsh app run agent-browser --function close --session $SESSION_ID --input '{}'
|
||||
```
|
||||
|
||||
## Session State
|
||||
|
||||
### What Persists
|
||||
|
||||
Within a session, these persist across calls:
|
||||
- Cookies (login state, preferences)
|
||||
- LocalStorage and SessionStorage
|
||||
- IndexedDB data
|
||||
- Browser history (for back/forward)
|
||||
- Current page and DOM state
|
||||
- Video recording buffer
|
||||
|
||||
### What Doesn't Persist
|
||||
|
||||
- Sessions don't persist across server restarts
|
||||
- No automatic session recovery
|
||||
- Video recording is returned by the `close` call and is not available after the session is closed
|
||||
|
||||
## Parallel Sessions
|
||||
|
||||
Run multiple independent sessions simultaneously:
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
# Scrape multiple sites in parallel
|
||||
|
||||
# Start sessions
|
||||
RESULT1=$(infsh app run agent-browser --function open --session new --input '{
|
||||
"url": "https://site1.com"
|
||||
}')
|
||||
SESSION1=$(echo $RESULT1 | jq -r '.session_id')
|
||||
|
||||
RESULT2=$(infsh app run agent-browser --function open --session new --input '{
|
||||
"url": "https://site2.com"
|
||||
}')
|
||||
SESSION2=$(echo $RESULT2 | jq -r '.session_id')
|
||||
|
||||
# Work with each session independently
|
||||
infsh app run agent-browser --function screenshot --session $SESSION1 --input '{}' &
|
||||
infsh app run agent-browser --function screenshot --session $SESSION2 --input '{}' &
|
||||
wait
|
||||
|
||||
# Clean up both
|
||||
infsh app run agent-browser --function close --session $SESSION1 --input '{}'
|
||||
infsh app run agent-browser --function close --session $SESSION2 --input '{}'
|
||||
```
|
||||
|
||||
### Use Cases for Parallel Sessions
|
||||
|
||||
1. **A/B Testing** - Compare different pages or user experiences
|
||||
2. **Multi-site scraping** - Gather data from multiple sources
|
||||
3. **Load testing** - Simulate multiple users
|
||||
4. **Cross-region testing** - Use different proxies per session
|
||||
|
||||
## Session Cleanup
|
||||
|
||||
Always close sessions when done:
|
||||
|
||||
```bash
|
||||
infsh app run agent-browser --function close --session $SESSION_ID --input '{}'
|
||||
```
|
||||
|
||||
**Why close matters:**
|
||||
- Releases server resources
|
||||
- Returns video recording (if enabled)
|
||||
- Prevents resource leaks
|
||||
|
||||
### Error Handling
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
cleanup() {
|
||||
infsh app run agent-browser --function close --session $SESSION_ID --input '{}' 2>/dev/null || true
|
||||
}
|
||||
trap cleanup EXIT
|
||||
|
||||
SESSION_ID=$(infsh app run agent-browser --function open --session new --input '{
|
||||
"url": "https://example.com"
|
||||
}' | jq -r '.session_id')
|
||||
|
||||
# ... your automation ...
|
||||
# cleanup runs automatically on exit
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
### 1. Store Session IDs
|
||||
|
||||
```bash
|
||||
# Good: Store for reuse
|
||||
SESSION_ID=$(... | jq -r '.session_id')
|
||||
infsh ... --session $SESSION_ID ...
|
||||
|
||||
# Bad: Parse every time
|
||||
infsh ... --session $(... | jq -r '.session_id') ...
|
||||
```
|
||||
|
||||
### 2. Close Sessions Promptly
|
||||
|
||||
Don't leave sessions open longer than needed. Server resources are limited.
|
||||
|
||||
### 3. Use Meaningful Variable Names
|
||||
|
||||
```bash
|
||||
# Good: Clear purpose
|
||||
LOGIN_SESSION=$(...)
|
||||
SCRAPE_SESSION=$(...)
|
||||
|
||||
# Bad: Generic names
|
||||
S1=$(...)
|
||||
S2=$(...)
|
||||
```
|
||||
|
||||
### 4. Handle Session Expiry
|
||||
|
||||
Sessions may expire after extended inactivity:
|
||||
|
||||
```bash
|
||||
# Check if session is still valid
|
||||
RESULT=$(infsh app run agent-browser --function snapshot --session $SESSION_ID --input '{}' 2>&1)
|
||||
if echo "$RESULT" | grep -q "session not found"; then
|
||||
echo "Session expired, starting new one"
|
||||
SESSION_ID=$(infsh app run agent-browser --function open --session new --input '{
|
||||
"url": "https://example.com"
|
||||
}' | jq -r '.session_id')
|
||||
fi
|
||||
```
|
||||
|
||||
### 5. One Task Per Session
|
||||
|
||||
For clarity, use one session per logical task:
|
||||
|
||||
```bash
|
||||
# Good: Separate sessions for separate tasks
|
||||
LOGIN_SESSION=$(...) # Handle login
|
||||
SCRAPE_SESSION=$(...) # Handle scraping
|
||||
|
||||
# Okay for related tasks: One session for a workflow
|
||||
SESSION=$(...)
|
||||
# login -> navigate -> extract -> close
|
||||
```
|
||||
251
.agents/skills/agent-browser/references/snapshot-refs.md
Normal file
251
.agents/skills/agent-browser/references/snapshot-refs.md
Normal file
|
|
@ -0,0 +1,251 @@
|
|||
# Snapshot and Refs
|
||||
|
||||
Compact element references that reduce context usage for AI agents.
|
||||
|
||||
**Related**: [commands.md](commands.md) for full function reference, [SKILL.md](../SKILL.md) for quick start.
|
||||
|
||||
## Contents
|
||||
|
||||
- [How Refs Work](#how-refs-work)
|
||||
- [Snapshot Output Format](#snapshot-output-format)
|
||||
- [Using Refs](#using-refs)
|
||||
- [Ref Lifecycle](#ref-lifecycle)
|
||||
- [Best Practices](#best-practices)
|
||||
- [Ref Notation Details](#ref-notation-details)
|
||||
- [Troubleshooting](#troubleshooting)
|
||||
|
||||
## How Refs Work
|
||||
|
||||
Traditional approach:
|
||||
```
|
||||
Full DOM/HTML -> AI parses -> CSS selector -> Action (~3000-5000 tokens)
|
||||
```
|
||||
|
||||
agent-browser approach:
|
||||
```
|
||||
Compact snapshot -> @refs assigned -> Direct interaction (~200-400 tokens)
|
||||
```
|
||||
|
||||
The snapshot extracts interactive elements and assigns short `@e` refs, reducing token usage significantly.
|
||||
|
||||
## Snapshot Output Format
|
||||
|
||||
```bash
|
||||
infsh app run agent-browser --function snapshot --session $SESSION --input '{}'
|
||||
```
|
||||
|
||||
**Response `elements_text`:**
|
||||
|
||||
```
|
||||
@e1 [a] "Home" href="/"
|
||||
@e2 [a] "Products" href="/products"
|
||||
@e3 [a] "About" href="/about"
|
||||
@e4 [button] "Sign In"
|
||||
@e5 [input type="email"] placeholder="Email"
|
||||
@e6 [input type="password"] placeholder="Password"
|
||||
@e7 [button type="submit"] "Log In"
|
||||
@e8 [input type="checkbox"] name="remember"
|
||||
```
|
||||
|
||||
**Response `elements` (structured):**
|
||||
|
||||
```json
|
||||
[
|
||||
{
|
||||
"ref": "@e1",
|
||||
"desc": "@e1 [a] \"Home\" href=\"/\"",
|
||||
"tag": "a",
|
||||
"text": "Home",
|
||||
"role": null,
|
||||
"name": null,
|
||||
"href": "/",
|
||||
"input_type": null
|
||||
},
|
||||
...
|
||||
]
|
||||
```
|
||||
|
||||
## Using Refs
|
||||
|
||||
Once you have refs, interact directly:
|
||||
|
||||
```bash
|
||||
# Click the "Sign In" button
|
||||
'{"action": "click", "ref": "@e4"}'
|
||||
|
||||
# Fill email input
|
||||
'{"action": "fill", "ref": "@e5", "text": "user@example.com"}'
|
||||
|
||||
# Fill password
|
||||
'{"action": "fill", "ref": "@e6", "text": "password123"}'
|
||||
|
||||
# Submit the form
|
||||
'{"action": "click", "ref": "@e7"}'
|
||||
|
||||
# Check the "remember me" checkbox
|
||||
'{"action": "check", "ref": "@e8"}'
|
||||
```
|
||||
|
||||
## Ref Lifecycle
|
||||
|
||||
**IMPORTANT**: Refs are invalidated when the page changes!
|
||||
|
||||
```bash
|
||||
# Get initial snapshot
|
||||
infsh app run agent-browser --function snapshot --session $SESSION --input '{}'
|
||||
# @e1 [button] "Next"
|
||||
|
||||
# Click triggers page change
|
||||
infsh app run agent-browser --function interact --session $SESSION --input '{
|
||||
"action": "click", "ref": "@e1"
|
||||
}'
|
||||
|
||||
# MUST re-snapshot to get new refs!
|
||||
infsh app run agent-browser --function snapshot --session $SESSION --input '{}'
|
||||
# @e1 [h1] "Page 2" <- Different element now!
|
||||
```
|
||||
|
||||
### When to Re-snapshot
|
||||
|
||||
Always re-snapshot after:
|
||||
|
||||
1. **Navigation** - Clicking links, form submissions, `goto` action
|
||||
2. **Dynamic content** - AJAX loads, modals opening, tabs switching
|
||||
3. **Page mutations** - JavaScript modifying the DOM
|
||||
|
||||
The `interact` function returns a fresh snapshot in its response, so you can often use that instead of a separate snapshot call.
|
||||
|
||||
## Best Practices
|
||||
|
||||
### 1. Always Use the Latest Snapshot
|
||||
|
||||
```bash
|
||||
# CORRECT: Use snapshot from previous response
|
||||
RESULT=$(infsh app run agent-browser --function interact --session $SESSION --input '{
|
||||
"action": "click", "ref": "@e1"
|
||||
}')
|
||||
# Use elements from $RESULT.snapshot for next action
|
||||
|
||||
# WRONG: Using stale refs
|
||||
# After navigation, @e1 may point to a completely different element
|
||||
```
|
||||
|
||||
### 2. Check Success Before Continuing
|
||||
|
||||
```bash
|
||||
RESULT=$(infsh app run agent-browser --function interact --session $SESSION --input '{
|
||||
"action": "click", "ref": "@e5"
|
||||
}')
|
||||
|
||||
SUCCESS=$(echo $RESULT | jq -r '.success')
|
||||
if [ "$SUCCESS" != "true" ]; then
|
||||
echo "Click failed: $(echo $RESULT | jq -r '.message')"
|
||||
# Re-snapshot and retry
|
||||
fi
|
||||
```
|
||||
|
||||
### 3. Use elements_text for Quick Decisions
|
||||
|
||||
For AI agents, `elements_text` provides a compact text representation:
|
||||
|
||||
```
|
||||
@e1 [input type="email"] placeholder="Email"
|
||||
@e2 [input type="password"] placeholder="Password"
|
||||
@e3 [button] "Submit"
|
||||
```
|
||||
|
||||
This is often enough to decide which element to interact with without parsing the full `elements` array.
|
||||
|
||||
## Ref Notation Details
|
||||
|
||||
```
|
||||
@e1 [tag type="value"] "text content" name="attr"
|
||||
| | | | |
|
||||
| | | | +- Additional attributes
|
||||
| | | +- Visible text
|
||||
| | +- Key attributes shown
|
||||
| +- HTML tag name
|
||||
+- Unique ref ID
|
||||
```
|
||||
|
||||
### Common Patterns
|
||||
|
||||
```
|
||||
@e1 [button] "Submit" # Button with text
|
||||
@e2 [input type="email"] # Email input
|
||||
@e3 [input type="password"] # Password input
|
||||
@e4 [a] "Link Text" href="/page" # Anchor link
|
||||
@e5 [select] # Dropdown
|
||||
@e6 [textarea] placeholder="Message" # Text area
|
||||
@e7 [input type="file"] # File upload
|
||||
@e8 [input type="checkbox"] checked # Checked checkbox
|
||||
@e9 [input type="radio"] selected # Selected radio
|
||||
@e10 [button type="submit"] "Send" # Submit button
|
||||
```
|
||||
|
||||
### Elements Captured
|
||||
|
||||
The snapshot captures these interactive elements:
|
||||
|
||||
- Links (`<a href>`)
|
||||
- Buttons (`<button>`, `[role="button"]`)
|
||||
- Inputs (`<input>`, `<textarea>`, `<select>`)
|
||||
- Clickable elements (`[onclick]`, `[tabindex]`)
|
||||
- ARIA roles (`[role="link"]`, `[role="checkbox"]`, etc.)
|
||||
|
||||
Non-interactive or hidden elements are filtered out.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### "Unknown ref" Error
|
||||
|
||||
```json
|
||||
{
|
||||
"success": false,
|
||||
"message": "Unknown ref: @e15. Run 'snapshot' to get current elements."
|
||||
}
|
||||
```
|
||||
|
||||
**Solution**: Re-snapshot. The page changed and refs are stale.
|
||||
|
||||
```bash
|
||||
infsh app run agent-browser --function snapshot --session $SESSION --input '{}'
|
||||
# Now use the new refs
|
||||
```
|
||||
|
||||
### Element Not in Snapshot
|
||||
|
||||
The element you need might not appear because:
|
||||
|
||||
1. **Not visible** - Scroll to reveal it
|
||||
```bash
|
||||
'{"action": "scroll", "direction": "down", "scroll_amount": 500}'
|
||||
```
|
||||
|
||||
2. **Not interactive** - Use JavaScript (via `execute`) to interact with it
|
||||
```bash
|
||||
'{"code": "document.querySelector(\".hidden-btn\").click()"}'
|
||||
```
|
||||
|
||||
3. **In an iframe** - Elements inside iframes are not currently captured in snapshots (use `execute` with JavaScript instead)
|
||||
|
||||
4. **Dynamic** - Wait for it to load
|
||||
```bash
|
||||
'{"action": "wait", "wait_ms": 2000}'
|
||||
```
|
||||
|
||||
### Too Many Elements
|
||||
|
||||
Snapshots are limited to 50 elements. If the page has more:
|
||||
|
||||
1. **Scroll** to bring relevant elements into view
|
||||
2. **Use JavaScript** to target specific elements
|
||||
3. **Navigate** to a more specific page
|
||||
|
||||
### Ref Points to Wrong Element
|
||||
|
||||
If a ref seems to interact with the wrong element:
|
||||
|
||||
1. Re-snapshot to get fresh refs
|
||||
2. Check if the page structure changed
|
||||
3. Verify with screenshot that the right element is targeted
|
||||
286
.agents/skills/agent-browser/references/video-recording.md
Normal file
286
.agents/skills/agent-browser/references/video-recording.md
Normal file
|
|
@ -0,0 +1,286 @@
|
|||
# Video Recording
|
||||
|
||||
Capture browser automation as video for debugging, documentation, or verification.
|
||||
|
||||
**Related**: [commands.md](commands.md) for full function reference, [SKILL.md](../SKILL.md) for quick start.
|
||||
|
||||
## Contents
|
||||
|
||||
- [Basic Recording](#basic-recording)
|
||||
- [Cursor Indicator](#cursor-indicator)
|
||||
- [How Recording Works](#how-recording-works)
|
||||
- [Use Cases](#use-cases)
|
||||
- [Best Practices](#best-practices)
|
||||
- [Output Format](#output-format)
|
||||
- [Limitations](#limitations)
|
||||
|
||||
## Basic Recording
|
||||
|
||||
Enable video recording when opening a session:
|
||||
|
||||
```bash
|
||||
# Start with recording enabled
|
||||
SESSION=$(infsh app run agent-browser --function open --session new --input '{
|
||||
"url": "https://example.com",
|
||||
"record_video": true
|
||||
}' | jq -r '.session_id')
|
||||
|
||||
# Perform actions
|
||||
infsh app run agent-browser --function interact --session $SESSION --input '{
|
||||
"action": "click", "ref": "@e1"
|
||||
}'
|
||||
|
||||
infsh app run agent-browser --function interact --session $SESSION --input '{
|
||||
"action": "fill", "ref": "@e2", "text": "test input"
|
||||
}'
|
||||
|
||||
# Close to get the video
|
||||
RESULT=$(infsh app run agent-browser --function close --session $SESSION --input '{}')
|
||||
VIDEO=$(echo $RESULT | jq -r '.video')
|
||||
echo "Video file: $VIDEO"
|
||||
```
|
||||
|
||||
## Cursor Indicator
|
||||
|
||||
For demos and documentation, show a visible cursor that follows mouse movements:
|
||||
|
||||
```bash
|
||||
SESSION=$(infsh app run agent-browser --function open --session new --input '{
|
||||
"url": "https://example.com",
|
||||
"record_video": true,
|
||||
"show_cursor": true
|
||||
}' | jq -r '.session_id')
|
||||
```
|
||||
|
||||
The cursor appears as a red dot that:
|
||||
- Follows mouse movements in real-time
|
||||
- Shows click feedback (shrinks on mousedown)
|
||||
- Persists across page navigations
|
||||
- Appears in both screenshots and video
|
||||
|
||||
This is especially useful for:
|
||||
- Tutorial/documentation videos
|
||||
- Debugging interaction issues
|
||||
- Sharing recordings with non-technical stakeholders
|
||||
|
||||
## How Recording Works
|
||||
|
||||
1. **Start**: Pass `"record_video": true` in the `open` function
|
||||
2. **Record**: All browser activity is captured throughout the session
|
||||
3. **Stop**: Video is finalized when `close` is called
|
||||
4. **Retrieve**: Video file is returned in the `close` response
|
||||
|
||||
The video captures:
|
||||
- Page loads and navigations
|
||||
- Element interactions (clicks, typing)
|
||||
- Scrolling and animations
|
||||
- Dynamic content changes
|
||||
|
||||
## Use Cases
|
||||
|
||||
### Debugging Failed Automation
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
# Record automation for debugging
|
||||
|
||||
SESSION=$(infsh app run agent-browser --function open --session new --input '{
|
||||
"url": "https://app.example.com",
|
||||
"record_video": true
|
||||
}' | jq -r '.session_id')
|
||||
|
||||
# Run automation
|
||||
RESULT=$(infsh app run agent-browser --function interact --session $SESSION --input '{
|
||||
"action": "click", "ref": "@e1"
|
||||
}')
|
||||
|
||||
SUCCESS=$(echo $RESULT | jq -r '.success')
|
||||
if [ "$SUCCESS" != "true" ]; then
|
||||
echo "Action failed!"
|
||||
echo "Message: $(echo $RESULT | jq -r '.message')"
|
||||
|
||||
# Get video for debugging
|
||||
CLOSE_RESULT=$(infsh app run agent-browser --function close --session $SESSION --input '{}')
|
||||
echo "Debug video: $(echo $CLOSE_RESULT | jq -r '.video')"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
infsh app run agent-browser --function close --session $SESSION --input '{}'
|
||||
```
|
||||
|
||||
### Documentation Generation
|
||||
|
||||
Record workflows for user documentation:
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
# Record how-to video
|
||||
|
||||
SESSION=$(infsh app run agent-browser --function open --session new --input '{
|
||||
"url": "https://app.example.com/settings",
|
||||
"record_video": true,
|
||||
"width": 1920,
|
||||
"height": 1080
|
||||
}' | jq -r '.session_id')
|
||||
|
||||
# Add pauses for clarity
|
||||
infsh app run agent-browser --function interact --session $SESSION --input '{
|
||||
"action": "wait", "wait_ms": 1000
|
||||
}'
|
||||
|
||||
# Step 1: Click settings
|
||||
infsh app run agent-browser --function interact --session $SESSION --input '{
|
||||
"action": "click", "ref": "@e5"
|
||||
}'
|
||||
infsh app run agent-browser --function interact --session $SESSION --input '{
|
||||
"action": "wait", "wait_ms": 500
|
||||
}'
|
||||
|
||||
# Step 2: Change setting
|
||||
infsh app run agent-browser --function interact --session $SESSION --input '{
|
||||
"action": "click", "ref": "@e10"
|
||||
}'
|
||||
infsh app run agent-browser --function interact --session $SESSION --input '{
|
||||
"action": "wait", "wait_ms": 500
|
||||
}'
|
||||
|
||||
# Step 3: Save
|
||||
infsh app run agent-browser --function interact --session $SESSION --input '{
|
||||
"action": "click", "ref": "@e15"
|
||||
}'
|
||||
infsh app run agent-browser --function interact --session $SESSION --input '{
|
||||
"action": "wait", "wait_ms": 1000
|
||||
}'
|
||||
|
||||
# Get the video
|
||||
RESULT=$(infsh app run agent-browser --function close --session $SESSION --input '{}')
|
||||
echo "Documentation video: $(echo $RESULT | jq -r '.video')"
|
||||
```
|
||||
|
||||
### Test Evidence for CI/CD
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
# Record E2E test for CI artifacts
|
||||
|
||||
TEST_NAME="${1:-e2e-test}"
|
||||
|
||||
SESSION=$(infsh app run agent-browser --function open --session new --input '{
|
||||
"url": "'"$TEST_URL"'",
|
||||
"record_video": true
|
||||
}' | jq -r '.session_id')
|
||||
|
||||
# Run test steps
|
||||
run_test_steps $SESSION
|
||||
TEST_RESULT=$?
|
||||
|
||||
# Always get video
|
||||
CLOSE_RESULT=$(infsh app run agent-browser --function close --session $SESSION --input '{}')
|
||||
VIDEO=$(echo $CLOSE_RESULT | jq -r '.video')
|
||||
|
||||
# Save to artifacts
|
||||
if [ -n "$CI_ARTIFACTS_DIR" ]; then
|
||||
cp "$VIDEO" "$CI_ARTIFACTS_DIR/${TEST_NAME}.webm"
|
||||
fi
|
||||
|
||||
exit $TEST_RESULT
|
||||
```
|
||||
|
||||
### Monitoring and Auditing
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
# Record automated task for audit trail
|
||||
|
||||
TASK_ID=$(date +%Y%m%d-%H%M%S)
|
||||
|
||||
SESSION=$(infsh app run agent-browser --function open --session new --input '{
|
||||
"url": "https://admin.example.com",
|
||||
"record_video": true
|
||||
}' | jq -r '.session_id')
|
||||
|
||||
# Perform admin task
|
||||
# ... automation steps ...
|
||||
|
||||
# Save recording
|
||||
RESULT=$(infsh app run agent-browser --function close --session $SESSION --input '{}')
|
||||
VIDEO=$(echo $RESULT | jq -r '.video')
|
||||
|
||||
# Archive for audit
|
||||
mv "$VIDEO" "/audit/recordings/${TASK_ID}.webm"
|
||||
echo "Audit recording saved: ${TASK_ID}.webm"
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
### 1. Add Strategic Pauses
|
||||
|
||||
Pauses make videos easier to follow:
|
||||
|
||||
```bash
|
||||
# After significant actions, add a pause
|
||||
'{"action": "click", "ref": "@e1"}'
|
||||
'{"action": "wait", "wait_ms": 500}' # Let viewer see result
|
||||
```
|
||||
|
||||
### 2. Use Larger Viewport for Documentation
|
||||
|
||||
```bash
|
||||
'{"url": "...", "record_video": true, "width": 1920, "height": 1080}'
|
||||
```
|
||||
|
||||
### 3. Handle Errors Gracefully
|
||||
|
||||
Always close the session, even on failure, so the video is finalized and returned in the `close` response:
|
||||
|
||||
```bash
|
||||
cleanup() {
|
||||
if [ -n "$SESSION" ]; then
|
||||
infsh app run agent-browser --function close --session $SESSION --input '{}' 2>/dev/null
|
||||
fi
|
||||
}
|
||||
trap cleanup EXIT
|
||||
```
|
||||
|
||||
### 4. Combine with Screenshots
|
||||
|
||||
Use screenshots for key frames, video for flow:
|
||||
|
||||
```bash
|
||||
# Record overall flow
|
||||
'{"record_video": true}'
|
||||
|
||||
# Capture key states
|
||||
infsh app run agent-browser --function screenshot --session $SESSION --input '{
|
||||
"full_page": true
|
||||
}'
|
||||
```
|
||||
|
||||
### 5. Don't Record Sensitive Sessions
|
||||
|
||||
Avoid recording when handling credentials:
|
||||
|
||||
```bash
|
||||
if [ "$CONTAINS_SENSITIVE_DATA" = "true" ]; then
|
||||
RECORD="false"
|
||||
else
|
||||
RECORD="true"
|
||||
fi
|
||||
|
||||
'{"url": "...", "record_video": '$RECORD'}'
|
||||
```
|
||||
|
||||
## Output Format
|
||||
|
||||
- **Format**: WebM (VP8/VP9 codec)
|
||||
- **Compatibility**: All modern browsers and video players
|
||||
- **Quality**: Matches viewport size
|
||||
- **Compression**: Efficient for screen content
|
||||
|
||||
## Limitations
|
||||
|
||||
1. **Session-level only** - Recording cannot be started or stopped mid-session
|
||||
2. **Memory usage** - Long sessions consume more memory
|
||||
3. **File size** - Complex pages with animations produce larger files
|
||||
4. **No audio** - Browser audio is not captured
|
||||
5. **Returned on close** - The video is only available after the session is closed
|
||||
Loading…
Add table
Add a link
Reference in a new issue