Appearance
Browser Automation Examples
This directory contains examples for controlling a headless browser within the AGB session.
Examples
Basic Navigation (basic_navigation.py)
Demonstrates how to start a browser, navigate to a URL, and take a screenshot.
py
#!/usr/bin/env python3
"""
Basic Browser Navigation Example
This example demonstrates fundamental browser operations:
- Creating and initializing a browser session
- Navigating to web pages
- Getting page information (title, URL, etc.)
- Basic Playwright integration
"""
import os
import asyncio
from agb import AGB
from agb.session_params import CreateSessionParams
from agb.modules.browser import BrowserOption, BrowserViewport
from playwright.async_api import async_playwright
async def main():
    """Run the basic browser navigation walkthrough.

    Creates an AGB browser session, connects Playwright to it over CDP,
    visits a few websites while printing basic page information, exercises
    back/forward/reload, and saves a screenshot.

    Raises:
        ValueError: If the AGB_API_KEY environment variable is not set.
        RuntimeError: If the session cannot be created or the browser fails
            to initialize.
    """
    # Get API key from environment
    api_key = os.getenv("AGB_API_KEY")
    if not api_key:
        raise ValueError("AGB_API_KEY environment variable not set")

    print("🚀 Starting basic browser navigation example...")

    # Initialize AGB client
    agb = AGB(api_key=api_key)
    session = None

    try:
        # Create a session with browser support
        print("📦 Creating browser session...")
        params = CreateSessionParams(image_id="agb-browser-use-1")
        result = agb.create(params)
        if not result.success:
            raise RuntimeError(f"Failed to create session: {result.error_message}")
        session = result.session
        print(f"✅ Session created: {session.session_id}")

        # Configure browser options
        option = BrowserOption(
            use_stealth=True,
        )

        # Initialize browser
        print("🌐 Initializing browser...")
        success = await session.browser.initialize_async(option)
        if not success:
            raise RuntimeError("Browser initialization failed")
        print("✅ Browser initialized successfully")

        # Get CDP endpoint and connect Playwright
        endpoint_url = session.browser.get_endpoint_url()
        print(f"🔗 CDP endpoint: {endpoint_url}")

        async with async_playwright() as p:
            browser = await p.chromium.connect_over_cdp(endpoint_url)
            try:
                context = browser.contexts[0]
                page = await context.new_page()

                # Navigate to different websites
                websites = [
                    "https://example.com",
                    "https://httpbin.org/html",
                    "https://quotes.toscrape.com",
                ]
                for url in websites:
                    print(f"\n📍 Navigating to: {url}")

                    # Navigate to the page
                    await page.goto(url, wait_until="networkidle")

                    # Get page information
                    title = await page.title()
                    current_url = page.url
                    print(f" 📄 Title: {title}")
                    print(f" 🔗 URL: {current_url}")

                    # Get page content info
                    body_text = await page.evaluate("document.body.innerText")
                    text_length = len(body_text.strip())
                    print(f" 📝 Content length: {text_length} characters")

                    # Check for common elements
                    has_forms = await page.evaluate("document.forms.length > 0")
                    has_images = await page.evaluate("document.images.length > 0")
                    has_links = await page.evaluate("document.links.length > 0")
                    print(" 🔍 Page analysis:")
                    print(f" - Has forms: {has_forms}")
                    print(f" - Has images: {has_images}")
                    print(f" - Has links: {has_links}")

                    # Wait a moment before next navigation
                    await asyncio.sleep(2)

                # Demonstrate browser navigation methods
                print("\n🔄 Testing browser navigation...")
                try:
                    # Go back with minimal wait - just wait for navigation to commit
                    await page.go_back(timeout=10000, wait_until="commit")
                    print(f" ⬅️ Went back to: {page.url}")

                    # Go forward with minimal wait
                    await page.go_forward(timeout=10000, wait_until="commit")
                    print(f" ➡️ Went forward to: {page.url}")
                except Exception as nav_error:
                    # Best-effort demo step: report and carry on.
                    print(f" ⚠️ Navigation test failed: {nav_error}")
                    print(f" 📍 Current URL: {page.url}")

                try:
                    # Reload page with a 10 second timeout
                    await page.reload(timeout=10000)
                    print(f" 🔄 Reloaded page: {page.url}")

                    # Take a screenshot
                    screenshot_path = "/tmp/navigation_example.png"
                    await page.screenshot(path=screenshot_path)
                    print(f" 📸 Screenshot saved to: {screenshot_path}")
                except Exception as reload_error:
                    # Best-effort demo step: report and carry on.
                    print(f" ⚠️ Reload/screenshot failed: {reload_error}")
                    print(f" 📍 Current URL: {page.url}")
            finally:
                # Close the browser while still inside the Playwright scope,
                # on both the success and the error path.
                await browser.close()
            print("✅ Browser closed successfully")
    except Exception as e:
        print(f"❌ Error occurred: {e}")
        raise
    finally:
        # Clean up session
        if session:
            agb.delete(session)
            print("🧹 Session cleaned up")

    print("🎉 Basic navigation example completed successfully!")
if __name__ == "__main__":
    asyncio.run(main())

Browser Fingerprint Basic Usage (browser_fingerprint_basic_usage.py)
Demonstrates how to use browser fingerprint to avoid detection by anti-bot services. It generates a random, realistic browser fingerprint (e.g., Windows desktop) and verifies the user agent and navigator properties.
py
"""
Example demonstrating Browser Fingerprint basic usage with AGB SDK.
This example shows how to use browser fingerprint to avoid detection by anti-bot services.
It will generate a random, realistic browser fingerprint and make the browser behave more like a real user.
This example will:
1. Create AIBrowser session with random fingerprint and simulate a Windows desktop browser.
2. Use playwright to connect to AIBrowser instance through CDP protocol
3. Verify user agent and navigator properties
"""
import os
import asyncio
from agb import AGB
from agb.session_params import CreateSessionParams
from agb.modules.browser.browser import BrowserOption, BrowserFingerprint
from playwright.async_api import async_playwright
async def main():
    """Demonstrate browser fingerprint basic usage.

    Creates a session, initializes the browser with stealth mode and a
    generated fingerprint restricted to desktop/Windows, then connects
    Playwright over CDP and prints the user agent and navigator properties.
    Prints an error and returns early if the API key is missing, the session
    cannot be created, or the browser fails to initialize.
    """
    # Get API key from environment variable
    api_key = os.getenv("AGB_API_KEY")
    if not api_key:
        print("Error: AGB_API_KEY environment variable not set")
        return

    # Initialize AGB client
    print("Initializing AGB client...")
    agb = AGB(api_key=api_key)

    # Create a session
    print("Creating a new session...")
    params = CreateSessionParams(
        image_id="agb-browser-use-1",
    )
    session_result = agb.create(params)
    if not session_result.success:
        print("Failed to create session", session_result.error_message)
        return

    session = session_result.session
    print(f"Session created with ID: {session.session_id}")

    try:
        # Create browser fingerprint option
        # - devices: desktop or mobile
        # - operating_systems: windows, macos, linux, android, ios
        # You can specify one or multiple values for each parameter.
        # But if you specify devices as desktop and operating_systems as
        # android/ios, the fingerprint feature will not work.
        browser_fingerprint = BrowserFingerprint(
            devices=["desktop"],
            operating_systems=["windows"],
            locales=["zh-CN", "zh"],
        )

        # Create browser option with stealth mode and fingerprint option limit.
        # This will help to avoid detection by anti-bot services. It will
        # generate a random, realistic browser fingerprint and make the browser
        # behave more like a real user.
        browser_option = BrowserOption(
            use_stealth=True,
            fingerprint=browser_fingerprint,
        )

        if not await session.browser.initialize_async(browser_option):
            print("Failed to initialize browser")
            return

        endpoint_url = session.browser.get_endpoint_url()
        print("endpoint_url =", endpoint_url)

        async with async_playwright() as p:
            browser = await p.chromium.connect_over_cdp(endpoint_url)
            try:
                context = browser.contexts[0]
                page = await context.new_page()

                # Check user agent.
                print("\n--- Check User Agent ---")
                await page.goto("https://httpbin.org/user-agent", timeout=120000)
                response = await page.evaluate("() => JSON.parse(document.body.textContent)")
                user_agent = response.get("user-agent", "")
                print(f"User Agent: {user_agent}")

                # Check navigator properties.
                print("\n--- Check Navigator Properties ---")
                nav_info = await page.evaluate("""
                    () => ({
                        platform: navigator.platform,
                        language: navigator.language,
                        languages: navigator.languages,
                        webdriver: navigator.webdriver
                    })
                """)
                print(f"Platform: {nav_info.get('platform')}")
                print(f"Language: {nav_info.get('language')}")
                print(f"Languages: {nav_info.get('languages')}")
                print(f"WebDriver: {nav_info.get('webdriver')}")

                await page.wait_for_timeout(3000)
            finally:
                # Close the browser even if a page operation raised.
                await browser.close()
    finally:
        # Clean up session on every exit path so it is not left running.
        agb.delete(session)
if __name__ == "__main__":
    asyncio.run(main())

Browser Fingerprint Construct (browser_fingerprint_construct.py)
Shows how to construct a custom FingerprintFormat from a JSON file and apply it to the remote browser. This allows you to fully control the browser fingerprint details.
py
"""
Example demonstrating how to construct a browser fingerprint from a file with AGB SDK.
This example shows how to load a FingerprintFormat from a JSON file and apply it to the
remote browser by using BrowserOption.fingerprint_format.
This example will:
1. Load a fingerprint from a JSON file with FingerprintFormat.load
2. Apply the loaded fingerprint to the remote browser
3. Verify that the remote browser reports the user agent from the fingerprint
4. Clean up session
"""
import os
import asyncio
from agb import AGB
from agb.session_params import CreateSessionParams
from agb.modules.browser.browser import BrowserOption
from agb.modules.browser.fingerprint import BrowserFingerprintGenerator, FingerprintFormat
from playwright.async_api import async_playwright
async def generate_fingerprint_by_file() -> FingerprintFormat:
    """Load a FingerprintFormat from the example JSON file.

    The file path is resolved relative to this script's directory
    (``../../../resource/fingerprint.example.json``).

    Returns:
        The object produced by ``FingerprintFormat.load`` from the file text.
    """
    fingerprint_path = os.path.join(
        os.path.dirname(__file__), "../../../resource/fingerprint.example.json"
    )
    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's default text encoding.
    with open(fingerprint_path, "r", encoding="utf-8") as f:
        return FingerprintFormat.load(f.read())
async def main():
    """Demonstrate applying a fingerprint loaded from a file to the remote browser.

    Creates a session, loads a FingerprintFormat via
    generate_fingerprint_by_file(), initializes the browser with it, and
    asserts that the user agent reported by the remote browser equals the one
    stored in the fingerprint. Prints an error and returns early if the API
    key is missing, the session cannot be created, or the browser fails to
    initialize.
    """
    # Get API key from environment variable
    api_key = os.getenv("AGB_API_KEY")
    if not api_key:
        print("Error: AGB_API_KEY environment variable not set")
        return

    # Initialize AGB client
    print("Initializing AGB client...")
    agb = AGB(api_key=api_key)

    # Create a session
    print("Creating a new session...")
    params = CreateSessionParams(
        image_id="agb-browser-use-1",
    )
    session_result = agb.create(params)
    if not session_result.success:
        print("Failed to create session", session_result.error_message)
        return

    session = session_result.session
    print(f"Session created with ID: {session.session_id}")

    try:
        # You can generate fingerprint by file or construct FingerprintFormat
        # by yourself totally.
        fingerprint_format = await generate_fingerprint_by_file()

        # Create browser option with the fingerprint format loaded from file;
        # it is used to set the remote browser fingerprint.
        browser_option = BrowserOption(
            use_stealth=True,
            fingerprint_format=fingerprint_format,
        )

        if not await session.browser.initialize_async(browser_option):
            print("Failed to initialize browser")
            return

        endpoint_url = session.browser.get_endpoint_url()
        print("endpoint_url =", endpoint_url)

        async with async_playwright() as p:
            browser = await p.chromium.connect_over_cdp(endpoint_url)
            try:
                context = browser.contexts[0]
                page = await context.new_page()

                # Check user agent.
                print("\n--- Check User Agent ---")
                await page.goto("https://httpbin.org/user-agent", timeout=120000)
                response = await page.evaluate("() => JSON.parse(document.body.textContent)")
                user_agent = response.get("user-agent", "")
                print(f"User Agent: {user_agent}")
                assert user_agent == fingerprint_format.fingerprint.navigator.userAgent
                print("User Agent constructed correctly")

                await page.wait_for_timeout(3000)
            finally:
                # Close the browser even if the check above failed.
                await browser.close()
    finally:
        # Clean up session on every exit path so it is not left running.
        agb.delete(session)
if __name__ == "__main__":
    asyncio.run(main())

Browser Fingerprint Local Sync (browser_fingerprint_local_sync.py)
Demonstrates how to sync your local Chrome browser's fingerprint to the remote browser using BrowserFingerprintGenerator. This makes the remote browser behave exactly like your local browser.
py
"""
Example demonstrating Browser Fingerprint local sync feature with AGB SDK.
This example shows how to sync the local browser fingerprint to the remote browser.
BrowserFingerprintGenerator can dump the fingerprint of the locally installed Chrome browser,
and you can then apply it to the remote browser by using BrowserOption.fingerprint_format.
This example will:
1. Generate local chrome browser fingerprint by BrowserFingerprintGenerator
2. Sync local browser fingerprint to remote browser fingerprint
3. Verify remote browser fingerprint
4. Clean up session
"""
import os
import asyncio
from agb import AGB
from agb.session_params import CreateSessionParams
from agb.modules.browser.browser import BrowserOption
from agb.modules.browser.fingerprint import BrowserFingerprintGenerator
from playwright.async_api import async_playwright
async def main():
    """Demonstrate syncing a locally generated fingerprint to the remote browser.

    Creates a session, generates a fingerprint with
    BrowserFingerprintGenerator, initializes the remote browser with it, and
    prints the user agent reported by the remote browser so it can be compared
    manually with the local one. Prints an error and returns early if the API
    key is missing, the session cannot be created, or the browser fails to
    initialize.
    """
    # Get API key from environment variable
    api_key = os.getenv("AGB_API_KEY")
    if not api_key:
        print("Error: AGB_API_KEY environment variable not set")
        return

    # Initialize AGB client
    print("Initializing AGB client...")
    agb = AGB(api_key=api_key)

    # Create a session
    print("Creating a new session...")
    params = CreateSessionParams(
        image_id="agb-browser-use-1",
    )
    session_result = agb.create(params)
    if not session_result.success:
        print("Failed to create session", session_result.error_message)
        return

    session = session_result.session
    print(f"Session created with ID: {session.session_id}")

    try:
        fingerprint_generator = BrowserFingerprintGenerator()
        fingerprint_format = await fingerprint_generator.generate_fingerprint()

        # Create browser option with the fingerprint format produced by
        # BrowserFingerprintGenerator; it is used to set the remote browser
        # fingerprint.
        browser_option = BrowserOption(
            use_stealth=True,
            fingerprint_format=fingerprint_format,
        )

        if not await session.browser.initialize_async(browser_option):
            print("Failed to initialize browser")
            return

        endpoint_url = session.browser.get_endpoint_url()
        print("endpoint_url =", endpoint_url)

        async with async_playwright() as p:
            browser = await p.chromium.connect_over_cdp(endpoint_url)
            try:
                context = browser.contexts[0]
                page = await context.new_page()

                # Check user agent.
                print("\n--- Check User Agent ---")
                await page.goto("https://httpbin.org/user-agent", timeout=120000)
                response = await page.evaluate("() => JSON.parse(document.body.textContent)")
                user_agent = response.get("user-agent", "")
                print(f"User Agent: {user_agent}")
                print("Please check if User Agent is synced correctly by visiting https://httpbin.org/user-agent in local chrome browser.")

                await page.wait_for_timeout(3000)
            finally:
                # Close the browser even if a page operation raised.
                await browser.close()
    finally:
        # Clean up session on every exit path so it is not left running.
        agb.delete(session)
if __name__ == "__main__":
    asyncio.run(main())

Browser Fingerprint Persistence (browser_fingerprint_persistence.py)
A more advanced example showing how to persist browser fingerprint across multiple sessions using BrowserContext and BrowserFingerprintContext. This is useful for maintaining a consistent browser identity over time.
py
#!/usr/bin/env python3
"""
Integration test for browser fingerprint persistence functionality.
This test verifies that browser fingerprint can be persisted
across sessions using the same ContextId and FingerprintContextId.
"""
import asyncio
import os
import time
import unittest
from agb import AGB
from agb.session_params import CreateSessionParams, BrowserContext
from agb.modules.browser.browser import BrowserOption, BrowserFingerprint, BrowserFingerprintContext
from playwright.async_api import async_playwright
# Global variables for persistent context and fingerprint context
persistent_context = None
persistent_fingerprint_context = None
def get_test_api_key():
    """Return the AGB_API_KEY environment value.

    Raises:
        unittest.SkipTest: If the variable is unset or empty.
    """
    key = os.environ.get("AGB_API_KEY")
    if key:
        return key
    raise unittest.SkipTest("AGB_API_KEY environment variable not set")
def is_windows_user_agent(user_agent: str) -> bool:
    """Report whether a user-agent string contains a Windows marker.

    The comparison is case-insensitive. An empty or missing user agent is
    treated as not Windows.
    """
    if not user_agent:
        return False

    lowered = user_agent.lower()
    for marker in ('windows nt', 'win32', 'win64', 'windows', 'wow64'):
        if marker in lowered:
            return True
    return False
def run_as_first_time():
    """Create the persistent contexts and run the first fingerprinted session.

    Creates a browser context and a fingerprint context, stores both in the
    module globals ``persistent_context`` and
    ``persistent_fingerprint_context``, then starts a session bound to them.
    The browser is initialized with fingerprint persistence enabled and
    desktop/Windows generation options, and the reported user agent is checked
    for Windows markers. The session is always deleted with
    ``sync_context=True`` once it has been created. Prints an error and
    returns early if the API key is missing or any creation step fails.
    """
    global persistent_context, persistent_fingerprint_context

    print("="*20)
    print("Run as first time")
    print("="*20)

    api_key = os.environ.get("AGB_API_KEY")
    if not api_key:
        print("Error: AGB_API_KEY environment variable not set")
        return
    agb = AGB(api_key)

    # Create a browser context for first time
    session_context_name = f"test-browser-context-{int(time.time())}"
    context_result = agb.context.get(session_context_name, True)
    if not context_result.success or not context_result.context:
        print("Failed to create browser context")
        return
    persistent_context = context_result.context
    print(f"Created browser context: {persistent_context.name} (ID: {persistent_context.id})")

    # Create a browser fingerprint context for first time
    fingerprint_context_name = f"test-browser-fingerprint-{int(time.time())}"
    fingerprint_context_result = agb.context.get(fingerprint_context_name, True)
    if not fingerprint_context_result.success or not fingerprint_context_result.context:
        print("Failed to create fingerprint context")
        return
    persistent_fingerprint_context = fingerprint_context_result.context
    print(f"Created fingerprint context: {persistent_fingerprint_context.name} (ID: {persistent_fingerprint_context.id})")

    # Create session with BrowserContext and FingerprintContext
    print(f"Creating session with browser context ID: {persistent_context.id} "
          f"and fingerprint context ID: {persistent_fingerprint_context.id}")
    fingerprint_context = BrowserFingerprintContext(persistent_fingerprint_context.id)
    browser_context = BrowserContext(persistent_context.id, auto_upload=True, fingerprint_context=fingerprint_context)
    params = CreateSessionParams(
        image_id="agb-browser-use-1",
        browser_context=browser_context
    )
    session_result = agb.create(params)
    if not session_result.success or not session_result.session:
        print(f"Failed to create first session: {session_result.error_message}")
        return
    session = session_result.session
    print(f"First session created with ID: {session.session_id}")

    # Get browser object and generate fingerprint for persistence
    async def first_session_operations():
        # Initialize browser with fingerprint persistent enabled and set
        # fingerprint generation options
        browser_option = BrowserOption(
            use_stealth=True,
            fingerprint_persistent=True,
            fingerprint=BrowserFingerprint(
                devices=["desktop"],
                operating_systems=["windows"],
                locales=["zh-CN"],
            ),
        )
        init_success = await session.browser.initialize_async(browser_option)
        if not init_success:
            print("Failed to initialize browser")
            return
        print("First session browser initialized successfully")

        # Get endpoint URL
        endpoint_url = session.browser.get_endpoint_url()
        if not endpoint_url:
            print("Failed to get browser endpoint URL")
            return
        print(f"First session browser endpoint URL: {endpoint_url}")

        # Connect with playwright, test first session fingerprint
        print("Opening https://httpbin.org/user-agent and test user agent...")
        async with async_playwright() as p:
            browser = await p.chromium.connect_over_cdp(endpoint_url)
            context = browser.contexts[0] if browser.contexts else await browser.new_context()
            page = await context.new_page()
            await page.goto("https://httpbin.org/user-agent", timeout=120000)
            response = await page.evaluate("() => JSON.parse(document.body.textContent)")
            user_agent = response["user-agent"]
            print("user_agent =", user_agent)
            is_windows = is_windows_user_agent(user_agent)
            if not is_windows:
                print("Failed to get windows user agent")
                return
            await context.close()
        print("First session browser fingerprint check completed")

    try:
        # Run first session operations
        asyncio.run(first_session_operations())
    finally:
        # Delete first session with syncContext=True, even if the browser
        # operations raised, so the session is not left running.
        print("Deleting first session with syncContext=True...")
        delete_result = agb.delete(session, sync_context=True)
        print(f"First session deleted successfully (RequestID: {delete_result.request_id})")
def run_as_second_time():
    """Run a second session against the contexts created by the first run.

    Reads the module globals ``persistent_context`` and
    ``persistent_fingerprint_context``, starts a new session bound to them,
    initializes the browser with fingerprint persistence enabled but without
    generation options, and checks that the reported user agent still
    contains Windows markers. The session is always deleted with
    ``sync_context=True`` once it has been created. Prints an error and
    returns early if the API key is missing, the persistent contexts were
    never set, or session creation fails.
    """
    global persistent_context, persistent_fingerprint_context

    print("="*20)
    print("Run as second time")
    print("="*20)

    api_key = os.environ.get("AGB_API_KEY")
    if not api_key:
        print("Error: AGB_API_KEY environment variable not set")
        return

    # The globals stay None when the first run bailed out early; stop here
    # instead of failing on `.id` below.
    if persistent_context is None or persistent_fingerprint_context is None:
        print("Error: persistent contexts are not available; run_as_first_time() must succeed first")
        return

    agb = AGB(api_key)

    # Create second session with same browser context and fingerprint context
    print(f"Creating second session with same browser context ID: {persistent_context.id} "
          f"and fingerprint context ID: {persistent_fingerprint_context.id}")
    fingerprint_context = BrowserFingerprintContext(persistent_fingerprint_context.id)
    browser_context = BrowserContext(persistent_context.id, auto_upload=True, fingerprint_context=fingerprint_context)
    params = CreateSessionParams(
        image_id="agb-browser-use-1",
        browser_context=browser_context
    )
    session_result = agb.create(params)
    if not session_result.success or not session_result.session:
        print(f"Failed to create second session: {session_result.error_message}")
        return
    session = session_result.session
    assert session is not None  # Type narrowing for linter
    print(f"Second session created with ID: {session.session_id}")

    # Get browser object and check if second session fingerprint is the same
    # as first session
    async def second_session_operations():
        # Initialize browser with fingerprint persistent enabled but no
        # specific fingerprint generation options
        browser_option = BrowserOption(
            use_stealth=True,
            fingerprint_persistent=True,
        )
        init_success = await session.browser.initialize_async(browser_option)
        if not init_success:
            print("Failed to initialize browser in second session")
            return
        print("Second session browser initialized successfully")

        # Get endpoint URL
        endpoint_url = session.browser.get_endpoint_url()
        if not endpoint_url:
            print("Failed to get browser endpoint URL in second session")
            return
        print(f"Second session browser endpoint URL: {endpoint_url}")

        # Connect with playwright and test second session fingerprint
        async with async_playwright() as p:
            browser = await p.chromium.connect_over_cdp(endpoint_url)
            context = browser.contexts[0] if browser.contexts else await browser.new_context()
            page = await context.new_page()
            await page.goto("https://httpbin.org/user-agent", timeout=120000)
            response = await page.evaluate("() => JSON.parse(document.body.textContent)")
            user_agent = response["user-agent"]
            print("user_agent =", user_agent)
            is_windows = is_windows_user_agent(user_agent)
            if not is_windows:
                print("Failed to get windows user agent in second session")
                return
            print("SUCCESS: fingerprint persisted correctly!")
            await context.close()
        print("Second session browser fingerprint check completed")

    try:
        # Run second session operations
        asyncio.run(second_session_operations())
    finally:
        # Delete second session with syncContext=True, even if the browser
        # operations raised, so the session is not left running.
        print("Deleting second session with syncContext=True...")
        delete_result = agb.delete(session, sync_context=True)
        print(f"Second session deleted successfully (RequestID: {delete_result.request_id})")
def main():
    """Run the first-time flow, pause three seconds, then run the second-time flow."""
    pause_seconds = 3
    run_as_first_time()
    time.sleep(pause_seconds)
    run_as_second_time()
if __name__ == "__main__":
    main()

Browser Type Selection (browser_type_example.py)
Demonstrates how to select between Chrome and Chromium browsers when using computer use images. Shows browser type configuration, verification, and best practices for choosing the right browser.
py
"""
Browser Type Selection Example
This example demonstrates how to select between Chrome and Chromium browsers
when using computer use images in AGB.
Features demonstrated:
- Chrome browser selection
- Chromium browser selection
- Default browser (None)
- Browser type verification
- Configuration comparison
Note: The browser_type option is only available for computer use images.
"""
import os
import asyncio
from agb import AGB
from agb.session_params import CreateSessionParams
from agb.modules.browser.browser import BrowserOption, BrowserViewport
from playwright.async_api import async_playwright
async def test_browser_type(browser_type: str | None, description: str):
    """Exercise one browser_type setting end to end.

    Creates a session on the computer use image, initializes the browser with
    the requested type, connects Playwright over CDP, and prints the user
    agent, viewport size and detected browser. The session is deleted in all
    cases once it has been created.

    Args:
        browser_type: Value passed to BrowserOption(browser_type=...); None
            leaves the choice unspecified.
        description: Label printed in the banner and completion message.

    Raises:
        RuntimeError: If AGB_API_KEY is unset, the session cannot be created,
            or the browser fails to initialize.
    """
    rule = "=" * 60
    print("\n" + rule)
    print(f"Testing: {description}")
    print(rule)

    api_key = os.getenv("AGB_API_KEY")
    if not api_key:
        raise RuntimeError("AGB_API_KEY environment variable not set")
    client = AGB(api_key=api_key)

    # Create session with computer use image
    print("\n1. Creating session with computer use image...")
    create_result = client.create(CreateSessionParams(image_id="agb-linux-test-5"))
    if not create_result.success:
        raise RuntimeError(f"Failed to create session: {create_result.error_message}")
    session = create_result.session
    print(f" ✓ Session created: {session.session_id}")

    try:
        # Initialize browser with specified type
        print(f"\n2. Initializing browser with type: {browser_type or 'default (None)'}")
        browser_option = BrowserOption(
            browser_type=browser_type,
        )
        initialized = await session.browser.initialize_async(browser_option)
        if not initialized:
            raise RuntimeError("Browser initialization failed")
        print(" ✓ Browser initialized successfully")

        # Get endpoint URL (only a prefix is printed)
        cdp_url = session.browser.get_endpoint_url()
        print(f"\n3. CDP endpoint: {cdp_url[:50]}...")

        # Connect Playwright and verify browser
        print("\n4. Connecting to browser via CDP...")
        async with async_playwright() as pw:
            remote_browser = await pw.chromium.connect_over_cdp(cdp_url)
            tab = await remote_browser.contexts[0].new_page()
            print(" ✓ Connected successfully")

            # Navigate to a page that shows browser info
            print("\n5. Verifying browser configuration...")
            await tab.goto("https://www.whatismybrowser.com/")
            await tab.wait_for_load_state("networkidle")

            # Get browser information
            user_agent = await tab.evaluate("navigator.userAgent")
            width = await tab.evaluate("window.innerWidth")
            height = await tab.evaluate("window.innerHeight")
            print("\n Browser Information:")
            print(f" - User Agent: {user_agent[:80]}...")
            print(f" - Viewport: {width} x {height}")
            print(f" - Configured Type: {browser_type or 'default'}")

            # Check if Chrome or Chromium is in user agent
            if "Chrome" in user_agent:
                detected = "Chromium" if "Chromium" in user_agent else "Chrome"
                print(f" - Detected Browser: {detected}")

            await remote_browser.close()

        print(f"\n ✓ Test completed successfully for {description}")
    finally:
        print("\n6. Cleaning up...")
        session.delete()
        print(" ✓ Session deleted")
async def main():
    """Run the Chrome, Chromium and default browser type tests in sequence.

    Prints an introduction, runs test_browser_type for each configuration
    with a two second pause between runs, and finishes with a summary.
    """
    rule = "=" * 60

    print("Browser Type Selection Example")
    print(rule)
    print("\nThis example demonstrates browser type selection in AGB.")
    print("Note: browser_type is only available for computer use images.")

    # (browser_type, description) pairs, run in this order.
    scenarios = (
        ("chrome", "Chrome Browser (Google Chrome)"),
        ("chromium", "Chromium Browser (Open Source)"),
        (None, "Default Browser (Platform decides)"),
    )
    for position, (kind, label) in enumerate(scenarios):
        if position > 0:
            await asyncio.sleep(2)  # Brief pause between tests
        await test_browser_type(browser_type=kind, description=label)

    print("\n" + rule)
    print("All browser type tests completed successfully!")
    print(rule)

    # Summary
    summary_lines = (
        "\nSummary:",
        "- Chrome: Use when you need Google Chrome specific features",
        "- Chromium: Use for open-source, lighter resource usage",
        "- Default (None): Let the platform choose the optimal browser",
        "\nBest Practice: Use None unless you have a specific requirement",
    )
    for line in summary_lines:
        print(line)
async def quick_example():
    """Quick example showing the most common usage: selecting Chrome.

    Creates a session on the computer use image, initializes the browser with
    ``browser_type="chrome"``, opens https://example.com through Playwright
    and prints the page title. The session is deleted once it has been
    created, whether or not the browser steps succeed.

    Raises:
        RuntimeError: If AGB_API_KEY is unset or the session cannot be
            created.
    """
    print("\n" + "=" * 60)
    print("Quick Example: Using Chrome Browser")
    print("=" * 60)

    api_key = os.getenv("AGB_API_KEY")
    if not api_key:
        raise RuntimeError("AGB_API_KEY environment variable not set")
    agb = AGB(api_key=api_key)

    # Create session; fail clearly instead of dereferencing a missing session.
    params = CreateSessionParams(image_id="agb-linux-test-5")
    result = agb.create(params)
    if not result.success:
        raise RuntimeError(f"Failed to create session: {result.error_message}")
    session = result.session

    try:
        # Simply specify browser_type in BrowserOption
        option = BrowserOption(browser_type="chrome")
        success = await session.browser.initialize_async(option)
        if success:
            print("✓ Chrome browser initialized successfully")

            # Get endpoint and use with Playwright
            endpoint_url = session.browser.get_endpoint_url()
            async with async_playwright() as p:
                browser = await p.chromium.connect_over_cdp(endpoint_url)
                try:
                    page = await browser.contexts[0].new_page()
                    await page.goto("https://example.com")
                    title = await page.title()
                    print(f"✓ Page title: {title}")
                finally:
                    # Close the browser even if navigation raised.
                    await browser.close()
    finally:
        session.delete()
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(description="Browser Type Selection Example")
parser.add_argument(
"--quick",
action="store_true",
help="Run quick example only"
)
args = parser.parse_args()
if args.quick:
asyncio.run(quick_example())
else:
asyncio.run(main())Browser Command Arguments (browser_command_args.py)
Shows how to launch the browser with custom command-line arguments and a default navigation URL. Useful for disabling specific Chrome features or starting the browser on a specific page.
py
"""
Example demonstrating Browser Launch with Custom Command Arguments and
go to Default Navigation URL with AGB SDK.
This example shows how to configure browser with custom command arguments
and go to default navigation URL:
- Create AIBrowser session with custom command arguments and go to default navigation URL
- Use playwright to connect to AIBrowser instance through CDP protocol
- Verify the browser navigated to the default URL
- Test custom command arguments effects
"""
import os
import asyncio
from agb import AGB
from agb.session_params import CreateSessionParams
from agb.modules.browser.browser import BrowserOption, BrowserFingerprint
from playwright.async_api import async_playwright
async def main():
    """Launch the browser with custom command arguments and a default URL.

    Creates a session, initializes the browser with
    ``--disable-features=PrivacySandboxSettings4`` and
    ``chrome://version/`` as the default navigation URL, then connects
    Playwright over CDP to check that the page is on the default URL and that
    the custom argument appears in the command line shown on that page.
    Prints an error and returns early if the API key is missing, the session
    cannot be created, or the browser fails to initialize.
    """
    # Get API key from environment variable
    api_key = os.getenv("AGB_API_KEY")
    if not api_key:
        print("Error: AGB_API_KEY environment variable not set")
        return

    # Initialize AGB client
    print("Initializing AGB client...")
    agb = AGB(api_key=api_key)

    # Create a session
    print("Creating a new session...")
    params = CreateSessionParams(
        image_id="agb-browser-use-1"
    )
    session_result = agb.create(params)
    if not session_result.success:
        print("Failed to create session", session_result.error_message)
        return

    session = session_result.session
    print(f"Session created with ID: {session.session_id}")

    try:
        # Create browser option with user defined cmd args and default navigate url
        browser_option = BrowserOption(
            cmd_args=["--disable-features=PrivacySandboxSettings4"],
            default_navigate_url="chrome://version/",
        )
        print("Browser configuration:")
        print("- Command arguments:", browser_option.cmd_args)
        print("- Default navigate URL:", browser_option.default_navigate_url)

        if not await session.browser.initialize_async(browser_option):
            print("Failed to initialize browser")
            return
        print("Browser initialized successfully")

        # Get browser endpoint URL
        endpoint_url = session.browser.get_endpoint_url()
        print(f"endpoint_url = {endpoint_url}")

        # Use Playwright to connect and validate
        async with async_playwright() as p:
            browser = await p.chromium.connect_over_cdp(endpoint_url)
            try:
                context = browser.contexts[0]
                # Use the existing page; fall back to a new page if the
                # context has none, rather than raising IndexError.
                page = context.pages[0] if context.pages else await context.new_page()

                # Check if browser navigated to default URL
                print("\n--- Check Default Navigation ---")
                await asyncio.sleep(2)  # Wait for navigation
                current_url = page.url
                print(f"Current URL: {current_url}")

                if "chrome://version/" in current_url:
                    print("✓ Browser successfully navigated to default URL")

                    # Test command arguments effect by checking Chrome version page
                    print("\n--- Check Chrome Version Info ---")
                    version_info = await page.evaluate("""
                        () => {
                            const versionElement = document.querySelector('#version');
                            const commandLineElement = document.querySelector('#command_line');
                            return {
                                version: versionElement ? versionElement.textContent : 'Not found',
                                commandLine: commandLineElement ? commandLineElement.textContent : 'Not found'
                            };
                        }
                    """)
                    print(f"Chrome Version: {version_info['version']}")
                    print(f"Command Line: {version_info['commandLine']}")
                    if "--disable-features=PrivacySandboxSettings4" in version_info['commandLine']:
                        print("✓ Custom command argument found in browser")
                    else:
                        print("✗ Custom command argument not found in browser")
                else:
                    print("✗ Browser did not navigate to default URL")

                await asyncio.sleep(3)
            finally:
                await browser.close()

        session.browser.destroy()
    finally:
        # Clean up session on every exit path so it is not left running.
        agb.delete(session)
if __name__ == "__main__":
    asyncio.run(main())

Natural Language Actions (natural_language_actions.py)
Demonstrates how to use natural language instructions (e.g., "Click the 'Sign Up' button") to interact with the page, which simplifies automation logic.
py
#!/usr/bin/env python3
"""
Natural Language Actions Example
This example demonstrates AI-powered browser control using natural language:
- Using natural language to describe actions
- Complex multi-step operations
- Handling dynamic page content
- Error recovery with AI assistance
"""
import os
import asyncio
from agb import AGB
from agb.session_params import CreateSessionParams
from agb.modules.browser import BrowserOption, BrowserViewport, ActOptions
from agb.exceptions import BrowserError
from playwright.async_api import async_playwright
async def main():
    """Run the natural-language browser action examples end to end.

    Creates an AGB browser session, attaches Playwright to it over CDP, and
    drives five demonstrations through ``session.browser.agent.act_async``:
    link clicking, form filling, dynamic content, a multi-step workflow, and
    a conditional instruction.

    Raises:
        ValueError: If the ``AGB_API_KEY`` environment variable is not set.
        RuntimeError: If the session cannot be created or the browser fails
            to initialize (reported, then re-raised).
    """
    # Get API key from environment
    api_key = os.getenv("AGB_API_KEY")
    if not api_key:
        raise ValueError("AGB_API_KEY environment variable not set")

    print("🤖 Starting natural language actions example...")

    # Initialize AGB client
    agb = AGB(api_key=api_key)
    session = None

    try:
        # Create a session with browser support
        print("📦 Creating browser session...")
        params = CreateSessionParams(image_id="agb-browser-use-1")
        create_result = agb.create(params)
        if not create_result.success:
            raise RuntimeError(f"Failed to create session: {create_result.error_message}")
        session = create_result.session
        print(f"✅ Session created: {session.session_id}")

        # Configure browser with stealth mode
        option = BrowserOption(
            use_stealth=True,
            viewport=BrowserViewport(width=1366, height=768),
        )

        # Initialize browser
        print("🌐 Initializing browser...")
        success = await session.browser.initialize_async(option)
        if not success:
            raise RuntimeError("Browser initialization failed")
        print("✅ Browser initialized successfully")

        # Get CDP endpoint and connect Playwright
        endpoint_url = session.browser.get_endpoint_url()

        async with async_playwright() as p:
            browser = await p.chromium.connect_over_cdp(endpoint_url)
            # Close the browser while the Playwright driver is still running;
            # the finally clause covers both the success and the error path,
            # so the outer handlers no longer need their own close() calls.
            try:
                page = await browser.new_page()

                # Example 1: Search on a more reliable site
                print("\n🔍 Example 1: Search and Navigation")
                try:
                    await page.goto("https://httpbin.org/html", wait_until="domcontentloaded", timeout=15000)
                    # Use natural language to interact with the page
                    search_result = await session.browser.agent.act_async(ActOptions(
                        action="Find and click on any link on this page"
                    ), page)
                    print(f" Link click result: {search_result.success}")
                    if search_result.success:
                        print(f" Message: {search_result.message}")
                        print(f" Current URL: {page.url}")
                    else:
                        print(f" Error: {search_result.message}")
                except Exception as e:
                    # Best effort: this example must not abort the remaining ones.
                    print(f" ⚠️ Search example failed: {e}")
                    print(f" 📍 Current URL: {page.url}")

                # Example 2: Form Interaction
                print("\n📝 Example 2: Form Interaction")
                await page.goto("https://httpbin.org/forms/post", wait_until="domcontentloaded", timeout=15000)
                # Fill out a form using natural language
                form_actions = [
                    "Fill the 'custname' field with 'John Doe'",
                    "Fill the 'custtel' field with '123-456-7890'",
                    "Fill the 'custemail' field with 'john.doe@example.com'",
                    "Select 'Large' from the size dropdown",
                    "Check the 'Bacon' checkbox",
                    "Fill the delivery instructions with 'Please ring the doorbell'",
                ]
                for action_text in form_actions:
                    action_result = await session.browser.agent.act_async(ActOptions(
                        action=action_text
                    ), page)
                    print(f" Action: {action_text}")
                    print(f" Result: {'✅' if action_result.success else '❌'} {action_result.message}")
                    if not action_result.success:
                        print(" Retrying with more specific instruction...")
                        # Retry once with a rephrased instruction
                        retry_result = await session.browser.agent.act_async(ActOptions(
                            action=f"Find and {action_text.lower()}"
                        ), page)
                        print(f" Retry result: {'✅' if retry_result.success else '❌'} {retry_result.message}")

                # Submit the form
                submit_result = await session.browser.agent.act_async(ActOptions(
                    action="Click the submit button to submit the form"
                ), page)
                print(f" Form submission: {'✅' if submit_result.success else '❌'} {submit_result.message}")

                # Example 3: Dynamic Content Interaction
                print("\n🔄 Example 3: Dynamic Content")
                await page.goto("https://quotes.toscrape.com", wait_until="domcontentloaded", timeout=15000)
                # Scroll and interact with dynamic content
                scroll_result = await session.browser.agent.act_async(ActOptions(
                    action="Scroll down to see more quotes on the page"
                ), page)
                print(f" Scroll result: {'✅' if scroll_result.success else '❌'} {scroll_result.message}")
                # Click on a tag to filter quotes
                tag_result = await session.browser.agent.act_async(ActOptions(
                    action="Click on any tag link to filter quotes by that tag"
                ), page)
                print(f" Tag click result: {'✅' if tag_result.success else '❌'} {tag_result.message}")
                if tag_result.success:
                    print(f" Current URL after tag click: {page.url}")

                # Example 4: Complex Multi-Step Workflow
                print("\n🔗 Example 4: Multi-Step Workflow")
                await page.goto("https://quotes.toscrape.com", wait_until="domcontentloaded", timeout=15000)
                # Multi-step workflow with error handling
                workflow_steps = [
                    {
                        "action": "Find and click on the 'Next' button to go to the next page",
                        "description": "Navigate to next page",
                    },
                    {
                        "action": "Click on the author name of the first quote to view author details",
                        "description": "View author details",
                    },
                    {
                        "action": "Go back to the previous page using browser navigation",
                        "description": "Return to quotes page",
                    },
                ]
                for i, step in enumerate(workflow_steps, 1):
                    print(f" Step {i}: {step['description']}")
                    step_result = await session.browser.agent.act_async(ActOptions(
                        action=step['action']
                    ), page)
                    print(f" Result: {'✅' if step_result.success else '❌'} {step_result.message}")
                    if step_result.success:
                        print(f" Current URL: {page.url}")
                        await asyncio.sleep(2)  # Wait between steps
                    else:
                        print(" Step failed, continuing with next step...")

                # Example 5: Conditional Actions
                print("\n🤔 Example 5: Conditional Actions")
                await page.goto("https://httpbin.org/html", wait_until="domcontentloaded", timeout=15000)
                # Perform conditional actions based on page content
                conditional_result = await session.browser.agent.act_async(ActOptions(
                    action="If there is a link that says 'Herman Melville', click on it. Otherwise, just scroll down the page"
                ), page)
                print(f" Conditional action result: {'✅' if conditional_result.success else '❌'} {conditional_result.message}")
            finally:
                await browser.close()
                print("✅ Browser closed successfully")
    except BrowserError as e:
        # Browser-level failures are reported but not propagated.
        print(f"❌ Browser error occurred: {e}")
    except Exception as e:
        print(f"❌ Unexpected error occurred: {e}")
        raise
    finally:
        # Clean up session
        if session:
            agb.delete(session)
            print("🧹 Session cleaned up")

    print("🎉 Natural language actions example completed!")
if __name__ == "__main__":
    asyncio.run(main())

Data Extraction (data_extraction.py)
Shows how to extract structured data from web pages using the extract tool.
py
#!/usr/bin/env python3
"""
Data Extraction Example
This example demonstrates structured data extraction from web pages:
- Defining data schemas with Pydantic
- Extracting product information
- Handling lists and nested data
- Text-based vs DOM-based extraction
"""
import os
import asyncio
from typing import List, Optional
from pydantic import BaseModel, Field
from agb import AGB
from agb.session_params import CreateSessionParams
from agb.modules.browser import BrowserOption, BrowserViewport, ExtractOptions
from agb.exceptions import BrowserError
from playwright.async_api import async_playwright
# Define data schemas using Pydantic
class Quote(BaseModel):
    """One quote: its text, its author, and its tags (all required)."""

    text: str = Field(..., description="The quote text")
    author: str = Field(..., description="The author of the quote")
    tags: List[str] = Field(..., description="Tags associated with the quote")
class QuotesList(BaseModel):
    """Container schema holding every ``Quote`` found on a page."""

    quotes: List[Quote] = Field(..., description="List of quotes on the page")
class Product(BaseModel):
    """Product record; name and price are required, the rest default to None."""

    name: str = Field(..., description="Product name")
    price: str = Field(..., description="Product price")
    rating: Optional[float] = Field(default=None, description="Product rating")
    availability: Optional[str] = Field(default=None, description="Product availability status")
    description: Optional[str] = Field(default=None, description="Product description")
class ProductList(BaseModel):
    """Container schema holding a list of ``Product`` records."""

    products: List[Product] = Field(..., description="List of products")
class NewsArticle(BaseModel):
    """News article; title and summary are required, the rest default to None."""

    title: str = Field(..., description="Article title")
    summary: str = Field(..., description="Article summary or excerpt")
    author: Optional[str] = Field(default=None, description="Article author")
    date: Optional[str] = Field(default=None, description="Publication date")
    url: Optional[str] = Field(default=None, description="Article URL")
class NewsList(BaseModel):
    """Container schema holding a list of ``NewsArticle`` records."""

    articles: List[NewsArticle] = Field(..., description="List of news articles")
async def main():
    """Run the structured data extraction examples end to end.

    Creates an AGB browser session, attaches Playwright to it over CDP, and
    calls ``session.browser.agent.extract_async`` with several Pydantic
    schemas: a quote list, a selector-focused single quote, a simple page
    summary, a nested analysis, a strict-then-flexible fallback, and a
    per-element batch.

    Raises:
        ValueError: If the ``AGB_API_KEY`` environment variable is not set.
        RuntimeError: If the session cannot be created or the browser fails
            to initialize (reported, then re-raised).
    """
    # Get API key from environment
    api_key = os.getenv("AGB_API_KEY")
    if not api_key:
        raise ValueError("AGB_API_KEY environment variable not set")

    print("📊 Starting data extraction example...")

    # Initialize AGB client
    agb = AGB(api_key=api_key)
    session = None

    try:
        # Create a session with browser support
        print("📦 Creating browser session...")
        params = CreateSessionParams(image_id="agb-browser-use-1")
        create_result = agb.create(params)
        if not create_result.success:
            raise RuntimeError(f"Failed to create session: {create_result.error_message}")
        session = create_result.session
        print(f"✅ Session created: {session.session_id}")

        # Configure browser
        option = BrowserOption(
            use_stealth=True,
            viewport=BrowserViewport(width=1366, height=768),
        )

        # Initialize browser
        print("🌐 Initializing browser...")
        initialized = await session.browser.initialize_async(option)
        if not initialized:
            raise RuntimeError("Browser initialization failed")
        print("✅ Browser initialized successfully")

        # Get CDP endpoint and connect Playwright
        endpoint_url = session.browser.get_endpoint_url()

        async with async_playwright() as p:
            browser = await p.chromium.connect_over_cdp(endpoint_url)
            # Close the browser while the Playwright driver is still running;
            # the finally clause covers both the success and the error path,
            # so the outer handlers no longer need their own close() calls.
            try:
                page = await browser.new_page()

                # Example 1: Extract quotes from quotes.toscrape.com
                print("\n📝 Example 1: Extracting Quotes")
                await page.goto("https://quotes.toscrape.com")
                # Extract all quotes on the page
                success, quotes_data = await session.browser.agent.extract_async(ExtractOptions(
                    instruction="Extract all quotes from this page including the quote text, author, and tags",
                    schema=QuotesList,
                    use_text_extract=True
                ), page)
                if success and quotes_data:
                    print(f" ✅ Successfully extracted {len(quotes_data.quotes)} quotes")
                    for i, quote in enumerate(quotes_data.quotes[:3], 1):  # Show first 3
                        print(f" Quote {i}:")
                        print(f" Text: {quote.text[:100]}...")
                        print(f" Author: {quote.author}")
                        print(f" Tags: {', '.join(quote.tags)}")
                else:
                    print(" ❌ Failed to extract quotes")

                # Example 2: Extract with CSS selector focus
                print("\n🎯 Example 2: Focused Extraction with CSS Selector")
                # Extract only the first quote using CSS selector
                success, focused_quotes = await session.browser.agent.extract_async(ExtractOptions(
                    instruction="Extract the quote information from the selected quote container",
                    schema=Quote,
                    selector=".quote:first-child",  # Focus on first quote only
                    use_text_extract=False
                ), page)
                if success and focused_quotes:
                    print(" ✅ Successfully extracted focused quote:")
                    print(f" Text: {focused_quotes.text}")
                    print(f" Author: {focused_quotes.author}")
                    print(f" Tags: {', '.join(focused_quotes.tags)}")
                else:
                    print(" ❌ Failed to extract focused quote")

                # Example 3: Extract from a different page structure
                print("\n📰 Example 3: Extracting from Different Page Structure")
                await page.goto("https://httpbin.org/html")

                # Define a simple schema for this page
                class SimplePageInfo(BaseModel):
                    title: str = Field(description="Page title")
                    headings: List[str] = Field(description="All headings on the page")
                    links: List[str] = Field(description="All link texts on the page")

                success, page_info = await session.browser.agent.extract_async(ExtractOptions(
                    instruction="Extract the page title, all headings, and all link texts from this HTML page",
                    schema=SimplePageInfo,
                    use_text_extract=True
                ), page)
                if success and page_info:
                    print(" ✅ Successfully extracted page information:")
                    print(f" Title: {page_info.title}")
                    print(f" Headings: {page_info.headings}")
                    print(f" Links: {page_info.links}")
                else:
                    print(" ❌ Failed to extract page information")

                # Example 4: Complex nested data extraction
                print("\n🏗️ Example 4: Complex Nested Data")
                await page.goto("https://quotes.toscrape.com")

                # Define a more complex schema
                class AuthorInfo(BaseModel):
                    name: str = Field(description="Author name")
                    quote_count: int = Field(description="Number of quotes by this author on the page")

                class PageAnalysis(BaseModel):
                    total_quotes: int = Field(description="Total number of quotes on the page")
                    authors: List[AuthorInfo] = Field(description="Information about authors")
                    unique_tags: List[str] = Field(description="All unique tags used on the page")
                    page_title: str = Field(description="Page title")

                success, analysis = await session.browser.agent.extract_async(ExtractOptions(
                    instruction="Analyze this quotes page and extract comprehensive information about quotes, authors, tags, and page structure",
                    schema=PageAnalysis,
                    use_text_extract=True,
                    dom_settle_timeout_ms=2000
                ), page)
                if success and analysis:
                    print(" ✅ Successfully extracted complex analysis:")
                    print(f" Page title: {analysis.page_title}")
                    print(f" Total quotes: {analysis.total_quotes}")
                    print(f" Number of authors: {len(analysis.authors)}")
                    print(f" Unique tags: {len(analysis.unique_tags)}")
                    print(" Sample authors:")
                    for author in analysis.authors[:3]:
                        print(f" - {author.name}: {author.quote_count} quotes")
                else:
                    print(" ❌ Failed to extract complex analysis")

                # Example 5: Error handling and retry with different approaches
                print("\n🔄 Example 5: Error Handling and Retry Strategies")
                # Try to extract from a page that might not have the expected structure
                await page.goto("https://example.com")

                # First attempt with strict schema
                class StrictPageInfo(BaseModel):
                    title: str = Field(description="Page title")
                    main_content: str = Field(description="Main content text")
                    links: List[str] = Field(description="All links on the page")

                success, strict_info = await session.browser.agent.extract_async(ExtractOptions(
                    instruction="Extract title, main content, and all links from this page",
                    schema=StrictPageInfo,
                    use_text_extract=True
                ), page)
                if success and strict_info:
                    print(" ✅ Strict extraction successful:")
                    print(f" Title: {strict_info.title}")
                    print(f" Content length: {len(strict_info.main_content)} characters")
                    print(f" Links found: {len(strict_info.links)}")
                else:
                    print(" ⚠️ Strict extraction failed, trying flexible approach...")

                    # Fallback with more flexible schema
                    class FlexiblePageInfo(BaseModel):
                        title: Optional[str] = Field(description="Page title if available", default=None)
                        content: Optional[str] = Field(description="Any text content found", default=None)
                        has_links: bool = Field(description="Whether the page has any links", default=False)

                    success, flexible_info = await session.browser.agent.extract_async(ExtractOptions(
                        instruction="Extract any available information from this page, being flexible about what's available",
                        schema=FlexiblePageInfo,
                        use_text_extract=True
                    ), page)
                    if success and flexible_info:
                        print(" ✅ Flexible extraction successful:")
                        print(f" Title: {flexible_info.title}")
                        print(f" Has content: {flexible_info.content is not None}")
                        print(f" Has links: {flexible_info.has_links}")
                    else:
                        print(" ❌ Both extraction attempts failed")

                # Example 6: Batch extraction from multiple elements
                print("\n📦 Example 6: Batch Extraction")
                await page.goto("https://quotes.toscrape.com")
                # Extract each quote individually to demonstrate batch processing
                quote_containers = await page.query_selector_all(".quote")
                print(f" Found {len(quote_containers)} quote containers")
                extracted_quotes = []
                for i, container in enumerate(quote_containers[:3]):  # Process first 3
                    print(f" Processing quote {i+1}...")
                    # Tag the container with an id so the extraction selector
                    # can address exactly this element.
                    container_id = f"quote-{i}"
                    await container.evaluate(f"(element) => element.id = '{container_id}'")
                    success, quote = await session.browser.agent.extract_async(ExtractOptions(
                        instruction=f"Extract the quote information from the element with id '{container_id}'",
                        schema=Quote,
                        selector=f"#{container_id}",
                        use_text_extract=False
                    ), page)
                    if success and quote:
                        extracted_quotes.append(quote)
                        print(f" ✅ Extracted: {quote.author}")
                    else:
                        print(f" ❌ Failed to extract quote {i+1}")
                print(f" 📊 Batch extraction completed: {len(extracted_quotes)} quotes extracted")
            finally:
                await browser.close()
                print("✅ Browser closed successfully")
    except BrowserError as e:
        # Browser-level failures are reported but not propagated.
        print(f"❌ Browser error occurred: {e}")
    except Exception as e:
        print(f"❌ Unexpected error occurred: {e}")
        raise
    finally:
        # Clean up session
        if session:
            agb.delete(session)
            print("🧹 Session cleaned up")

    print("🎉 Data extraction example completed!")
async def demonstrate_advanced_extraction_patterns():
    """Print a numbered checklist of extraction patterns and best practices."""
    practices = (
        "Use specific CSS selectors for focused extraction",
        "Define flexible schemas with Optional fields for robust extraction",
        "Implement retry logic with different extraction strategies",
        "Use text-based extraction for better performance on content-heavy pages",
        "Use DOM-based extraction for precise element targeting",
        "Handle nested data structures with proper Pydantic models",
        "Implement batch processing for multiple similar elements",
        "Use appropriate timeouts based on page complexity",
    )
    print("\n🔬 Advanced Extraction Patterns:")
    # Numbering starts at 1 so the output reads as a human-facing list.
    for number, practice in enumerate(practices, start=1):
        print(f" {number}. {practice}")
if __name__ == "__main__":
    asyncio.run(main())

CAPTCHA Solving (captcha_tongcheng.py)
A more advanced example showing how the agent can handle complex interactions like CAPTCHA challenges on real-world sites.
py
"""
Example demonstrating AIBrowser capabilities with AGB SDK.
This example shows how to use AIBrowser to solve captcha automatically, including:
- Create AIBrowser session
- Use playwright to connect to AIBrowser instance through CDP protocol
- Set solve_captchas to be True and goto tongcheng website
- We will encounter a captcha and we will solve it automatically.
"""
import os
import time
import asyncio
import base64
from agb import AGB
from agb.session_params import CreateSessionParams
from agb.modules.browser import BrowserOption, BrowserViewport
from playwright.async_api import async_playwright
async def main():
    """Drive the tongcheng password-recovery page with captcha solving enabled.

    Creates an AGB session, initializes its browser with
    ``solve_captchas=True``, attaches Playwright over CDP, submits a phone
    number, and waits for the console messages
    ``wuying-captcha-solving-started`` / ``wuying-captcha-solving-finished``.
    A base64 screenshot is printed at the end.

    Returns early (after printing a message) when the API key is missing,
    the session cannot be created, or the browser fails to initialize. Any
    exception raised while driving the page is reported and re-raised. Once
    a session exists it is always deleted before the function exits.
    """
    # Get API key from environment variable
    api_key = os.getenv("AGB_API_KEY")
    if not api_key:
        print("Error: AGB_API_KEY environment variable not set")
        return

    # Initialize AGB client
    print("Initializing AGB client...")
    agb = AGB(api_key=api_key)

    # Create a session
    print("Creating a new session...")
    params = CreateSessionParams(
        image_id="agb-browser-use-1",  # Updated image ID for AGB
    )
    session_result = agb.create(params)
    if not session_result.success:
        print(f"❌ Failed to create session: {session_result.error_message}")
        return

    session = session_result.session
    print(f"Session created with ID: {session.session_id}")

    try:
        # Configure browser options
        browser_option = BrowserOption(
            solve_captchas=True
        )

        # Initialize browser
        print("🌐 Initializing browser...")
        success = await session.browser.initialize_async(browser_option)
        if not success:
            print("❌ Browser initialization failed")
            return  # the outer finally still deletes the session
        print("✅ Browser initialized successfully")

        endpoint_url = session.browser.get_endpoint_url()
        print(f"🔗 CDP endpoint: {endpoint_url}")

        async with async_playwright() as p:
            browser = await p.chromium.connect_over_cdp(endpoint_url)
            # Close the browser while the Playwright driver is still running,
            # on both the success and the error path.
            try:
                context = browser.contexts[0]
                page = await context.new_page()

                print("🌐 Navigating to tongcheng site...")
                url = "https://passport.ly.com/Passport/GetPassword"
                await page.goto(url, wait_until="domcontentloaded")

                # Use selector to locate input field
                input_element = await page.wait_for_selector('#name_in', timeout=10000)
                print("Found login name input field: #name_in")

                # Clear input field and enter phone number
                phone_number = "15011556760"
                print(f"Entering phone number: {phone_number}")
                await input_element.click()
                await input_element.fill("")  # Clear input field
                await input_element.type(phone_number)
                print("Waiting for captcha")

                # Wait a moment to ensure input is complete
                await asyncio.sleep(1)

                # asyncio keeps only weak references to tasks, so hold strong
                # ones until each scheduled page.evaluate() has finished.
                pending_tasks = set()

                def schedule_evaluate(script):
                    task = asyncio.create_task(page.evaluate(script))
                    pending_tasks.add(task)
                    task.add_done_callback(pending_tasks.discard)

                # Mirror captcha console messages into window-level flags that
                # page.wait_for_function() can poll below.
                def handle_console(msg):
                    print(f"🔍 Received console message: {msg.text}")
                    if msg.text == "wuying-captcha-solving-started":
                        print("🎯 Setting captchaSolvingStarted = true")
                        schedule_evaluate("window.captchaSolvingStarted = true; window.captchaSolvingFinished = false;")
                    elif msg.text == "wuying-captcha-solving-finished":
                        print("✅ Setting captchaSolvingFinished = true")
                        schedule_evaluate("window.captchaSolvingFinished = true;")

                # Register the listener BEFORE the click that triggers the
                # captcha, so an early "started" message cannot be missed.
                page.on("console", handle_console)

                print("Clicking next step button...")
                await page.click('#next_step1')

                # Wait 1 second first, then check if captcha processing has started
                try:
                    await asyncio.sleep(1)
                    await page.wait_for_function("() => window.captchaSolvingStarted === true", timeout=1000)
                    print("🎯 Detected captcha processing started, waiting for completion...")
                    # If start is detected, wait for completion (max 30 seconds)
                    try:
                        await page.wait_for_function("() => window.captchaSolvingFinished === true", timeout=30000)
                        print("✅ Captcha processing completed")
                    except Exception:
                        print("⚠️ Captcha processing timeout, may still be in progress")
                except Exception:
                    # Best effort: no "started" flag within the window means
                    # no captcha was shown (or it was missed); carry on.
                    print("⏭️ No captcha processing detected, continuing execution")

                await asyncio.sleep(2)
                print("Test completed")

                # Keep browser open for a while to observe results
                await asyncio.sleep(5)

                # Take screenshot and print base64, can be pasted directly into Chrome address bar
                try:
                    screenshot_bytes = await page.screenshot(full_page=False)
                    b64 = base64.b64encode(screenshot_bytes).decode("utf-8")
                    print("page_screenshot_base64 = data:image/png;base64,", b64)
                except Exception as e:
                    print("screenshot failed:", e)
            finally:
                await browser.close()
                print("✅ Browser closed successfully")
    except Exception as e:
        print(f"❌ Error occurred: {e}")
        raise
    finally:
        # Clean up session on every exit path (success, early return, error)
        agb.delete(session)
        print("🧹 Session cleaned up")

    print("🎉 Captcha solving example completed successfully!")
if __name__ == "__main__":
    asyncio.run(main())

Notes
- Browser sessions consume more memory than standard code execution sessions.
- Screenshots are returned as base64 encoded strings.