Skip to content

Browser Automation Examples

This directory contains examples for controlling a headless browser within an AGB session.

Examples

Basic Navigation (basic_navigation.py)

Demonstrates how to start a browser, navigate to a URL, and take a screenshot.

py
#!/usr/bin/env python3
"""
Basic Browser Navigation Example

This example demonstrates fundamental browser operations:
- Creating and initializing a browser session
- Navigating to web pages
- Getting page information (title, URL, etc.)
- Basic Playwright integration
"""

import os
import asyncio
from agb import AGB
from agb.session_params import CreateSessionParams
from agb.modules.browser import BrowserOption, BrowserViewport
from playwright.async_api import async_playwright


async def main():
    """Run the basic navigation walkthrough against a remote AGB browser.

    The function reads the API key from ``AGB_API_KEY``, creates a session
    from the ``agb-browser-use-1`` image, initializes the session browser,
    connects Playwright to it over CDP, visits a few sites while printing
    basic page facts, exercises back/forward/reload, and saves a screenshot
    to ``/tmp/navigation_example.png``.

    Raises:
        ValueError: If ``AGB_API_KEY`` is not set.
        RuntimeError: If session creation or browser initialization fails.
        Exception: Any other error is printed and re-raised after cleanup.
    """

    # Get API key from environment
    api_key = os.getenv("AGB_API_KEY")
    if not api_key:
        raise ValueError("AGB_API_KEY environment variable not set")

    print("🚀 Starting basic browser navigation example...")

    # Initialize AGB client
    agb = AGB(api_key=api_key)
    session = None

    try:
        # Create a session with browser support
        print("📦 Creating browser session...")
        params = CreateSessionParams(image_id="agb-browser-use-1")
        result = agb.create(params)

        if not result.success:
            raise RuntimeError(f"Failed to create session: {result.error_message}")

        session = result.session
        print(f"✅ Session created: {session.session_id}")

        # Configure browser options
        option = BrowserOption(
            use_stealth=True,
        )

        # Initialize browser
        print("🌐 Initializing browser...")
        success = await session.browser.initialize_async(option)
        if not success:
            raise RuntimeError("Browser initialization failed")

        print("✅ Browser initialized successfully")

        # Get CDP endpoint and connect Playwright
        endpoint_url = session.browser.get_endpoint_url()
        print(f"🔗 CDP endpoint: {endpoint_url}")

        async with async_playwright() as p:
            browser = await p.chromium.connect_over_cdp(endpoint_url)
            try:
                context = browser.contexts[0]
                page = await context.new_page()

                # Navigate to different websites
                websites = [
                    "https://example.com",
                    "https://httpbin.org/html",
                    "https://quotes.toscrape.com",
                ]

                for url in websites:
                    print(f"\n📍 Navigating to: {url}")

                    # Navigate to the page
                    await page.goto(url, wait_until="networkidle")

                    # Get page information
                    title = await page.title()
                    current_url = page.url

                    print(f"  📄 Title: {title}")
                    print(f"  🔗 URL: {current_url}")

                    # Get page content info
                    body_text = await page.evaluate("document.body.innerText")
                    text_length = len(body_text.strip())
                    print(f"  📝 Content length: {text_length} characters")

                    # Check for common elements
                    has_forms = await page.evaluate("document.forms.length > 0")
                    has_images = await page.evaluate("document.images.length > 0")
                    has_links = await page.evaluate("document.links.length > 0")

                    print(f"  🔍 Page analysis:")
                    print(f"    - Has forms: {has_forms}")
                    print(f"    - Has images: {has_images}")
                    print(f"    - Has links: {has_links}")

                    # Wait a moment before next navigation
                    await asyncio.sleep(2)

                # Demonstrate browser navigation methods
                print(f"\n🔄 Testing browser navigation...")

                # History navigation is best-effort: a failure is reported
                # but does not abort the rest of the example.
                try:
                    # Go back with minimal wait - just wait for navigation to commit
                    await page.go_back(timeout=10000, wait_until="commit")
                    print(f"  ⬅️  Went back to: {page.url}")

                    # Go forward with minimal wait
                    await page.go_forward(timeout=10000, wait_until="commit")
                    print(f"  ➡️  Went forward to: {page.url}")

                except Exception as nav_error:
                    print(f"  ⚠️  Navigation test failed: {nav_error}")
                    print(f"  📍 Current URL: {page.url}")

                # Reload and screenshot are likewise best-effort.
                try:
                    # Reload page with a 10 second timeout
                    await page.reload(timeout=10000)
                    print(f"  🔄 Reloaded page: {page.url}")

                    # Take a screenshot
                    screenshot_path = "/tmp/navigation_example.png"
                    await page.screenshot(path=screenshot_path)
                    print(f"  📸 Screenshot saved to: {screenshot_path}")

                except Exception as reload_error:
                    print(f"  ⚠️  Reload/screenshot failed: {reload_error}")
                    print(f"  📍 Current URL: {page.url}")

            finally:
                # Close exactly once, and while the Playwright driver is still
                # running: once the ``async with`` block exits, the connection
                # used to reach the browser is gone.
                await browser.close()
                print("✅ Browser closed successfully")

    except Exception as e:
        print(f"❌ Error occurred: {e}")
        raise

    finally:
        # Clean up session
        if session:
            agb.delete(session)
            print("🧹 Session cleaned up")

    print("🎉 Basic navigation example completed successfully!")


if __name__ == "__main__":
    asyncio.run(main())

Browser Fingerprint Basic Usage (browser_fingerprint_basic_usage.py)

Demonstrates how to use a browser fingerprint to avoid detection by anti-bot services. It generates a random, realistic browser fingerprint (e.g., a Windows desktop) and verifies the user agent and navigator properties.

py
"""
Example demonstrating Browser Fingerprint basic usage with AGB SDK.

This example shows how to use browser fingerprint to avoid detection by anti-bot services.
It will generate a random, realistic browser fingerprint and make the browser behave more like a real user.

This example will:
1. Create AIBrowser session with random fingerprint and simulate a Windows desktop browser.
2. Use playwright to connect to AIBrowser instance through CDP protocol
3. Verify user agent and navigator properties
"""

import os
import asyncio

from agb import AGB
from agb.session_params import CreateSessionParams
from agb.modules.browser.browser import BrowserOption, BrowserFingerprint

from playwright.async_api import async_playwright


async def main():
    """Demonstrate generating a random browser fingerprint for a remote browser.

    Creates a session, initializes its browser with stealth mode and a
    ``BrowserFingerprint`` restricted to Windows desktop / Chinese locales,
    connects Playwright over CDP, and prints the user agent and a few
    ``navigator`` properties. The session is always deleted once it has been
    created, even when a later step fails.

    Returns early (after printing a message) when ``AGB_API_KEY`` is not set,
    when the session cannot be created, or when the browser fails to
    initialize.
    """
    # Get API key from environment variable
    api_key = os.getenv("AGB_API_KEY")
    if not api_key:
        print("Error: AGB_API_KEY environment variable not set")
        return

    # Initialize AGB client
    print("Initializing AGB client...")
    agb = AGB(api_key=api_key)

    # Create a session
    print("Creating a new session...")
    params = CreateSessionParams(
        image_id="agb-browser-use-1",
    )
    session_result = agb.create(params)

    if not session_result.success:
        # Report the failure instead of exiting silently.
        print(f"Failed to create session: {session_result.error_message}")
        return

    session = session_result.session
    print(f"Session created with ID: {session.session_id}")

    try:
        # Create browser fingerprint option
        # - devices: desktop or mobile
        # - operating_systems: windows, macos, linux, android, ios
        #
        # You can specify one or multiple values for each parameter.
        # But if you specify devices as desktop and operating_systems as
        # android/ios, the fingerprint feature will not work.
        browser_fingerprint = BrowserFingerprint(
            devices=["desktop"],
            operating_systems=["windows"],
            locales=["zh-CN", "zh"],
        )

        # Create browser option with stealth mode and fingerprint option limit.
        # This will help to avoid detection by anti-bot services. It will
        # generate a random, realistic browser fingerprint and make the browser
        # behave more like a real user.
        browser_option = BrowserOption(
            use_stealth=True,
            fingerprint=browser_fingerprint,
        )

        if not await session.browser.initialize_async(browser_option):
            print("Failed to initialize browser")
            return

        endpoint_url = session.browser.get_endpoint_url()
        print("endpoint_url =", endpoint_url)

        async with async_playwright() as p:
            browser = await p.chromium.connect_over_cdp(endpoint_url)
            try:
                context = browser.contexts[0]
                page = await context.new_page()

                # Check user agent.
                print("\n--- Check User Agent ---")
                await page.goto("https://httpbin.org/user-agent", timeout=120000)

                response = await page.evaluate("() => JSON.parse(document.body.textContent)")
                user_agent = response.get("user-agent", "")
                print(f"User Agent: {user_agent}")

                # Check navigator properties.
                print("\n--- Check Navigator Properties ---")
                nav_info = await page.evaluate("""
                    () => ({
                        platform: navigator.platform,
                        language: navigator.language,
                        languages: navigator.languages,
                        webdriver: navigator.webdriver
                    })
                """)
                print(f"Platform: {nav_info.get('platform')}")
                print(f"Language: {nav_info.get('language')}")
                print(f"Languages: {nav_info.get('languages')}")
                print(f"WebDriver: {nav_info.get('webdriver')}")

                await page.wait_for_timeout(3000)
            finally:
                # Close the browser even if a page operation raised.
                await browser.close()
    finally:
        # Clean up session, regardless of how the steps above ended.
        agb.delete(session)

if __name__ == "__main__":
    asyncio.run(main())

Browser Fingerprint Construct (browser_fingerprint_construct.py)

Shows how to construct a custom FingerprintFormat from a JSON file and apply it to the remote browser. This allows you to fully control the browser fingerprint details.

py
"""
Example demonstrating Browser Fingerprint construct feature with AGB SDK.

This example shows how to construct a FingerprintFormat from a JSON file and apply it
to the remote browser by using BrowserOption.fingerprint_format.
You can also construct the FingerprintFormat entirely by yourself instead of loading it from a file.

This example will:
1. Load a FingerprintFormat from the example JSON file
2. Apply the fingerprint format to the remote browser
3. Verify that the remote browser's user agent matches the fingerprint
4. Clean up session
"""

import os
import asyncio

from agb import AGB
from agb.session_params import CreateSessionParams
from agb.modules.browser.browser import BrowserOption
from agb.modules.browser.fingerprint import BrowserFingerprintGenerator, FingerprintFormat

from playwright.async_api import async_playwright

async def generate_fingerprint_by_file() -> FingerprintFormat:
    """Build a ``FingerprintFormat`` from the example JSON file.

    The file is looked up relative to this script at
    ``../../../resource/fingerprint.example.json``; its text is passed to
    ``FingerprintFormat.load``.

    Returns:
        The object returned by ``FingerprintFormat.load``.

    Raises:
        OSError: If the example file cannot be opened.
    """
    fingerprint_path = os.path.join(
        os.path.dirname(__file__), "../../../resource/fingerprint.example.json"
    )
    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's locale encoding.
    with open(fingerprint_path, "r", encoding="utf-8") as f:
        return FingerprintFormat.load(f.read())

async def main():
    """Demonstrate applying a fingerprint loaded from a file to a remote browser.

    Creates a session, loads a ``FingerprintFormat`` through
    ``generate_fingerprint_by_file()``, initializes the session browser with
    it, and checks that the user agent reported by the remote browser equals
    ``fingerprint_format.fingerprint.navigator.userAgent``. The session is
    always deleted once it has been created.

    Returns early (after printing a message) when ``AGB_API_KEY`` is not set,
    when the session cannot be created, or when the browser fails to
    initialize.

    Raises:
        AssertionError: If the remote user agent does not match the fingerprint.
    """
    # Get API key from environment variable
    api_key = os.getenv("AGB_API_KEY")
    if not api_key:
        print("Error: AGB_API_KEY environment variable not set")
        return

    # Initialize AGB client
    print("Initializing AGB client...")
    agb = AGB(api_key=api_key)

    # Create a session
    print("Creating a new session...")
    params = CreateSessionParams(
        image_id="agb-browser-use-1",
    )
    session_result = agb.create(params)

    if not session_result.success:
        # Report the failure instead of exiting silently.
        print(f"Failed to create session: {session_result.error_message}")
        return

    session = session_result.session
    print(f"Session created with ID: {session.session_id}")

    try:
        # You can generate fingerprint by file or construct FingerprintFormat by yourself totally.
        fingerprint_format = await generate_fingerprint_by_file()

        # Create browser option with fingerprint format.
        # Fingerprint format is loaded from file by generate_fingerprint_by_file(),
        # you can use it to sync to remote browser fingerprint.
        browser_option = BrowserOption(
            use_stealth=True,
            fingerprint_format=fingerprint_format,
        )

        if not await session.browser.initialize_async(browser_option):
            print("Failed to initialize browser")
            return

        endpoint_url = session.browser.get_endpoint_url()
        print("endpoint_url =", endpoint_url)

        async with async_playwright() as p:
            browser = await p.chromium.connect_over_cdp(endpoint_url)
            try:
                context = browser.contexts[0]
                page = await context.new_page()

                # Check user agent.
                print("\n--- Check User Agent ---")
                await page.goto("https://httpbin.org/user-agent", timeout=120000)

                response = await page.evaluate("() => JSON.parse(document.body.textContent)")
                user_agent = response.get("user-agent", "")
                print(f"User Agent: {user_agent}")

                # Explicit check rather than ``assert`` so the verification
                # still runs when Python is started with -O.
                expected_user_agent = fingerprint_format.fingerprint.navigator.userAgent
                if user_agent != expected_user_agent:
                    raise AssertionError(
                        f"User Agent mismatch: expected {expected_user_agent!r}, got {user_agent!r}"
                    )
                print("User Agent constructed correctly")

                await page.wait_for_timeout(3000)
            finally:
                # Close the browser even if a page operation or the check failed.
                await browser.close()
    finally:
        # Clean up session, regardless of how the steps above ended.
        agb.delete(session)

if __name__ == "__main__":
    asyncio.run(main())

Browser Fingerprint Local Sync (browser_fingerprint_local_sync.py)

Demonstrates how to sync your local Chrome browser's fingerprint to the remote browser using BrowserFingerprintGenerator. This makes the remote browser behave exactly like your local browser.

py
"""
Example demonstrating Browser Fingerprint local sync feature with AGB SDK.

This example shows how to sync local browser fingerprint to remote browser fingerprint.
BrowserFingerprintGenerator has ability to dump local installed chrome browser fingerprint,
and then you can sync it to remote browser fingerprint by using BrowserOption.fingerprint_format.

This example will:
1. Generate local chrome browser fingerprint by BrowserFingerprintGenerator
2. Sync local browser fingerprint to remote browser fingerprint
3. Verify remote browser fingerprint
4. Clean up session
"""

import os
import asyncio

from agb import AGB
from agb.session_params import CreateSessionParams
from agb.modules.browser.browser import BrowserOption
from agb.modules.browser.fingerprint import BrowserFingerprintGenerator

from playwright.async_api import async_playwright


async def main():
    """Demonstrate syncing a locally generated fingerprint to a remote browser.

    Creates a session, obtains a fingerprint format from
    ``BrowserFingerprintGenerator.generate_fingerprint()``, initializes the
    session browser with it, and prints the user agent the remote browser
    reports so it can be compared with the local browser by hand. The session
    is always deleted once it has been created.

    Returns early (after printing a message) when ``AGB_API_KEY`` is not set,
    when the session cannot be created, or when the browser fails to
    initialize.
    """
    # Get API key from environment variable
    api_key = os.getenv("AGB_API_KEY")
    if not api_key:
        print("Error: AGB_API_KEY environment variable not set")
        return

    # Initialize AGB client
    print("Initializing AGB client...")
    agb = AGB(api_key=api_key)

    # Create a session
    print("Creating a new session...")
    params = CreateSessionParams(
        image_id="agb-browser-use-1",
    )
    session_result = agb.create(params)

    if not session_result.success:
        # Report the failure instead of exiting silently.
        print(f"Failed to create session: {session_result.error_message}")
        return

    session = session_result.session
    print(f"Session created with ID: {session.session_id}")

    try:
        fingerprint_generator = BrowserFingerprintGenerator()
        fingerprint_format = await fingerprint_generator.generate_fingerprint()

        # Create browser option with fingerprint format.
        # Fingerprint format is dumped from local chrome browser by BrowserFingerprintGenerator
        # automatically, you can use it to sync to remote browser fingerprint.
        browser_option = BrowserOption(
            use_stealth=True,
            fingerprint_format=fingerprint_format,
        )

        if not await session.browser.initialize_async(browser_option):
            print("Failed to initialize browser")
            return

        endpoint_url = session.browser.get_endpoint_url()
        print("endpoint_url =", endpoint_url)

        async with async_playwright() as p:
            browser = await p.chromium.connect_over_cdp(endpoint_url)
            try:
                context = browser.contexts[0]
                page = await context.new_page()

                # Check user agent.
                print("\n--- Check User Agent ---")
                await page.goto("https://httpbin.org/user-agent", timeout=120000)

                response = await page.evaluate("() => JSON.parse(document.body.textContent)")
                user_agent = response.get("user-agent", "")
                print(f"User Agent: {user_agent}")

                print("Please check if User Agent is synced correctly by visiting https://httpbin.org/user-agent in local chrome browser.")

                await page.wait_for_timeout(3000)
            finally:
                # Close the browser even if a page operation raised.
                await browser.close()
    finally:
        # Clean up session, regardless of how the steps above ended.
        agb.delete(session)


if __name__ == "__main__":
    asyncio.run(main())

Browser Fingerprint Persistence (browser_fingerprint_persistence.py)

A more advanced example showing how to persist a browser fingerprint across multiple sessions using BrowserContext and BrowserFingerprintContext. This is useful for maintaining a consistent browser identity over time.

py
#!/usr/bin/env python3
"""
Integration test for browser fingerprint persistence functionality.
This test verifies that browser fingerprint can be persisted
across sessions using the same ContextId and FingerprintContextId.
"""

import asyncio
import os
import time
import unittest
from agb import AGB
from agb.session_params import CreateSessionParams, BrowserContext
from agb.modules.browser.browser import BrowserOption, BrowserFingerprint, BrowserFingerprintContext
from playwright.async_api import async_playwright

# Global variables for persistent context and fingerprint context
persistent_context = None
persistent_fingerprint_context = None

def get_test_api_key():
    """Return the value of ``AGB_API_KEY``, or skip the test when it is unset or empty.

    Raises:
        unittest.SkipTest: If the environment variable is missing or empty.
    """
    key = os.environ.get("AGB_API_KEY")
    if key:
        return key
    raise unittest.SkipTest("AGB_API_KEY environment variable not set")


def is_windows_user_agent(user_agent: str) -> bool:
    """Report whether *user_agent* contains a Windows marker (case-insensitive).

    An empty or ``None`` user agent is never considered Windows.
    """
    if not user_agent:
        return False

    lowered = user_agent.lower()
    # Substrings checked against the lower-cased user agent.
    for marker in ('windows nt', 'win32', 'win64', 'windows', 'wow64'):
        if marker in lowered:
            return True
    return False


def run_as_first_time():
    """Create the persistent contexts and generate a fingerprint in a first session.

    Creates a browser context and a fingerprint context (stored in the
    module-level ``persistent_context`` / ``persistent_fingerprint_context``),
    starts a session bound to both, initializes the browser with
    ``fingerprint_persistent=True`` and Windows-desktop generation options,
    and checks that the reported user agent looks like Windows. The session is
    always deleted with ``sync_context=True`` once it has been created, even
    if the browser steps raise.

    Returns early (after printing a message) when the API key is missing or
    when a context or the session cannot be created.
    """
    global persistent_context, persistent_fingerprint_context

    print("="*20)
    print("Run as first time")
    print("="*20)
    api_key = os.environ.get("AGB_API_KEY")
    if not api_key:
        print("Error: AGB_API_KEY environment variable not set")
        return

    agb = AGB(api_key)

    # Create a browser context for first time
    session_context_name = f"test-browser-context-{int(time.time())}"
    context_result = agb.context.get(session_context_name, True)
    if not context_result.success or not context_result.context:
        print("Failed to create browser context")
        return

    persistent_context = context_result.context
    print(f"Created browser context: {persistent_context.name} (ID: {persistent_context.id})")

    # Create a browser fingerprint context for first time
    fingerprint_context_name = f"test-browser-fingerprint-{int(time.time())}"
    fingerprint_context_result = agb.context.get(fingerprint_context_name, True)
    if not fingerprint_context_result.success or not fingerprint_context_result.context:
        print("Failed to create fingerprint context")
        return

    persistent_fingerprint_context = fingerprint_context_result.context
    print(f"Created fingerprint context: {persistent_fingerprint_context.name} (ID: {persistent_fingerprint_context.id})")

    # Create session with BrowserContext and FingerprintContext
    print(f"Creating session with browser context ID: {persistent_context.id} "
            f"and fingerprint context ID: {persistent_fingerprint_context.id}")
    fingerprint_context = BrowserFingerprintContext(persistent_fingerprint_context.id)
    browser_context = BrowserContext(persistent_context.id, auto_upload=True, fingerprint_context=fingerprint_context)
    params = CreateSessionParams(
        image_id="agb-browser-use-1",
        browser_context=browser_context
    )

    session_result = agb.create(params)
    if not session_result.success or not session_result.session:
        print(f"Failed to create first session: {session_result.error_message}")
        return

    session = session_result.session
    print(f"First session created with ID: {session.session_id}")

    # Get browser object and generate fingerprint for persistence
    async def first_session_operations():
        # Initialize browser with fingerprint persistent enabled and set fingerprint generation options
        browser_option = BrowserOption(
            use_stealth=True,
            fingerprint_persistent=True,
            fingerprint=BrowserFingerprint(
                devices=["desktop"],
                operating_systems=["windows"],
                locales=["zh-CN"],
            ),
        )
        init_success = await session.browser.initialize_async(browser_option)
        if not init_success:
            print("Failed to initialize browser")
            return
        print("First session browser initialized successfully")

        # Get endpoint URL
        endpoint_url = session.browser.get_endpoint_url()
        if not endpoint_url:
            print("Failed to get browser endpoint URL")
            return
        print(f"First session browser endpoint URL: {endpoint_url}")

        # Connect with playwright, test first session fingerprint
        print("Opening https://httpbin.org/user-agent and test user agent...")
        async with async_playwright() as p:
            browser = await p.chromium.connect_over_cdp(endpoint_url)
            context = browser.contexts[0] if browser.contexts else await browser.new_context()

            try:
                page = await context.new_page()
                await page.goto("https://httpbin.org/user-agent", timeout=120000)
                response = await page.evaluate("() => JSON.parse(document.body.textContent)")
                user_agent = response["user-agent"]
                print("user_agent =", user_agent)
                if not is_windows_user_agent(user_agent):
                    print("Failed to get windows user agent")
                    return
            finally:
                # Close the context on every path, including the early return
                # above and any exception raised by the page operations.
                await context.close()
            print("First session browser fingerprint check completed")

    try:
        # Run first session operations
        asyncio.run(first_session_operations())
    finally:
        # Delete first session with syncContext=True; done in ``finally`` so a
        # failure in the browser steps does not leave the session running.
        print("Deleting first session with syncContext=True...")
        delete_result = agb.delete(session, sync_context=True)
        print(f"First session deleted successfully (RequestID: {delete_result.request_id})")


def run_as_second_time():
    """Start a second session on the saved contexts and re-check the fingerprint.

    Reuses the module-level ``persistent_context`` and
    ``persistent_fingerprint_context`` set by ``run_as_first_time()``, starts a
    new session bound to both, initializes the browser with
    ``fingerprint_persistent=True`` but no generation options, and checks that
    the reported user agent still looks like Windows. The session is always
    deleted with ``sync_context=True`` once it has been created.

    Returns early (after printing a message) when the API key is missing, when
    the persistent contexts were never created, or when the session cannot be
    created.
    """
    global persistent_context, persistent_fingerprint_context

    print("="*20)
    print("Run as second time")
    print("="*20)
    api_key = os.environ.get("AGB_API_KEY")
    if not api_key:
        print("Error: AGB_API_KEY environment variable not set")
        return

    # The first run returns early on several failure paths and then leaves
    # these globals as None; without this guard the ``.id`` accesses below
    # would raise AttributeError.
    if persistent_context is None or persistent_fingerprint_context is None:
        print("Error: persistent contexts are not available; run_as_first_time() must succeed first")
        return

    agb = AGB(api_key)

    # Create second session with same browser context and fingerprint context
    print(f"Creating second session with same browser context ID: {persistent_context.id} "
            f"and fingerprint context ID: {persistent_fingerprint_context.id}")
    fingerprint_context = BrowserFingerprintContext(persistent_fingerprint_context.id)
    browser_context = BrowserContext(persistent_context.id, auto_upload=True, fingerprint_context=fingerprint_context)
    params = CreateSessionParams(
        image_id="agb-browser-use-1",
        browser_context=browser_context
    )
    session_result = agb.create(params)
    if not session_result.success or not session_result.session:
        print(f"Failed to create second session: {session_result.error_message}")
        return

    session = session_result.session
    assert session is not None  # Type narrowing for linter
    print(f"Second session created with ID: {session.session_id}")

    # Get browser object and check if second session fingerprint is the same as first session
    async def second_session_operations():
        # Initialize browser with fingerprint persistent enabled but not specific fingerprint generation options
        browser_option = BrowserOption(
            use_stealth=True,
            fingerprint_persistent=True,
        )
        init_success = await session.browser.initialize_async(browser_option)
        if not init_success:
            print("Failed to initialize browser in second session")
            return
        print("Second session browser initialized successfully")

        # Get endpoint URL
        endpoint_url = session.browser.get_endpoint_url()
        if not endpoint_url:
            print("Failed to get browser endpoint URL in second session")
            return
        print(f"Second session browser endpoint URL: {endpoint_url}")

        # Connect with playwright and test second session fingerprint
        async with async_playwright() as p:
            browser = await p.chromium.connect_over_cdp(endpoint_url)
            context = browser.contexts[0] if browser.contexts else await browser.new_context()
            try:
                page = await context.new_page()
                await page.goto("https://httpbin.org/user-agent", timeout=120000)
                response = await page.evaluate("() => JSON.parse(document.body.textContent)")
                user_agent = response["user-agent"]
                print("user_agent =", user_agent)
                if not is_windows_user_agent(user_agent):
                    print("Failed to get windows user agent in second session")
                    return
                print(f"SUCCESS: fingerprint persisted correctly!")
            finally:
                # Close the context on every path, including the early return
                # above and any exception raised by the page operations.
                await context.close()
            print("Second session browser fingerprint check completed")

    try:
        # Run second session operations
        asyncio.run(second_session_operations())
    finally:
        # Delete second session with syncContext=True; done in ``finally`` so a
        # failure in the browser steps does not leave the session running.
        print("Deleting second session with syncContext=True...")
        delete_result = agb.delete(session, sync_context=True)
        print(f"Second session deleted successfully (RequestID: {delete_result.request_id})")


def main():
    """Run the first and second session back to back, pausing 3 seconds in between."""
    phases = (run_as_first_time, run_as_second_time)
    for position, phase in enumerate(phases):
        if position:
            # Pause between the two runs only, not before the first one.
            time.sleep(3)
        phase()

if __name__ == "__main__":
    main()

Browser Type Selection (browser_type_example.py)

Demonstrates how to select between Chrome and Chromium browsers when using computer use images. Shows browser type configuration, verification, and best practices for choosing the right browser.

py
"""
Browser Type Selection Example

This example demonstrates how to select between Chrome and Chromium browsers
when using computer use images in AGB.

Features demonstrated:
- Chrome browser selection
- Chromium browser selection
- Default browser (None)
- Browser type verification
- Configuration comparison

Note: The browser_type option is only available for computer use images.
"""

import os
import asyncio
from agb import AGB
from agb.session_params import CreateSessionParams
from agb.modules.browser.browser import BrowserOption, BrowserViewport
from playwright.async_api import async_playwright


async def test_browser_type(browser_type: str | None, description: str):
    """Start a session, launch the requested browser type, and print what it reports.

    Args:
        browser_type: Value passed to ``BrowserOption(browser_type=...)``;
            ``None`` leaves the choice to the default.
        description: Human-readable label used in the printed output.

    Raises:
        RuntimeError: If ``AGB_API_KEY`` is not set, session creation fails,
            browser initialization fails, or no CDP endpoint URL is returned.
    """
    print(f"\n{'='*60}")
    print(f"Testing: {description}")
    print(f"{'='*60}")

    api_key = os.getenv("AGB_API_KEY")
    if not api_key:
        raise RuntimeError("AGB_API_KEY environment variable not set")

    agb = AGB(api_key=api_key)

    # Create session with computer use image
    print("\n1. Creating session with computer use image...")
    params = CreateSessionParams(image_id="agb-linux-test-5")
    result = agb.create(params)

    if not result.success:
        raise RuntimeError(f"Failed to create session: {result.error_message}")

    session = result.session
    print(f"   ✓ Session created: {session.session_id}")

    try:
        # Initialize browser with specified type
        print(f"\n2. Initializing browser with type: {browser_type or 'default (None)'}")
        option = BrowserOption(
            browser_type=browser_type,
        )

        success = await session.browser.initialize_async(option)
        if not success:
            raise RuntimeError("Browser initialization failed")

        print(f"   ✓ Browser initialized successfully")

        # Get endpoint URL; check it before slicing so a missing URL produces
        # a clear error instead of a TypeError.
        endpoint_url = session.browser.get_endpoint_url()
        if not endpoint_url:
            raise RuntimeError("Failed to get browser endpoint URL")
        print(f"\n3. CDP endpoint: {endpoint_url[:50]}...")

        # Connect Playwright and verify browser
        print("\n4. Connecting to browser via CDP...")
        async with async_playwright() as p:
            browser = await p.chromium.connect_over_cdp(endpoint_url)
            try:
                context = browser.contexts[0]
                page = await context.new_page()

                # Navigate to a page that shows browser info
                print("   ✓ Connected successfully")
                print("\n5. Verifying browser configuration...")

                await page.goto("https://www.whatismybrowser.com/")
                await page.wait_for_load_state("networkidle")

                # Get browser information
                user_agent = await page.evaluate("navigator.userAgent")
                viewport_width = await page.evaluate("window.innerWidth")
                viewport_height = await page.evaluate("window.innerHeight")

                print(f"\n   Browser Information:")
                print(f"   - User Agent: {user_agent[:80]}...")
                print(f"   - Viewport: {viewport_width} x {viewport_height}")
                print(f"   - Configured Type: {browser_type or 'default'}")

                # Check if Chrome or Chromium is in user agent
                if "Chrome" in user_agent:
                    detected = "Chromium" if "Chromium" in user_agent else "Chrome"
                    print(f"   - Detected Browser: {detected}")
            finally:
                # Close the browser even if navigation or evaluation raised.
                await browser.close()

        print(f"\n   ✓ Test completed successfully for {description}")

    finally:
        print("\n6. Cleaning up...")
        session.delete()
        print("   ✓ Session deleted")


async def main():
    """Exercise each browser type in turn (chrome, chromium, default) and print a summary."""
    rule = "=" * 60

    print("Browser Type Selection Example")
    print(rule)
    print("\nThis example demonstrates browser type selection in AGB.")
    print("Note: browser_type is only available for computer use images.")

    # (browser_type, description) pairs, run in this order.
    scenarios = (
        ("chrome", "Chrome Browser (Google Chrome)"),
        ("chromium", "Chromium Browser (Open Source)"),
        (None, "Default Browser (Platform decides)"),
    )
    for position, (kind, label) in enumerate(scenarios):
        if position:
            # Brief pause between tests (not before the first one).
            await asyncio.sleep(2)
        await test_browser_type(browser_type=kind, description=label)

    print("\n" + rule)
    print("All browser type tests completed successfully!")
    print(rule)

    # Summary
    closing_lines = (
        "\nSummary:",
        "- Chrome: Use when you need Google Chrome specific features",
        "- Chromium: Use for open-source, lighter resource usage",
        "- Default (None): Let the platform choose the optimal browser",
        "\nBest Practice: Use None unless you have a specific requirement",
    )
    for line in closing_lines:
        print(line)


async def quick_example():
    """Quick example showing the most common usage.

    Creates a session, initializes the browser with
    ``BrowserOption(browser_type="chrome")``, connects Playwright over CDP,
    opens https://example.com and prints the page title. The session is
    deleted on every exit path once it has been created.

    Raises:
        ValueError: If the AGB_API_KEY environment variable is not set.
        RuntimeError: If the session could not be created.
    """
    print("\n" + "=" * 60)
    print("Quick Example: Using Chrome Browser")
    print("=" * 60)

    # Fail fast with a clear message instead of passing None to the client.
    api_key = os.getenv("AGB_API_KEY")
    if not api_key:
        raise ValueError("AGB_API_KEY environment variable not set")
    agb = AGB(api_key=api_key)

    # Create session
    params = CreateSessionParams(image_id="agb-linux-test-5")
    result = agb.create(params)
    if not result.success:
        raise RuntimeError(f"Failed to create session: {result.error_message}")
    session = result.session

    try:
        # Simply specify browser_type in BrowserOption
        option = BrowserOption(browser_type="chrome")
        success = await session.browser.initialize_async(option)

        if not success:
            # Report the failure instead of silently doing nothing.
            print("✗ Chrome browser initialization failed")
            return

        print("✓ Chrome browser initialized successfully")

        # Get endpoint and use with Playwright
        endpoint_url = session.browser.get_endpoint_url()
        async with async_playwright() as p:
            browser = await p.chromium.connect_over_cdp(endpoint_url)
            try:
                page = await browser.contexts[0].new_page()

                await page.goto("https://example.com")
                title = await page.title()
                print(f"✓ Page title: {title}")
            finally:
                # Close the CDP connection even if navigation fails.
                await browser.close()
    finally:
        session.delete()


if __name__ == "__main__":
    import argparse

    cli = argparse.ArgumentParser(description="Browser Type Selection Example")
    cli.add_argument("--quick", action="store_true", help="Run quick example only")

    # --quick runs only the short Chrome demo; the default is the full suite.
    entry_point = quick_example if cli.parse_args().quick else main
    asyncio.run(entry_point())

Browser Command Arguments (browser_command_args.py)

Shows how to launch the browser with custom command-line arguments and a default navigation URL. Useful for disabling specific Chrome features or starting the browser on a specific page.

py
"""
Example demonstrating Browser Launch with Custom Command Arguments and
go to Default Navigation URL with AGB SDK.

This example shows how to configure browser with custom command arguments
and go to default navigation URL:
- Create AIBrowser session with custom command arguments and go to default navigation URL
- Use playwright to connect to AIBrowser instance through CDP protocol
- Verify the browser navigated to the default URL
- Test custom command arguments effects
"""

import os
import asyncio

from agb import AGB
from agb.session_params import CreateSessionParams
from agb.modules.browser.browser import BrowserOption, BrowserFingerprint
from playwright.async_api import async_playwright


async def main():
    """Launch a browser with custom command-line arguments and verify them.

    Creates a session, initializes the browser with one ``cmd_args`` entry and
    a ``default_navigate_url`` of ``chrome://version/``, then connects with
    Playwright over CDP to check that the page landed on the default URL and
    that the custom flag appears in the reported command line.

    Returns early (after printing a message) when the API key is missing, the
    session cannot be created, or the browser fails to initialize. Once a
    session exists it is deleted on every exit path, including exceptions.
    """
    # Get API key from environment variable
    api_key = os.getenv("AGB_API_KEY")
    if not api_key:
        print("Error: AGB_API_KEY environment variable not set")
        return

    # Initialize AGB client
    print("Initializing AGB client...")
    agb = AGB(api_key=api_key)

    # Create a session
    print("Creating a new session...")
    params = CreateSessionParams(
        image_id="agb-browser-use-1"
    )
    session_result = agb.create(params)

    if not session_result.success:
        print("Failed to create session", session_result.error_message)
        return

    session = session_result.session
    print(f"Session created with ID: {session.session_id}")

    default_url = "chrome://version/"
    custom_flag = "--disable-features=PrivacySandboxSettings4"

    try:
        # Create browser option with user defined cmd args and default navigate url
        browser_option = BrowserOption(
            cmd_args=[custom_flag],
            default_navigate_url=default_url,
        )

        print("Browser configuration:")
        print("- Command arguments:", browser_option.cmd_args)
        print("- Default navigate URL:", browser_option.default_navigate_url)

        if not await session.browser.initialize_async(browser_option):
            print("Failed to initialize browser")
            return

        print("Browser initialized successfully")

        # Get browser endpoint URL
        endpoint_url = session.browser.get_endpoint_url()
        print(f"endpoint_url = {endpoint_url}")

        # Use Playwright to connect and validate
        async with async_playwright() as p:
            browser = await p.chromium.connect_over_cdp(endpoint_url)
            try:
                context = browser.contexts[0]
                # Reuse the page opened at launch; fall back to a fresh page
                # rather than raising IndexError if none is listed yet.
                page = context.pages[0] if context.pages else await context.new_page()

                # Check if browser navigated to default URL
                print("\n--- Check Default Navigation ---")
                await asyncio.sleep(2)  # Wait for navigation
                current_url = page.url
                print(f"Current URL: {current_url}")

                if default_url in current_url:
                    print("✓ Browser successfully navigated to default URL")

                    # Test command arguments effect by checking Chrome version page
                    print("\n--- Check Chrome Version Info ---")
                    version_info = await page.evaluate("""
                        () => {
                            const versionElement = document.querySelector('#version');
                            const commandLineElement = document.querySelector('#command_line');
                            return {
                                version: versionElement ? versionElement.textContent : 'Not found',
                                commandLine: commandLineElement ? commandLineElement.textContent : 'Not found'
                            };
                        }
                    """)

                    print(f"Chrome Version: {version_info['version']}")
                    print(f"Command Line: {version_info['commandLine']}")

                    if custom_flag in version_info['commandLine']:
                        print("✓ Custom command argument found in browser")
                    else:
                        print("✗ Custom command argument not found in browser")
                else:
                    print("✗ Browser did not navigate to default URL")

                await asyncio.sleep(3)
            finally:
                await browser.close()
                session.browser.destroy()
    finally:
        # Clean up session even when an exception interrupts the example
        agb.delete(session)

# Script entry point: run the async example only when executed directly.
if __name__ == "__main__":
    asyncio.run(main())

Natural Language Actions (natural_language_actions.py)

Demonstrates how to use natural language instructions (e.g., "Click the 'Sign Up' button") to interact with the page, which simplifies automation logic.

py
#!/usr/bin/env python3
"""
Natural Language Actions Example

This example demonstrates AI-powered browser control using natural language:
- Using natural language to describe actions
- Complex multi-step operations
- Handling dynamic page content
- Error recovery with AI assistance
"""

import os
import asyncio
from agb import AGB
from agb.session_params import CreateSessionParams
from agb.modules.browser import BrowserOption, BrowserViewport, ActOptions
from agb.exceptions import BrowserError
from playwright.async_api import async_playwright


async def main():
    """Main function demonstrating natural language browser actions."""

    # Get API key from environment
    api_key = os.getenv("AGB_API_KEY")
    if not api_key:
        raise ValueError("AGB_API_KEY environment variable not set")

    print("🤖 Starting natural language actions example...")

    # Initialize AGB client
    agb = AGB(api_key=api_key)
    session = None
    browser = None

    try:
        # Create a session with browser support
        print("📦 Creating browser session...")
        params = CreateSessionParams(image_id="agb-browser-use-1")
        result = agb.create(params)

        if not result.success:
            raise RuntimeError(f"Failed to create session: {result.error_message}")

        session = result.session
        print(f"✅ Session created: {session.session_id}")

        # Configure browser with stealth mode
        option = BrowserOption(
            use_stealth=True,
            viewport=BrowserViewport(width=1366, height=768)
        )

        # Initialize browser
        print("🌐 Initializing browser...")
        success = await session.browser.initialize_async(option)
        if not success:
            raise RuntimeError("Browser initialization failed")

        print("✅ Browser initialized successfully")

        # Get CDP endpoint and connect Playwright
        endpoint_url = session.browser.get_endpoint_url()

        async with async_playwright() as p:
            browser = await p.chromium.connect_over_cdp(endpoint_url)
            page = await browser.new_page()

            # Example 1: Search on a more reliable site
            print("\n🔍 Example 1: Search and Navigation")
            try:
                await page.goto("https://httpbin.org/html", wait_until="domcontentloaded", timeout=15000)

                # Use natural language to interact with the page
                search_result = await session.browser.agent.act_async(ActOptions(
                    action="Find and click on any link on this page"
                ), page)

                print(f"  Link click result: {search_result.success}")
                if search_result.success:
                    print(f"  Message: {search_result.message}")
                    print(f"  Current URL: {page.url}")
                else:
                    print(f"  Error: {search_result.message}")

            except Exception as e:
                print(f"  ⚠️  Search example failed: {e}")
                print(f"  📍 Current URL: {page.url}")

            # Example 2: Form Interaction
            print("\n📝 Example 2: Form Interaction")
            await page.goto("https://httpbin.org/forms/post", wait_until="domcontentloaded", timeout=15000)

            # Fill out a form using natural language
            form_actions = [
                "Fill the 'custname' field with 'John Doe'",
                "Fill the 'custtel' field with '123-456-7890'",
                "Fill the 'custemail' field with 'john.doe@example.com'",
                "Select 'Large' from the size dropdown",
                "Check the 'Bacon' checkbox",
                "Fill the delivery instructions with 'Please ring the doorbell'"
            ]

            for action_text in form_actions:
                result = await session.browser.agent.act_async(ActOptions(
                    action=action_text
                ), page)

                print(f"  Action: {action_text}")
                print(f"  Result: {'' if result.success else ''} {result.message}")

                if not result.success:
                    print(f"    Retrying with more specific instruction...")
                    # Retry with more specific instruction
                    retry_result = await session.browser.agent.act_async(ActOptions(
                        action=f"Find and {action_text.lower()}"
                    ), page)
                    print(f"    Retry result: {'' if retry_result.success else ''} {retry_result.message}")

            # Submit the form
            submit_result = await session.browser.agent.act_async(ActOptions(
                action="Click the submit button to submit the form"
            ), page)

            print(f"  Form submission: {'' if submit_result.success else ''} {submit_result.message}")

            # Example 3: Dynamic Content Interaction
            print("\n🔄 Example 3: Dynamic Content")
            await page.goto("https://quotes.toscrape.com", wait_until="domcontentloaded", timeout=15000)

            # Scroll and interact with dynamic content
            scroll_result = await session.browser.agent.act_async(ActOptions(
                action="Scroll down to see more quotes on the page"
            ), page)

            print(f"  Scroll result: {'' if scroll_result.success else ''} {scroll_result.message}")

            # Click on a tag to filter quotes
            tag_result = await session.browser.agent.act_async(ActOptions(
                action="Click on any tag link to filter quotes by that tag"
            ), page)

            print(f"  Tag click result: {'' if tag_result.success else ''} {tag_result.message}")
            if tag_result.success:
                print(f"  Current URL after tag click: {page.url}")

            # Example 4: Complex Multi-Step Workflow
            print("\n🔗 Example 4: Multi-Step Workflow")
            await page.goto("https://quotes.toscrape.com", wait_until="domcontentloaded", timeout=15000)

            # Multi-step workflow with error handling
            workflow_steps = [
                {
                    "action": "Find and click on the 'Next' button to go to the next page",
                    "description": "Navigate to next page"
                },
                {
                    "action": "Click on the author name of the first quote to view author details",
                    "description": "View author details"
                },
                {
                    "action": "Go back to the previous page using browser navigation",
                    "description": "Return to quotes page"
                }
            ]

            for i, step in enumerate(workflow_steps, 1):
                print(f"  Step {i}: {step['description']}")

                result = await session.browser.agent.act_async(ActOptions(
                    action=step['action']
                ), page)

                print(f"    Result: {'' if result.success else ''} {result.message}")

                if result.success:
                    print(f"    Current URL: {page.url}")
                    await asyncio.sleep(2)  # Wait between steps
                else:
                    print(f"    Step failed, continuing with next step...")

            # Example 5: Conditional Actions
            print("\n🤔 Example 5: Conditional Actions")
            await page.goto("https://httpbin.org/html", wait_until="domcontentloaded", timeout=15000)

            # Perform conditional actions based on page content
            conditional_result = await session.browser.agent.act_async(ActOptions(
                action="If there is a link that says 'Herman Melville', click on it. Otherwise, just scroll down the page"
            ), page)

            print(f"  Conditional action result: {'' if conditional_result.success else ''} {conditional_result.message}")

            await browser.close()
            print("✅ Browser closed successfully")

    except BrowserError as e:
        print(f"❌ Browser error occurred: {e}")
        if browser:
            await browser.close()
    except Exception as e:
        print(f"❌ Unexpected error occurred: {e}")
        if browser:
            await browser.close()
        raise

    finally:
        # Clean up session
        if session:
            agb.delete(session)
            print("🧹 Session cleaned up")

    print("🎉 Natural language actions example completed!")


# Script entry point: run the async example only when executed directly.
if __name__ == "__main__":
    asyncio.run(main())

Data Extraction (data_extraction.py)

Shows how to extract structured data from web pages using the extract tool.

py
#!/usr/bin/env python3
"""
Data Extraction Example

This example demonstrates structured data extraction from web pages:
- Defining data schemas with Pydantic
- Extracting product information
- Handling lists and nested data
- Text-based vs DOM-based extraction
"""

import os
import asyncio
from typing import List, Optional
from pydantic import BaseModel, Field
from agb import AGB
from agb.session_params import CreateSessionParams
from agb.modules.browser import BrowserOption, BrowserViewport, ExtractOptions
from agb.exceptions import BrowserError
from playwright.async_api import async_playwright


# Define data schemas using Pydantic
class Quote(BaseModel):
    """Schema for a single quote."""
    # Passed as ExtractOptions.schema for the single-quote extractions in
    # main(), and used as the element type of QuotesList.quotes.
    # NOTE(review): the docstring and Field descriptions are presumably
    # exported with the model's schema — confirm before rewording them.
    text: str = Field(description="The quote text")
    author: str = Field(description="The author of the quote")
    tags: List[str] = Field(description="Tags associated with the quote")


class QuotesList(BaseModel):
    """Schema for a list of quotes."""
    # Wrapper model passed as ExtractOptions.schema when main() extracts all
    # quotes from a page; the result is read through its .quotes attribute.
    quotes: List[Quote] = Field(description="List of quotes on the page")


class Product(BaseModel):
    """Schema for product information."""
    # name and price are required; the remaining fields default to None.
    # This model is only referenced by ProductList in this example script.
    name: str = Field(description="Product name")
    price: str = Field(description="Product price")
    rating: Optional[float] = Field(description="Product rating", default=None)
    availability: Optional[str] = Field(description="Product availability status", default=None)
    description: Optional[str] = Field(description="Product description", default=None)


class ProductList(BaseModel):
    """Schema for a list of products."""
    # Wrapper around a list of Product entries; not referenced by main() in
    # this example script.
    products: List[Product] = Field(description="List of products")


class NewsArticle(BaseModel):
    """Schema for news article."""
    # title and summary are required; author, date and url default to None.
    # This model is only referenced by NewsList in this example script.
    title: str = Field(description="Article title")
    summary: str = Field(description="Article summary or excerpt")
    author: Optional[str] = Field(description="Article author", default=None)
    date: Optional[str] = Field(description="Publication date", default=None)
    url: Optional[str] = Field(description="Article URL", default=None)


class NewsList(BaseModel):
    """Schema for a list of news articles."""
    # Wrapper around a list of NewsArticle entries; not referenced by main()
    # in this example script.
    articles: List[NewsArticle] = Field(description="List of news articles")


async def main():
    """Main function demonstrating data extraction capabilities.

    Creates a browser session, connects Playwright over CDP and runs six
    demonstrations of ``session.browser.agent.extract_async``: whole-page
    extraction, selector-focused extraction, a different page structure,
    nested schemas, a strict-then-flexible fallback, and per-element batch
    extraction. The browser is closed while the Playwright context is still
    active, and the session is deleted on every exit path.

    Raises:
        ValueError: If the AGB_API_KEY environment variable is not set.
        RuntimeError: If session creation or browser initialization fails.
    """

    # Get API key from environment
    api_key = os.getenv("AGB_API_KEY")
    if not api_key:
        raise ValueError("AGB_API_KEY environment variable not set")

    print("📊 Starting data extraction example...")

    # Initialize AGB client
    agb = AGB(api_key=api_key)
    session = None

    try:
        # Create a session with browser support
        print("📦 Creating browser session...")
        params = CreateSessionParams(image_id="agb-browser-use-1")
        result = agb.create(params)

        if not result.success:
            raise RuntimeError(f"Failed to create session: {result.error_message}")

        session = result.session
        print(f"✅ Session created: {session.session_id}")

        # Configure browser
        option = BrowserOption(
            use_stealth=True,
            viewport=BrowserViewport(width=1366, height=768)
        )

        # Initialize browser
        print("🌐 Initializing browser...")
        success = await session.browser.initialize_async(option)
        if not success:
            raise RuntimeError("Browser initialization failed")

        print("✅ Browser initialized successfully")

        # Get CDP endpoint and connect Playwright
        endpoint_url = session.browser.get_endpoint_url()

        async with async_playwright() as p:
            browser = await p.chromium.connect_over_cdp(endpoint_url)
            try:
                page = await browser.new_page()

                # Example 1: Extract quotes from quotes.toscrape.com
                print("\n📝 Example 1: Extracting Quotes")
                await page.goto("https://quotes.toscrape.com")

                # Extract all quotes on the page
                success, quotes_data = await session.browser.agent.extract_async(ExtractOptions(
                    instruction="Extract all quotes from this page including the quote text, author, and tags",
                    schema=QuotesList,
                    use_text_extract=True
                ), page)

                if success and quotes_data:
                    print(f"  ✅ Successfully extracted {len(quotes_data.quotes)} quotes")
                    for i, quote in enumerate(quotes_data.quotes[:3], 1):  # Show first 3
                        print(f"    Quote {i}:")
                        print(f"      Text: {quote.text[:100]}...")
                        print(f"      Author: {quote.author}")
                        print(f"      Tags: {', '.join(quote.tags)}")
                else:
                    print("  ❌ Failed to extract quotes")

                # Example 2: Extract with CSS selector focus
                print("\n🎯 Example 2: Focused Extraction with CSS Selector")

                # Extract only the first quote using CSS selector
                success, focused_quotes = await session.browser.agent.extract_async(ExtractOptions(
                    instruction="Extract the quote information from the selected quote container",
                    schema=Quote,
                    selector=".quote:first-child",  # Focus on first quote only
                    use_text_extract=False
                ), page)

                if success and focused_quotes:
                    print("  ✅ Successfully extracted focused quote:")
                    print(f"    Text: {focused_quotes.text}")
                    print(f"    Author: {focused_quotes.author}")
                    print(f"    Tags: {', '.join(focused_quotes.tags)}")
                else:
                    print("  ❌ Failed to extract focused quote")

                # Example 3: Extract from a different page structure
                print("\n📰 Example 3: Extracting from Different Page Structure")
                await page.goto("https://httpbin.org/html")

                # Define a simple schema for this page
                class SimplePageInfo(BaseModel):
                    title: str = Field(description="Page title")
                    headings: List[str] = Field(description="All headings on the page")
                    links: List[str] = Field(description="All link texts on the page")

                success, page_info = await session.browser.agent.extract_async(ExtractOptions(
                    instruction="Extract the page title, all headings, and all link texts from this HTML page",
                    schema=SimplePageInfo,
                    use_text_extract=True
                ), page)

                if success and page_info:
                    print("  ✅ Successfully extracted page information:")
                    print(f"    Title: {page_info.title}")
                    print(f"    Headings: {page_info.headings}")
                    print(f"    Links: {page_info.links}")
                else:
                    print("  ❌ Failed to extract page information")

                # Example 4: Complex nested data extraction
                print("\n🏗️ Example 4: Complex Nested Data")
                await page.goto("https://quotes.toscrape.com")

                # Define a more complex schema
                class AuthorInfo(BaseModel):
                    name: str = Field(description="Author name")
                    quote_count: int = Field(description="Number of quotes by this author on the page")

                class PageAnalysis(BaseModel):
                    total_quotes: int = Field(description="Total number of quotes on the page")
                    authors: List[AuthorInfo] = Field(description="Information about authors")
                    unique_tags: List[str] = Field(description="All unique tags used on the page")
                    page_title: str = Field(description="Page title")

                success, analysis = await session.browser.agent.extract_async(ExtractOptions(
                    instruction="Analyze this quotes page and extract comprehensive information about quotes, authors, tags, and page structure",
                    schema=PageAnalysis,
                    use_text_extract=True,
                    dom_settle_timeout_ms=2000
                ), page)

                if success and analysis:
                    print("  ✅ Successfully extracted complex analysis:")
                    print(f"    Page title: {analysis.page_title}")
                    print(f"    Total quotes: {analysis.total_quotes}")
                    print(f"    Number of authors: {len(analysis.authors)}")
                    print(f"    Unique tags: {len(analysis.unique_tags)}")
                    print("    Sample authors:")
                    for author in analysis.authors[:3]:
                        print(f"      - {author.name}: {author.quote_count} quotes")
                else:
                    print("  ❌ Failed to extract complex analysis")

                # Example 5: Error handling and retry with different approaches
                print("\n🔄 Example 5: Error Handling and Retry Strategies")

                # Try to extract from a page that might not have the expected structure
                await page.goto("https://example.com")

                # First attempt with strict schema
                class StrictPageInfo(BaseModel):
                    title: str = Field(description="Page title")
                    main_content: str = Field(description="Main content text")
                    links: List[str] = Field(description="All links on the page")

                success, strict_info = await session.browser.agent.extract_async(ExtractOptions(
                    instruction="Extract title, main content, and all links from this page",
                    schema=StrictPageInfo,
                    use_text_extract=True
                ), page)

                if success and strict_info:
                    print("  ✅ Strict extraction successful:")
                    print(f"    Title: {strict_info.title}")
                    print(f"    Content length: {len(strict_info.main_content)} characters")
                    print(f"    Links found: {len(strict_info.links)}")
                else:
                    print("  ⚠️ Strict extraction failed, trying flexible approach...")

                    # Fallback with more flexible schema
                    class FlexiblePageInfo(BaseModel):
                        title: Optional[str] = Field(description="Page title if available", default=None)
                        content: Optional[str] = Field(description="Any text content found", default=None)
                        has_links: bool = Field(description="Whether the page has any links", default=False)

                    success, flexible_info = await session.browser.agent.extract_async(ExtractOptions(
                        instruction="Extract any available information from this page, being flexible about what's available",
                        schema=FlexiblePageInfo,
                        use_text_extract=True
                    ), page)

                    if success and flexible_info:
                        print("  ✅ Flexible extraction successful:")
                        print(f"    Title: {flexible_info.title}")
                        print(f"    Has content: {flexible_info.content is not None}")
                        print(f"    Has links: {flexible_info.has_links}")
                    else:
                        print("  ❌ Both extraction attempts failed")

                # Example 6: Batch extraction from multiple elements
                print("\n📦 Example 6: Batch Extraction")
                await page.goto("https://quotes.toscrape.com")

                # Extract each quote individually to demonstrate batch processing
                quote_containers = await page.query_selector_all(".quote")
                print(f"  Found {len(quote_containers)} quote containers")

                extracted_quotes = []
                for i, container in enumerate(quote_containers[:3]):  # Process first 3
                    print(f"    Processing quote {i+1}...")

                    # Tag the container with an id so the extractor can target
                    # it; the id is passed as an argument instead of being
                    # spliced into the JavaScript source.
                    container_id = f"quote-{i}"
                    await container.evaluate(
                        "(element, newId) => { element.id = newId; }",
                        container_id,
                    )

                    success, quote = await session.browser.agent.extract_async(ExtractOptions(
                        instruction=f"Extract the quote information from the element with id '{container_id}'",
                        schema=Quote,
                        selector=f"#{container_id}",
                        use_text_extract=False
                    ), page)

                    if success and quote:
                        extracted_quotes.append(quote)
                        print(f"      ✅ Extracted: {quote.author}")
                    else:
                        print(f"      ❌ Failed to extract quote {i+1}")

                print(f"  📊 Batch extraction completed: {len(extracted_quotes)} quotes extracted")
            finally:
                # Close while the Playwright context is still active; closing
                # from an outer handler would run after Playwright has stopped.
                await browser.close()
                print("✅ Browser closed successfully")

    except BrowserError as e:
        print(f"❌ Browser error occurred: {e}")
    except Exception as e:
        print(f"❌ Unexpected error occurred: {e}")
        raise

    finally:
        # Clean up session
        if session:
            agb.delete(session)
            print("🧹 Session cleaned up")

    print("🎉 Data extraction example completed!")


async def demonstrate_advanced_extraction_patterns():
    """Print a numbered checklist of extraction best practices.

    Purely informational: writes a heading followed by eight tips to stdout
    and returns None.
    """
    tips = (
        "Use specific CSS selectors for focused extraction",
        "Define flexible schemas with Optional fields for robust extraction",
        "Implement retry logic with different extraction strategies",
        "Use text-based extraction for better performance on content-heavy pages",
        "Use DOM-based extraction for precise element targeting",
        "Handle nested data structures with proper Pydantic models",
        "Implement batch processing for multiple similar elements",
        "Use appropriate timeouts based on page complexity",
    )

    print("\n🔬 Advanced Extraction Patterns:")
    for number, tip in enumerate(tips, start=1):
        print(f"  {number}. {tip}")


# Script entry point: run the async example only when executed directly.
if __name__ == "__main__":
    asyncio.run(main())

CAPTCHA Solving (captcha_tongcheng.py)

A more advanced example showing how the agent can handle complex interactions like CAPTCHA challenges on real-world sites.

py
"""
Example demonstrating AIBrowser capabilities with AGB SDK.
This example shows how to use AIBrowser to solve captcha automatically, including:
- Create AIBrowser session
- Use playwright to connect to AIBrowser instance through CDP protocol
- Set solve_captchas to be True and goto tongcheng website
- We will encounter a captcha and we will solve it automatically.
"""

import os
import time
import asyncio
import base64

from agb import AGB
from agb.session_params import CreateSessionParams
from agb.modules.browser import BrowserOption, BrowserViewport

from playwright.async_api import async_playwright

async def _run_tongcheng_captcha_flow(browser):
    """Drive the Tongcheng password-recovery page and wait for captcha solving.

    Opens a new page in the first context of ``browser``, enters a phone
    number, clicks the "next step" button, waits for the captcha-solving
    console notifications, and finally prints a base64 screenshot.

    Args:
        browser: A Playwright browser already connected over CDP.

    Raises:
        Exception: Any Playwright error from navigation or element interaction
            propagates to the caller. Failures while waiting for the captcha
            flags or taking the screenshot are reported and tolerated.
    """
    context = browser.contexts[0]
    page = await context.new_page()

    # The event loop keeps only weak references to tasks, so hold strong
    # references until each fire-and-forget evaluate() has finished.
    pending_evals = set()

    def _on_eval_done(task):
        pending_evals.discard(task)
        # Retrieve the result so a failed evaluate() is reported instead of
        # surfacing later as "Task exception was never retrieved".
        if not task.cancelled() and task.exception() is not None:
            print(f"⚠️ Failed to update captcha flag in page: {task.exception()}")

    def _set_page_flags(script):
        task = asyncio.create_task(page.evaluate(script))
        pending_evals.add(task)
        task.add_done_callback(_on_eval_done)

    def handle_console(msg):
        # Mirror the solver's console notifications into window-level flags so
        # page.wait_for_function() below can observe them.
        # NOTE(review): the "wuying-captcha-solving-*" messages are presumably
        # emitted by the AGB captcha solver - confirm against SDK docs.
        print(f"🔍 Received console message: {msg.text}")
        if msg.text == "wuying-captcha-solving-started":
            print("🎯 Setting captchaSolvingStarted = true")
            _set_page_flags("window.captchaSolvingStarted = true; window.captchaSolvingFinished = false;")
        elif msg.text == "wuying-captcha-solving-finished":
            print("✅ Setting captchaSolvingFinished = true")
            _set_page_flags("window.captchaSolvingFinished = true;")

    print("🌐 Navigating to tongcheng site...")
    url = "https://passport.ly.com/Passport/GetPassword"
    await page.goto(url, wait_until="domcontentloaded")

    # Use selector to locate input field
    input_element = await page.wait_for_selector('#name_in', timeout=10000)
    print("Found login name input field: #name_in")

    # Clear input field and enter phone number
    phone_number = "15011556760"
    print(f"Entering phone number: {phone_number}")

    await input_element.click()
    await input_element.fill("")  # Clear input field
    await input_element.type(phone_number)
    print("Waiting for captcha")

    # Wait a moment to ensure input is complete
    await asyncio.sleep(1)

    # Register the listener BEFORE clicking: a notification emitted while the
    # click is still being processed would otherwise be missed.
    page.on("console", handle_console)

    print("Clicking next step button...")
    await page.click('#next_step1')

    # Wait 1 second first, then check if captcha processing has started
    await asyncio.sleep(1)
    try:
        await page.wait_for_function("() => window.captchaSolvingStarted === true", timeout=1000)
    except Exception:
        # Best effort: treat any failure here as "no captcha shown", but do not
        # swallow CancelledError/KeyboardInterrupt as a bare except would.
        print("⏭️ No captcha processing detected, continuing execution")
    else:
        print("🎯 Detected captcha processing started, waiting for completion...")

        # If start is detected, wait for completion (max 30 seconds)
        try:
            await page.wait_for_function("() => window.captchaSolvingFinished === true", timeout=30000)
            print("✅ Captcha processing completed")
        except Exception:
            print("⚠️ Captcha processing timeout, may still be in progress")

    await asyncio.sleep(2)
    print("Test completed")

    # Keep browser open for a while to observe results
    await asyncio.sleep(5)

    # Take screenshot and print base64, can be pasted directly into Chrome address bar
    try:
        screenshot_bytes = await page.screenshot(full_page=False)
        b64 = base64.b64encode(screenshot_bytes).decode("utf-8")
        print("page_screenshot_base64 = data:image/png;base64,", b64)
    except Exception as e:
        print("screenshot failed:", e)


async def main():
    """Run the captcha-solving example end to end.

    Reads ``AGB_API_KEY`` from the environment, creates an AGB session,
    initializes its browser with ``solve_captchas=True``, connects Playwright
    over CDP, runs the Tongcheng page flow, and deletes the session.

    Returns:
        None. A message is printed and the function returns early when the API
        key is missing, session creation fails, or browser initialization
        fails.

    Raises:
        Exception: Errors raised while connecting to or driving the browser
            are printed and re-raised. The session is deleted in all cases
            once it has been created.
    """
    # Get API key from environment variable
    api_key = os.getenv("AGB_API_KEY")
    if not api_key:
        print("Error: AGB_API_KEY environment variable not set")
        return

    # Initialize AGB client
    print("Initializing AGB client...")
    agb = AGB(api_key=api_key)

    # Create a session
    print("Creating a new session...")
    params = CreateSessionParams(
        image_id="agb-browser-use-1",  # Updated image ID for AGB
    )
    session_result = agb.create(params)

    if not session_result.success:
        print(f"❌ Failed to create session: {session_result.error_message}")
        return

    session = session_result.session
    print(f"Session created with ID: {session.session_id}")

    try:
        # Configure browser options
        browser_option = BrowserOption(
            solve_captchas=True
        )

        # Initialize browser
        print("🌐 Initializing browser...")
        success = await session.browser.initialize_async(browser_option)
        if not success:
            print("❌ Browser initialization failed")
            return

        print("✅ Browser initialized successfully")
        endpoint_url = session.browser.get_endpoint_url()
        print(f"🔗 CDP endpoint: {endpoint_url}")

        try:
            async with async_playwright() as p:
                browser = await p.chromium.connect_over_cdp(endpoint_url)
                try:
                    await _run_tongcheng_captcha_flow(browser)
                finally:
                    # Close while the Playwright driver is still running;
                    # after the `async with` exits the connection is gone.
                    await browser.close()
                    print("✅ Browser closed successfully")
        except Exception as e:
            print(f"❌ Error occurred: {e}")
            raise
    finally:
        # Clean up session on every path (success, early return, or error)
        agb.delete(session)
        print("🧹 Session cleaned up")

    print("🎉 Captcha solving example completed successfully!")

# Script entry point: start an event loop and run main() only when this file
# is executed directly, not when it is imported as a module.
if __name__ == "__main__":
    asyncio.run(main())

Notes

  • Browser sessions consume more memory than standard code execution sessions.
  • Screenshots are returned as base64 encoded strings.