Using browser profile
This example demonstrates how to run `PlaywrightCrawler` using your local browser profile from Chrome or Firefox.
Using browser profiles allows you to leverage existing login sessions, saved passwords, bookmarks, and other personalized browser data during crawling. This can be particularly useful for testing scenarios or when you need to access content that requires authentication.
Chrome browser
To run `PlaywrightCrawler` with your Chrome profile, you need to know the path to your profile files. You can find this information by entering `chrome://version/` as a URL in your Chrome browser. If you have multiple profiles, pay attention to the profile name; if you only have one profile, it is always `Default`.
You also need to set the `channel` parameter in `browser_launch_options` so that the Chrome browser installed on your system is used instead of Playwright's Chromium.
Due to Chrome's security policies, automation cannot use your main browsing profile directly. The example copies your profile to a temporary location as a workaround.
Make sure you don't have any running Chrome browser processes before running this code:
import asyncio
import shutil
from pathlib import Path
from tempfile import TemporaryDirectory
from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext
# Name of the Chrome profile to load; single-profile setups normally use 'Default'.
PROFILE_NAME = 'Default'

# Location of the Chrome profiles on this system (Windows layout shown as an example).
# Open `chrome://version/` in Chrome to look up the path on your machine.
PROFILE_PATH = Path.home() / 'AppData' / 'Local' / 'Google' / 'Chrome' / 'User Data'
async def main() -> None:
    """Crawl https://crawlee.dev/ in installed Chrome using a copy of the profile.

    The profile folder is copied into a temporary directory, and the crawler
    is created and run while that directory still exists.
    """
    # Scratch directory that receives the copy of the profile
    with TemporaryDirectory(prefix='crawlee-') as scratch_root:
        user_data_root = Path(scratch_root)
        profile_source = PROFILE_PATH / PROFILE_NAME
        profile_copy = user_data_root / PROFILE_NAME

        # Duplicate the profile into the scratch directory
        shutil.copytree(profile_source, profile_copy, dirs_exist_ok=True)

        launch_options = {
            # Launch the Chrome browser installed on this system
            'channel': 'chrome',
            # Slow actions down to mimic human behavior
            'slow_mo': 200,
            # Select the profile by its name
            'args': [f'--profile-directory={PROFILE_NAME}'],
        }

        crawler = PlaywrightCrawler(
            # Chromium browser type is used for Chrome compatibility
            browser_type='chromium',
            headless=False,
            # No fingerprint generation, so the profile identity is preserved
            fingerprint_generator=None,
            # The temporary folder serves as the user data directory
            user_data_dir=user_data_root,
            browser_launch_options=launch_options,
        )

        @crawler.router.default_handler
        async def log_visit(context: PlaywrightCrawlingContext) -> None:
            """Log the URL of the request being handled."""
            context.log.info(f'Visiting {context.request.url}')

        await crawler.run(['https://crawlee.dev/'])


if __name__ == '__main__':
    asyncio.run(main())
Firefox browser
To find the path to your Firefox profile, enter `about:profiles` as a URL in your Firefox browser. Unlike Chrome, you can use your standard profile path directly without copying it first.
Make sure you don't have any running Firefox browser processes before running this code:
import asyncio
from pathlib import Path
from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext
# Placeholder: put the name of your own Firefox profile here.
# The name is listed on the `about:profiles` page in Firefox.
PROFILE_NAME = 'your-profile-name-here'

# Location of the Firefox profile on this system (Windows layout shown as an example).
# Open `about:profiles` in Firefox to look up the path on your machine.
PROFILE_PATH = Path.home().joinpath(
    'AppData', 'Roaming', 'Mozilla', 'Firefox', 'Profiles', PROFILE_NAME
)
async def main() -> None:
    """Crawl https://crawlee.dev/ in Firefox using the profile at PROFILE_PATH."""
    launch_options = {
        # Needed to avoid version conflicts
        'args': ['--allow-downgrade'],
    }

    crawler = PlaywrightCrawler(
        headless=False,
        # Firefox is the browser type for this profile
        browser_type='firefox',
        # No fingerprint generation, so the profile is used as is
        fingerprint_generator=None,
        # The Firefox profile folder serves as the user data directory
        user_data_dir=PROFILE_PATH,
        browser_launch_options=launch_options,
    )

    @crawler.router.default_handler
    async def log_visit(context: PlaywrightCrawlingContext) -> None:
        """Log the URL of the request being handled."""
        context.log.info(f'Visiting {context.request.url}')

    await crawler.run(['https://crawlee.dev/'])


if __name__ == '__main__':
    asyncio.run(main())