Crawl multiple URLs
This example demonstrates how to crawl a specified list of URLs using different crawlers. You'll learn how to set up the crawler, define a request handler, and run the crawler with multiple URLs. This setup is useful for scraping data from multiple pages or websites concurrently.
- BeautifulSoupCrawler
- PlaywrightCrawler
import asyncio
from crawlee.crawlers import BeautifulSoupCrawler, BeautifulSoupCrawlingContext
async def main() -> None:
crawler = BeautifulSoupCrawler()
# Define the default request handler, which will be called for every request.
@crawler.router.default_handler
async def request_handler(context: BeautifulSoupCrawlingContext) -> None:
context.log.info(f'Processing {context.request.url} ...')
# Run the crawler with the initial list of requests.
await crawler.run(
[
'https://crawlee.dev',
'https://apify.com',
'https://example.com',
]
)
if __name__ == '__main__':
asyncio.run(main())
import asyncio
from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext
async def main() -> None:
crawler = PlaywrightCrawler()
# Define the default request handler, which will be called for every request.
@crawler.router.default_handler
async def request_handler(context: PlaywrightCrawlingContext) -> None:
context.log.info(f'Processing {context.request.url} ...')
# Run the crawler with the initial list of requests.
await crawler.run(
[
'https://crawlee.dev',
'https://apify.com',
'https://example.com',
]
)
if __name__ == '__main__':
asyncio.run(main())