Skip to main content

Configuration

crawlee.configuration.Configuration

Configuration of the Crawler.

Args:

  • internal_timeout: Timeout for internal operations, such as marking a request as processed.
  • verbose_log: Allows verbose logging.
  • default_storage_id: The default storage ID.
  • purge_on_start: Whether to purge the storage on start.

Index

Methods

get_global_configuration

  • get_global_configuration(): Self
  • Retrieve the global instance of the configuration.


    Returns Self

Properties

available_memory_ratio

available_memory_ratio: Annotated[ float | None, Field( validation_alias=AliasChoices( 'apify_available_memory_ratio', 'crawlee_available_memory_ratio', ) ), ]

chrome_executable_path

chrome_executable_path: Annotated[ str | None, Field( validation_alias=AliasChoices( 'apify_chrome_executable_path', 'crawlee_chrome_executable_path', ) ), ]

default_browser_path

default_browser_path: Annotated[ str | None, Field( validation_alias=AliasChoices( 'apify_default_browser_path', 'crawlee_default_browser_path', ) ), ]

default_dataset_id

default_dataset_id: Annotated[ str, Field( validation_alias=AliasChoices( 'actor_default_dataset_id', 'apify_default_dataset_id', 'crawlee_default_dataset_id', ) ), ]

default_key_value_store_id

default_key_value_store_id: Annotated[ str, Field( validation_alias=AliasChoices( 'actor_default_key_value_store_id', 'apify_default_key_value_store_id', 'crawlee_default_key_value_store_id', ) ), ]

default_request_queue_id

default_request_queue_id: Annotated[ str, Field( validation_alias=AliasChoices( 'actor_default_request_queue_id', 'apify_default_request_queue_id', 'crawlee_default_request_queue_id', ) ), ]

disable_browser_sandbox

disable_browser_sandbox: Annotated[ bool, Field( validation_alias=AliasChoices( 'apify_disable_browser_sandbox', 'crawlee_disable_browser_sandbox', ) ), ]

headless

headless: Annotated[ bool, Field( validation_alias=AliasChoices( 'apify_headless', 'crawlee_headless', ) ), ]

in_cloud

in_cloud: Annotated[bool, Field(alias='crawlee_in_cloud')]

internal_timeout

internal_timeout: Annotated[timedelta | None, Field(alias='crawlee_internal_timeout')]

log_level

log_level: Annotated[ int, Field( validation_alias=AliasChoices( 'apify_log_level', 'crawlee_log_level', ) ), ]

INFO

max_used_cpu_ratio

max_used_cpu_ratio: Annotated[ float, Field( validation_alias=AliasChoices( 'apify_max_used_cpu_ratio', 'crawlee_max_used_cpu_ratio', ) ), ]

memory_mbytes

memory_mbytes: Annotated[ int | None, Field( validation_alias=AliasChoices( 'actor_memory_mbytes', 'apify_memory_mbytes', 'crawlee_memory_mbytes', ) ), ]

model_config

model_config:

persist_state_interval

persist_state_interval: Annotated[ timedelta_ms, Field( validation_alias=AliasChoices( 'apify_persist_state_interval_millis', 'crawlee_persist_state_interval_millis', ) ), ]

persist_storage

persist_storage: Annotated[ bool, Field( validation_alias=AliasChoices( 'apify_persist_storage', 'crawlee_persist_storage', ) ), ]

purge_on_start

purge_on_start: Annotated[ bool, Field( validation_alias=AliasChoices( 'apify_purge_on_start', 'crawlee_purge_on_start', ) ), ]

storage_dir

storage_dir: Annotated[ str, Field( validation_alias=AliasChoices( 'apify_local_storage_dir', 'crawlee_storage_dir', ), ), ]

system_info_interval

system_info_interval: Annotated[ timedelta_ms, Field( validation_alias=AliasChoices( 'apify_system_info_interval_millis', 'crawlee_system_info_interval_millis', ) ), ]

verbose_log

verbose_log: Annotated[bool, Field(alias='crawlee_verbose_log')]

write_metadata

write_metadata: Annotated[bool, Field(alias='crawlee_write_metadata')]

xvfb

xvfb: Annotated[ bool, Field( validation_alias=AliasChoices( 'apify_xvfb', 'crawlee_xvfb', ) ), ]