sqlmesh.core.config.scheduler
from __future__ import annotations

import abc
import sys
import typing as t

from pydantic import Field, root_validator
from requests import Session

from sqlmesh.core.config.base import BaseConfig
from sqlmesh.core.config.common import concurrent_tasks_validator
from sqlmesh.core.console import Console
from sqlmesh.core.plan import AirflowPlanEvaluator, BuiltInPlanEvaluator, PlanEvaluator
from sqlmesh.core.state_sync import EngineAdapterStateSync, StateReader, StateSync
from sqlmesh.schedulers.airflow.client import AirflowClient

if t.TYPE_CHECKING:
    from google.auth.transport.requests import AuthorizedSession

    from sqlmesh.core.context import Context

if sys.version_info >= (3, 9):
    from typing import Annotated, Literal
else:
    from typing_extensions import Annotated, Literal


class _SchedulerConfig(abc.ABC):
    """Abstract base class for Scheduler configurations."""

    @abc.abstractmethod
    def create_plan_evaluator(self, context: Context) -> PlanEvaluator:
        """Creates a Plan Evaluator instance.

        Args:
            context: The SQLMesh Context.
        """

    def create_state_sync(self, context: Context) -> t.Optional[StateSync]:
        """Creates a State Sync instance.

        Args:
            context: The SQLMesh Context.

        Returns:
            The StateSync instance.
        """
        return None

    def create_state_reader(self, context: Context) -> t.Optional[StateReader]:
        """Creates a State Reader instance.

        Functionality related to evaluation on a client side (Context.evaluate, Context.run, etc.)
        will be unavailable if a State Reader instance is available but a State Sync instance is not.

        Args:
            context: The SQLMesh Context.

        Returns:
            The StateReader instance.
        """
        return None


class BuiltInSchedulerConfig(_SchedulerConfig, BaseConfig):
    """The Built-In Scheduler configuration."""

    type_: Literal["builtin"] = Field(alias="type", default="builtin")

    def create_state_sync(self, context: Context) -> t.Optional[StateSync]:
        return EngineAdapterStateSync(context.engine_adapter, context.physical_schema)

    def create_plan_evaluator(self, context: Context) -> PlanEvaluator:
        return BuiltInPlanEvaluator(
            state_sync=context.state_sync,
            snapshot_evaluator=context.snapshot_evaluator,
            backfill_concurrent_tasks=context.concurrent_tasks,
            console=context.console,
        )


class _BaseAirflowSchedulerConfig(_SchedulerConfig):
    dag_run_poll_interval_secs: int
    dag_creation_poll_interval_secs: int
    dag_creation_max_retry_attempts: int

    backfill_concurrent_tasks: int
    ddl_concurrent_tasks: int

    @abc.abstractmethod
    def get_client(self, console: t.Optional[Console] = None) -> AirflowClient:
        """Constructs the Airflow Client instance."""

    def create_state_reader(self, context: Context) -> t.Optional[StateReader]:
        from sqlmesh.schedulers.airflow.state_sync import HttpStateReader

        return HttpStateReader(
            client=self.get_client(context.console),
            dag_run_poll_interval_secs=self.dag_run_poll_interval_secs,
            console=context.console,
        )

    def create_plan_evaluator(self, context: Context) -> PlanEvaluator:
        return AirflowPlanEvaluator(
            airflow_client=self.get_client(context.console),
            dag_run_poll_interval_secs=self.dag_run_poll_interval_secs,
            dag_creation_poll_interval_secs=self.dag_creation_poll_interval_secs,
            dag_creation_max_retry_attempts=self.dag_creation_max_retry_attempts,
            console=context.console,
            notification_targets=context.notification_targets,
            backfill_concurrent_tasks=self.backfill_concurrent_tasks,
            ddl_concurrent_tasks=self.ddl_concurrent_tasks,
            users=context.users,
        )


class AirflowSchedulerConfig(_BaseAirflowSchedulerConfig, BaseConfig):
    """The Airflow Scheduler configuration.

    Args:
        airflow_url: The URL of the Airflow Webserver.
        username: The Airflow username.
        password: The Airflow password.
        dag_run_poll_interval_secs: Determines how often a running DAG can be polled (in seconds).
        dag_creation_poll_interval_secs: Determines how often SQLMesh should check whether a DAG has been created (in seconds).
        dag_creation_max_retry_attempts: Determines the maximum number of attempts that SQLMesh will make while checking for
            whether a DAG has been created.
        backfill_concurrent_tasks: The number of concurrent tasks used for model backfilling during plan application.
        ddl_concurrent_tasks: The number of concurrent tasks used for DDL operations (table / view creation, deletion, etc).
    """

    airflow_url: str = "http://localhost:8080/"
    username: str = "airflow"
    password: str = "airflow"
    dag_run_poll_interval_secs: int = 10
    dag_creation_poll_interval_secs: int = 30
    dag_creation_max_retry_attempts: int = 10

    backfill_concurrent_tasks: int = 4
    ddl_concurrent_tasks: int = 4

    type_: Literal["airflow"] = Field(alias="type", default="airflow")

    _concurrent_tasks_validator = concurrent_tasks_validator

    def get_client(self, console: t.Optional[Console] = None) -> AirflowClient:
        session = Session()
        session.headers.update({"Content-Type": "application/json"})
        session.auth = (self.username, self.password)

        return AirflowClient(
            session=session,
            airflow_url=self.airflow_url,
            console=console,
        )


class CloudComposerSchedulerConfig(_BaseAirflowSchedulerConfig, BaseConfig):
    """The Google Cloud Composer configuration.

    Args:
        airflow_url: The URL of the Airflow Webserver.
        dag_run_poll_interval_secs: Determines how often a running DAG can be polled (in seconds).
        dag_creation_poll_interval_secs: Determines how often SQLMesh should check whether a DAG has been created (in seconds).
        dag_creation_max_retry_attempts: Determines the maximum number of attempts that SQLMesh will make while checking for
            whether a DAG has been created.
        backfill_concurrent_tasks: The number of concurrent tasks used for model backfilling during plan application.
        ddl_concurrent_tasks: The number of concurrent tasks used for DDL operations (table / view creation, deletion, etc).
    """

    airflow_url: str
    dag_run_poll_interval_secs: int = 10
    dag_creation_poll_interval_secs: int = 30
    dag_creation_max_retry_attempts: int = 10

    backfill_concurrent_tasks: int = 4
    ddl_concurrent_tasks: int = 4

    type_: Literal["cloud_composer"] = Field(alias="type", default="cloud_composer")

    _concurrent_tasks_validator = concurrent_tasks_validator

    class Config:
        # See `check_supported_fields` for the supported extra fields
        extra = "allow"

    def __init__(self, **data: t.Any) -> None:
        super().__init__(**data)
        self._session: t.Optional[AuthorizedSession] = data.get("session")

    @property
    def session(self) -> AuthorizedSession:
        import google.auth
        from google.auth.transport.requests import AuthorizedSession

        if self._session is None:
            self._session = AuthorizedSession(
                google.auth.default(scopes=["https://www.googleapis.com/auth/cloud-platform"])[0]
            )
            self._session.headers.update({"Content-Type": "application/json"})
        return self._session

    def get_client(self, console: t.Optional[Console] = None) -> AirflowClient:
        return AirflowClient(
            airflow_url=self.airflow_url,
            session=self.session,
            console=console,
        )

    @root_validator(pre=True)
    def check_supported_fields(cls, values: t.Dict[str, t.Any]) -> t.Dict[str, t.Any]:
        allowed_field_names = {field.alias for field in cls.__fields__.values()}
        allowed_field_names.add("session")

        for field_name in values:
            if field_name not in allowed_field_names:
                raise ValueError(f"Unsupported Field: {field_name}")
        return values


SchedulerConfig = Annotated[
    t.Union[BuiltInSchedulerConfig, AirflowSchedulerConfig, CloudComposerSchedulerConfig],
    Field(discriminator="type_"),
]
class BuiltInSchedulerConfig(_SchedulerConfig, sqlmesh.core.config.base.BaseConfig):

class BuiltInSchedulerConfig(_SchedulerConfig, BaseConfig):
    """The Built-In Scheduler configuration."""

    type_: Literal["builtin"] = Field(alias="type", default="builtin")

    def create_state_sync(self, context: Context) -> t.Optional[StateSync]:
        return EngineAdapterStateSync(context.engine_adapter, context.physical_schema)

    def create_plan_evaluator(self, context: Context) -> PlanEvaluator:
        return BuiltInPlanEvaluator(
            state_sync=context.state_sync,
            snapshot_evaluator=context.snapshot_evaluator,
            backfill_concurrent_tasks=context.concurrent_tasks,
            console=context.console,
        )
The Built-In Scheduler configuration.
def create_state_sync(self, context: sqlmesh.core.context.Context) -> Optional[sqlmesh.core.state_sync.base.StateSync]:
    def create_state_sync(self, context: Context) -> t.Optional[StateSync]:
        return EngineAdapterStateSync(context.engine_adapter, context.physical_schema)
Creates a State Sync instance.
Arguments:
- context: The SQLMesh Context.
Returns:
The StateSync instance.
def create_plan_evaluator(self, context: sqlmesh.core.context.Context) -> sqlmesh.core.plan.evaluator.PlanEvaluator:
    def create_plan_evaluator(self, context: Context) -> PlanEvaluator:
        return BuiltInPlanEvaluator(
            state_sync=context.state_sync,
            snapshot_evaluator=context.snapshot_evaluator,
            backfill_concurrent_tasks=context.concurrent_tasks,
            console=context.console,
        )
Creates a Plan Evaluator instance.
Arguments:
- context: The SQLMesh Context.
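A minimal usage sketch; the context variable is assumed to be an existing sqlmesh.core.context.Context rather than anything defined in this module:

from sqlmesh.core.config.scheduler import BuiltInSchedulerConfig

scheduler = BuiltInSchedulerConfig()  # the "type" discriminator defaults to "builtin"

# With a SQLMesh Context in hand, the built-in scheduler persists state through the
# context's engine adapter and evaluates plans in-process:
# state_sync = scheduler.create_state_sync(context)
# plan_evaluator = scheduler.create_plan_evaluator(context)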
class AirflowSchedulerConfig(_BaseAirflowSchedulerConfig, sqlmesh.core.config.base.BaseConfig):

class AirflowSchedulerConfig(_BaseAirflowSchedulerConfig, BaseConfig):
    """The Airflow Scheduler configuration.

    Args:
        airflow_url: The URL of the Airflow Webserver.
        username: The Airflow username.
        password: The Airflow password.
        dag_run_poll_interval_secs: Determines how often a running DAG can be polled (in seconds).
        dag_creation_poll_interval_secs: Determines how often SQLMesh should check whether a DAG has been created (in seconds).
        dag_creation_max_retry_attempts: Determines the maximum number of attempts that SQLMesh will make while checking for
            whether a DAG has been created.
        backfill_concurrent_tasks: The number of concurrent tasks used for model backfilling during plan application.
        ddl_concurrent_tasks: The number of concurrent tasks used for DDL operations (table / view creation, deletion, etc).
    """

    airflow_url: str = "http://localhost:8080/"
    username: str = "airflow"
    password: str = "airflow"
    dag_run_poll_interval_secs: int = 10
    dag_creation_poll_interval_secs: int = 30
    dag_creation_max_retry_attempts: int = 10

    backfill_concurrent_tasks: int = 4
    ddl_concurrent_tasks: int = 4

    type_: Literal["airflow"] = Field(alias="type", default="airflow")

    _concurrent_tasks_validator = concurrent_tasks_validator

    def get_client(self, console: t.Optional[Console] = None) -> AirflowClient:
        session = Session()
        session.headers.update({"Content-Type": "application/json"})
        session.auth = (self.username, self.password)

        return AirflowClient(
            session=session,
            airflow_url=self.airflow_url,
            console=console,
        )
The Airflow Scheduler configuration.
Arguments:
- airflow_url: The URL of the Airflow Webserver.
- username: The Airflow username.
- password: The Airflow password.
- dag_run_poll_interval_secs: Determines how often a running DAG can be polled (in seconds).
- dag_creation_poll_interval_secs: Determines how often SQLMesh should check whether a DAG has been created (in seconds).
- dag_creation_max_retry_attempts: Determines the maximum number of attempts that SQLMesh will make while checking for whether a DAG has been created.
- backfill_concurrent_tasks: The number of concurrent tasks used for model backfilling during plan application.
- ddl_concurrent_tasks: The number of concurrent tasks used for DDL operations (table / view creation, deletion, etc).
def get_client(self, console: Optional[sqlmesh.core.console.Console] = None) -> sqlmesh.schedulers.airflow.client.AirflowClient:
    def get_client(self, console: t.Optional[Console] = None) -> AirflowClient:
        session = Session()
        session.headers.update({"Content-Type": "application/json"})
        session.auth = (self.username, self.password)

        return AirflowClient(
            session=session,
            airflow_url=self.airflow_url,
            console=console,
        )
Constructs the Airflow Client instance.
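As a usage sketch (the URL and credentials are placeholders), the config can be constructed directly, and get_client builds an AirflowClient whose underlying requests.Session uses HTTP basic auth and a JSON content type, as shown above:

from sqlmesh.core.config.scheduler import AirflowSchedulerConfig

config = AirflowSchedulerConfig(
    airflow_url="http://localhost:8080/",
    username="airflow",
    password="airflow",
    backfill_concurrent_tasks=8,
)

client = config.get_client()  # AirflowClient authenticated against the webserver above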
class CloudComposerSchedulerConfig(_BaseAirflowSchedulerConfig, sqlmesh.core.config.base.BaseConfig):
class CloudComposerSchedulerConfig(_BaseAirflowSchedulerConfig, BaseConfig):
    """The Google Cloud Composer configuration.

    Args:
        airflow_url: The URL of the Airflow Webserver.
        dag_run_poll_interval_secs: Determines how often a running DAG can be polled (in seconds).
        dag_creation_poll_interval_secs: Determines how often SQLMesh should check whether a DAG has been created (in seconds).
        dag_creation_max_retry_attempts: Determines the maximum number of attempts that SQLMesh will make while checking for
            whether a DAG has been created.
        backfill_concurrent_tasks: The number of concurrent tasks used for model backfilling during plan application.
        ddl_concurrent_tasks: The number of concurrent tasks used for DDL operations (table / view creation, deletion, etc).
    """

    airflow_url: str
    dag_run_poll_interval_secs: int = 10
    dag_creation_poll_interval_secs: int = 30
    dag_creation_max_retry_attempts: int = 10

    backfill_concurrent_tasks: int = 4
    ddl_concurrent_tasks: int = 4

    type_: Literal["cloud_composer"] = Field(alias="type", default="cloud_composer")

    _concurrent_tasks_validator = concurrent_tasks_validator

    class Config:
        # See `check_supported_fields` for the supported extra fields
        extra = "allow"

    def __init__(self, **data: t.Any) -> None:
        super().__init__(**data)
        self._session: t.Optional[AuthorizedSession] = data.get("session")

    @property
    def session(self) -> AuthorizedSession:
        import google.auth
        from google.auth.transport.requests import AuthorizedSession

        if self._session is None:
            self._session = AuthorizedSession(
                google.auth.default(scopes=["https://www.googleapis.com/auth/cloud-platform"])[0]
            )
            self._session.headers.update({"Content-Type": "application/json"})
        return self._session

    def get_client(self, console: t.Optional[Console] = None) -> AirflowClient:
        return AirflowClient(
            airflow_url=self.airflow_url,
            session=self.session,
            console=console,
        )

    @root_validator(pre=True)
    def check_supported_fields(cls, values: t.Dict[str, t.Any]) -> t.Dict[str, t.Any]:
        allowed_field_names = {field.alias for field in cls.__fields__.values()}
        allowed_field_names.add("session")

        for field_name in values:
            if field_name not in allowed_field_names:
                raise ValueError(f"Unsupported Field: {field_name}")
        return values
The Google Cloud Composer configuration.
Arguments:
- airflow_url: The URL of the Airflow Webserver.
- dag_run_poll_interval_secs: Determines how often a running DAG can be polled (in seconds).
- dag_creation_poll_interval_secs: Determines how often SQLMesh should check whether a DAG has been created (in seconds).
- dag_creation_max_retry_attempts: Determines the maximum number of attempts that SQLMesh will make while checking for whether a DAG has been created.
- backfill_concurrent_tasks: The number of concurrent tasks used for model backfilling during plan application.
- ddl_concurrent_tasks: The number of concurrent tasks used for DDL operations (table / view creation, deletion, etc).
def get_client(self, console: Optional[sqlmesh.core.console.Console] = None) -> sqlmesh.schedulers.airflow.client.AirflowClient:
    def get_client(self, console: t.Optional[Console] = None) -> AirflowClient:
        return AirflowClient(
            airflow_url=self.airflow_url,
            session=self.session,
            console=console,
        )
Constructs the Airflow Client instance.
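Because the extra "session" field is accepted (see check_supported_fields below), a pre-built AuthorizedSession can be injected instead of letting the session property create one lazily from application-default credentials. A sketch, assuming such credentials are available in the environment and using a placeholder Composer URL:

import google.auth
from google.auth.transport.requests import AuthorizedSession

from sqlmesh.core.config.scheduler import CloudComposerSchedulerConfig

credentials, _ = google.auth.default(scopes=["https://www.googleapis.com/auth/cloud-platform"])

config = CloudComposerSchedulerConfig(
    airflow_url="https://example-composer-environment.appspot.com/",  # placeholder URL
    session=AuthorizedSession(credentials),
)

client = config.get_client()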
@root_validator(pre=True)
def check_supported_fields(cls, values: Dict[str, Any]) -> Dict[str, Any]:
    @root_validator(pre=True)
    def check_supported_fields(cls, values: t.Dict[str, t.Any]) -> t.Dict[str, t.Any]:
        allowed_field_names = {field.alias for field in cls.__fields__.values()}
        allowed_field_names.add("session")

        for field_name in values:
            if field_name not in allowed_field_names:
                raise ValueError(f"Unsupported Field: {field_name}")
        return values
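Despite extra = "allow" on the nested Config, this validator rejects anything outside the declared fields plus "session". A quick illustration with a hypothetical field name:

from pydantic import ValidationError

from sqlmesh.core.config.scheduler import CloudComposerSchedulerConfig

try:
    CloudComposerSchedulerConfig(
        airflow_url="https://example-composer-environment.appspot.com/",  # placeholder URL
        not_a_real_field=1,  # hypothetical field; triggers "Unsupported Field: not_a_real_field"
    )
except ValidationError as e:
    print(e)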
class CloudComposerSchedulerConfig.Config: