
sqlmesh.core.config.scheduler

from __future__ import annotations

import abc
import sys
import typing as t

from pydantic import Field, root_validator
from requests import Session

from sqlmesh.core.config.base import BaseConfig
from sqlmesh.core.config.common import concurrent_tasks_validator
from sqlmesh.core.console import Console
from sqlmesh.core.plan import AirflowPlanEvaluator, BuiltInPlanEvaluator, PlanEvaluator
from sqlmesh.core.state_sync import EngineAdapterStateSync, StateReader, StateSync
from sqlmesh.schedulers.airflow.client import AirflowClient

if t.TYPE_CHECKING:
    from google.auth.transport.requests import AuthorizedSession

    from sqlmesh.core.context import Context

if sys.version_info >= (3, 9):
    from typing import Annotated, Literal
else:
    from typing_extensions import Annotated, Literal
class _SchedulerConfig(abc.ABC):
    """Abstract base class for Scheduler configurations."""

    @abc.abstractmethod
    def create_plan_evaluator(self, context: Context) -> PlanEvaluator:
        """Creates a Plan Evaluator instance.

        Args:
            context: The SQLMesh Context.
        """

    def create_state_sync(self, context: Context) -> t.Optional[StateSync]:
        """Creates a State Sync instance.

        Args:
            context: The SQLMesh Context.

        Returns:
            The StateSync instance.
        """
        return None
    def create_state_reader(self, context: Context) -> t.Optional[StateReader]:
        """Creates a State Reader instance.

        Client-side evaluation functionality (Context.evaluate, Context.run, etc.)
        is unavailable when a State Reader instance is available but a State Sync
        instance is not.

        Args:
            context: The SQLMesh Context.

        Returns:
            The StateReader instance.
        """
        return None
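The contract above is deliberately small: a concrete scheduler configuration must implement create_plan_evaluator, while the two state hooks may keep their default None implementations. A minimal sketch of a hypothetical custom configuration (MyCustomSchedulerConfig is illustrative only and not part of this module):

class MyCustomSchedulerConfig(_SchedulerConfig, BaseConfig):
    type_: Literal["my_custom"] = Field(alias="type", default="my_custom")

    def create_plan_evaluator(self, context: Context) -> PlanEvaluator:
        # For illustration, delegate to the built-in evaluator.
        return BuiltInPlanEvaluator(
            state_sync=context.state_sync,
            snapshot_evaluator=context.snapshot_evaluator,
            backfill_concurrent_tasks=context.concurrent_tasks,
            console=context.console,
        )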
class BuiltInSchedulerConfig(_SchedulerConfig, BaseConfig):
    """The Built-In Scheduler configuration."""

    type_: Literal["builtin"] = Field(alias="type", default="builtin")

    def create_state_sync(self, context: Context) -> t.Optional[StateSync]:
        return EngineAdapterStateSync(context.engine_adapter, context.physical_schema)

    def create_plan_evaluator(self, context: Context) -> PlanEvaluator:
        return BuiltInPlanEvaluator(
            state_sync=context.state_sync,
            snapshot_evaluator=context.snapshot_evaluator,
            backfill_concurrent_tasks=context.concurrent_tasks,
            console=context.console,
        )
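The built-in scheduler needs no connection details, so a bare instance (or a {"type": "builtin"} entry in parsed configuration data) is sufficient. A minimal sketch:

config = BuiltInSchedulerConfig()
assert config.type_ == "builtin"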
class _BaseAirflowSchedulerConfig(_SchedulerConfig):
    dag_run_poll_interval_secs: int
    dag_creation_poll_interval_secs: int
    dag_creation_max_retry_attempts: int

    backfill_concurrent_tasks: int
    ddl_concurrent_tasks: int

    @abc.abstractmethod
    def get_client(self, console: t.Optional[Console] = None) -> AirflowClient:
        """Constructs the Airflow Client instance."""

    def create_state_reader(self, context: Context) -> t.Optional[StateReader]:
        from sqlmesh.schedulers.airflow.state_sync import HttpStateReader

        return HttpStateReader(
            client=self.get_client(context.console),
            dag_run_poll_interval_secs=self.dag_run_poll_interval_secs,
            console=context.console,
        )

    def create_plan_evaluator(self, context: Context) -> PlanEvaluator:
        return AirflowPlanEvaluator(
            airflow_client=self.get_client(context.console),
            dag_run_poll_interval_secs=self.dag_run_poll_interval_secs,
            dag_creation_poll_interval_secs=self.dag_creation_poll_interval_secs,
            dag_creation_max_retry_attempts=self.dag_creation_max_retry_attempts,
            console=context.console,
            notification_targets=context.notification_targets,
            backfill_concurrent_tasks=self.backfill_concurrent_tasks,
            ddl_concurrent_tasks=self.ddl_concurrent_tasks,
            users=context.users,
        )
class AirflowSchedulerConfig(_BaseAirflowSchedulerConfig, BaseConfig):
    """The Airflow Scheduler configuration.

    Args:
        airflow_url: The URL of the Airflow Webserver.
        username: The Airflow username.
        password: The Airflow password.
        dag_run_poll_interval_secs: Determines how often a running DAG can be polled (in seconds).
        dag_creation_poll_interval_secs: Determines how often SQLMesh should check whether a DAG has been created (in seconds).
        dag_creation_max_retry_attempts: Determines the maximum number of attempts that SQLMesh will make while checking for
            whether a DAG has been created.
        backfill_concurrent_tasks: The number of concurrent tasks used for model backfilling during plan application.
        ddl_concurrent_tasks: The number of concurrent tasks used for DDL operations (table / view creation, deletion, etc).
    """

    airflow_url: str = "http://localhost:8080/"
    username: str = "airflow"
    password: str = "airflow"
    dag_run_poll_interval_secs: int = 10
    dag_creation_poll_interval_secs: int = 30
    dag_creation_max_retry_attempts: int = 10

    backfill_concurrent_tasks: int = 4
    ddl_concurrent_tasks: int = 4

    type_: Literal["airflow"] = Field(alias="type", default="airflow")

    _concurrent_tasks_validator = concurrent_tasks_validator

    def get_client(self, console: t.Optional[Console] = None) -> AirflowClient:
        session = Session()
        session.headers.update({"Content-Type": "application/json"})
        session.auth = (self.username, self.password)

        return AirflowClient(
            session=session,
            airflow_url=self.airflow_url,
            console=console,
        )
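All fields have defaults that match a local Airflow installation, so only the values that differ need to be overridden. A minimal sketch with illustrative values:

config = AirflowSchedulerConfig(
    airflow_url="http://airflow.internal:8080/",
    username="sqlmesh",
    password="***",
    backfill_concurrent_tasks=8,
)
client = config.get_client()  # a basic-auth requests.Session under the hood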
class CloudComposerSchedulerConfig(_BaseAirflowSchedulerConfig, BaseConfig):
    """The Google Cloud Composer configuration.

    Args:
        airflow_url: The URL of the Airflow Webserver.
        dag_run_poll_interval_secs: Determines how often a running DAG can be polled (in seconds).
        dag_creation_poll_interval_secs: Determines how often SQLMesh should check whether a DAG has been created (in seconds).
        dag_creation_max_retry_attempts: Determines the maximum number of attempts that SQLMesh will make while checking for
            whether a DAG has been created.
        backfill_concurrent_tasks: The number of concurrent tasks used for model backfilling during plan application.
        ddl_concurrent_tasks: The number of concurrent tasks used for DDL operations (table / view creation, deletion, etc).
    """

    airflow_url: str
    dag_run_poll_interval_secs: int = 10
    dag_creation_poll_interval_secs: int = 30
    dag_creation_max_retry_attempts: int = 10

    backfill_concurrent_tasks: int = 4
    ddl_concurrent_tasks: int = 4

    type_: Literal["cloud_composer"] = Field(alias="type", default="cloud_composer")

    _concurrent_tasks_validator = concurrent_tasks_validator

    class Config:
        # See `check_supported_fields` for the supported extra fields
        extra = "allow"

    def __init__(self, **data: t.Any) -> None:
        super().__init__(**data)
        self._session: t.Optional[AuthorizedSession] = data.get("session")

    @property
    def session(self) -> AuthorizedSession:
        import google.auth
        from google.auth.transport.requests import AuthorizedSession

        if self._session is None:
            self._session = AuthorizedSession(
                google.auth.default(scopes=["https://www.googleapis.com/auth/cloud-platform"])[0]
            )
            self._session.headers.update({"Content-Type": "application/json"})
        return self._session

    def get_client(self, console: t.Optional[Console] = None) -> AirflowClient:
        return AirflowClient(
            airflow_url=self.airflow_url,
            session=self.session,
            console=console,
        )

    @root_validator(pre=True)
    def check_supported_fields(cls, values: t.Dict[str, t.Any]) -> t.Dict[str, t.Any]:
        allowed_field_names = {field.alias for field in cls.__fields__.values()}
        allowed_field_names.add("session")

        for field_name in values:
            if field_name not in allowed_field_names:
                raise ValueError(f"Unsupported Field: {field_name}")
        return values
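Because Config.extra is "allow" and check_supported_fields whitelists it, a pre-built AuthorizedSession can be injected through the extra session field; otherwise one is created lazily from application-default credentials. A minimal sketch with an illustrative URL:

import google.auth
from google.auth.transport.requests import AuthorizedSession

credentials, _ = google.auth.default(
    scopes=["https://www.googleapis.com/auth/cloud-platform"]
)
config = CloudComposerSchedulerConfig(
    airflow_url="https://example-composer-webserver.example.com/",
    session=AuthorizedSession(credentials),
)
client = config.get_client()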
SchedulerConfig = Annotated[
    t.Union[BuiltInSchedulerConfig, AirflowSchedulerConfig, CloudComposerSchedulerConfig],
    Field(discriminator="type_"),
]
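SchedulerConfig is a pydantic discriminated union: the type key in input data selects the concrete configuration class. A minimal sketch, assuming pydantic v1 semantics (consistent with the root_validator usage above); SchedulerHolder is a hypothetical wrapper model used only for illustration:

from pydantic import BaseModel

class SchedulerHolder(BaseModel):
    scheduler: SchedulerConfig

holder = SchedulerHolder.parse_obj(
    {"scheduler": {"type": "airflow", "airflow_url": "http://airflow:8080/"}}
)
assert isinstance(holder.scheduler, AirflowSchedulerConfig)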