Edit on GitHub

Context

A SQLMesh context encapsulates a SQLMesh environment. When you create a new context, it will discover and load your project's models, macros, and audits. Afterwards, you can use the context to create and apply plans, visualize your model's lineage, run your audits and model tests, and perform various other tasks. For more information regarding what a context can do, see sqlmesh.core.context.Context.

Examples:

Creating and applying a plan against the staging environment.

from sqlmesh.core.context import Context
context = Context(path="example", config="local_config")
plan = context.plan("staging")
context.apply(plan)

Running audits on your data.

from sqlmesh.core.context import Context
context = Context(path="example", config="local_config")
context.audit("yesterday", "now")

Running tests on your models.

from sqlmesh.core.context import Context
context = Context(path="example")
context.run_tests()
  1"""
  2# Context
  3
  4A SQLMesh context encapsulates a SQLMesh environment. When you create a new context, it will discover and
  5load your project's models, macros, and audits. Afterwards, you can use the context to create and apply
  6plans, visualize your model's lineage, run your audits and model tests, and perform various other tasks.
  7For more information regarding what a context can do, see `sqlmesh.core.context.Context`.
  8
  9# Examples:
 10
 11Creating and applying a plan against the staging environment.
 12```python
 13from sqlmesh.core.context import Context
 14context = Context(path="example", config="local_config")
 15plan = context.plan("staging")
 16context.apply(plan)
 17```
 18
 19Running audits on your data.
 20```python
 21from sqlmesh.core.context import Context
 22context = Context(path="example", config="local_config")
 23context.audit("yesterday", "now")
 24```
 25
 26Running tests on your models.
 27```python
 28from sqlmesh.core.context import Context
 29context = Context(path="example")
 30context.run_tests()
 31```
 32"""
 33from __future__ import annotations
 34
 35import abc
 36import contextlib
 37import typing as t
 38import unittest.result
 39from io import StringIO
 40from pathlib import Path
 41from types import MappingProxyType
 42
 43import pandas as pd
 44from sqlglot import exp
 45
 46from sqlmesh.core import constants as c
 47from sqlmesh.core._typing import NotificationTarget
 48from sqlmesh.core.audit import Audit
 49from sqlmesh.core.config import Config, load_config_from_paths
 50from sqlmesh.core.console import Console, get_console
 51from sqlmesh.core.context_diff import ContextDiff
 52from sqlmesh.core.dialect import format_model_expressions, pandas_to_sql, parse
 53from sqlmesh.core.engine_adapter import EngineAdapter
 54from sqlmesh.core.environment import Environment
 55from sqlmesh.core.hooks import hook
 56from sqlmesh.core.loader import Loader, SqlMeshLoader, update_model_schemas
 57from sqlmesh.core.macros import ExecutableOrMacro
 58from sqlmesh.core.model import Model
 59from sqlmesh.core.plan import Plan
 60from sqlmesh.core.scheduler import Scheduler
 61from sqlmesh.core.snapshot import (
 62    Snapshot,
 63    SnapshotEvaluator,
 64    SnapshotFingerprint,
 65    to_table_mapping,
 66)
 67from sqlmesh.core.state_sync import StateReader, StateSync
 68from sqlmesh.core.test import run_all_model_tests, run_model_tests
 69from sqlmesh.core.user import User
 70from sqlmesh.utils import UniqueKeyDict, sys_path
 71from sqlmesh.utils.dag import DAG
 72from sqlmesh.utils.date import TimeLike, yesterday_ds
 73from sqlmesh.utils.errors import ConfigError, MissingDependencyError, PlanError
 74
 75if t.TYPE_CHECKING:
 76    import graphviz
 77    import pyspark
 78
 79    from sqlmesh.core.engine_adapter._typing import DF
 80
 81    ModelOrSnapshot = t.Union[str, Model, Snapshot]
 82
 83
 84class BaseContext(abc.ABC):
 85    """The base context which defines methods to execute a model."""
 86
 87    @property
 88    @abc.abstractmethod
 89    def _model_tables(self) -> t.Dict[str, str]:
 90        """Returns a mapping of model names to tables."""
 91
 92    @property
 93    @abc.abstractmethod
 94    def engine_adapter(self) -> EngineAdapter:
 95        """Returns an engine adapter."""
 96
 97    @property
 98    def spark(self) -> t.Optional[pyspark.sql.SparkSession]:
 99        """Returns the spark session if it exists."""
100        return self.engine_adapter.spark
101
102    def table(self, model_name: str) -> str:
103        """Gets the physical table name for a given model.
104
105        Args:
106            model_name: The model name.
107
108        Returns:
109            The physical table name.
110        """
111        return self._model_tables[model_name]
112
113    def fetchdf(self, query: t.Union[exp.Expression, str]) -> pd.DataFrame:
114        """Fetches a dataframe given a sql string or sqlglot expression.
115
116        Args:
117            query: SQL string or sqlglot expression.
118
119        Returns:
120            The default dataframe is Pandas, but for Spark a PySpark dataframe is returned.
121        """
122        return self.engine_adapter.fetchdf(query)
123
124    def fetch_pyspark_df(self, query: t.Union[exp.Expression, str]) -> pyspark.sql.DataFrame:
125        """Fetches a PySpark dataframe given a sql string or sqlglot expression.
126
127        Args:
128            query: SQL string or sqlglot expression.
129
130        Returns:
131            A PySpark dataframe.
132        """
133        return self.engine_adapter.fetch_pyspark_df(query)
134
135
class ExecutionContext(BaseContext):
    """The smallest context that is sufficient for executing a model.

    Args:
        engine_adapter: The engine adapter to execute queries against.
        snapshots: All upstream snapshots (by model name) to use for expansion and mapping of physical locations.
        is_dev: Indicates whether the evaluation happens in the development mode and temporary
            tables / table clones should be used where applicable.
    """

    def __init__(
        self,
        engine_adapter: EngineAdapter,
        snapshots: t.Dict[str, Snapshot],
        is_dev: bool,
    ):
        self._engine_adapter = engine_adapter
        self.snapshots = snapshots
        self.is_dev = is_dev
        # The table mapping is computed once, at construction time, from the given snapshots.
        table_mapping = to_table_mapping(snapshots.values(), is_dev)
        self.__model_tables = table_mapping

    @property
    def engine_adapter(self) -> EngineAdapter:
        """The engine adapter passed in at construction time."""
        return self._engine_adapter

    @property
    def _model_tables(self) -> t.Dict[str, str]:
        """The model name to table mapping computed at construction time."""
        return self.__model_tables
166
167
168class Context(BaseContext):
169    """Encapsulates a SQLMesh environment supplying convenient functions to perform various tasks.
170
171    Args:
172        engine_adapter: The default engine adapter to use.
173        notification_targets: The notification target to use. Defaults to what is defined in config.
174        dialect: Default dialect of the sql in models.
175        physical_schema: The schema used to store physical materialized tables.
176        snapshot_ttl: Duration before unpromoted snapshots are removed.
177        path: The directory containing SQLMesh files.
178        config: A Config object or the name of a Config object in config.py.
179        connection: The name of the connection. If not specified the first connection as it appears
180            in configuration will be used.
181        test_connection: The name of the connection to use for tests. If not specified the first
182            connection as it appears in configuration will be used.
183        concurrent_tasks: The maximum number of tasks that can use the connection concurrently.
184        load: Whether or not to automatically load all models and macros (default True).
185        console: The rich instance used for printing out CLI command results.
186        users: A list of users to make known to SQLMesh.
187    """
188
189    def __init__(
190        self,
191        engine_adapter: t.Optional[EngineAdapter] = None,
192        notification_targets: t.Optional[t.List[NotificationTarget]] = None,
193        state_sync: t.Optional[StateSync] = None,
194        dialect: str = "",
195        physical_schema: str = "",
196        snapshot_ttl: str = "",
197        path: str = "",
198        config: t.Optional[t.Union[Config, str]] = None,
199        connection: t.Optional[str] = None,
200        test_connection: t.Optional[str] = None,
201        concurrent_tasks: t.Optional[int] = None,
202        loader: t.Optional[t.Type[Loader]] = None,
203        load: bool = True,
204        console: t.Optional[Console] = None,
205        users: t.Optional[t.List[User]] = None,
206    ):
207        self.console = console or get_console()
208        self.path = Path(path).absolute()
209        if not self.path.is_dir():
210            raise ConfigError(f"{path} is not a directory")
211
212        self.config = self._load_config(config or "config")
213
214        self.physical_schema = physical_schema or self.config.physical_schema or "sqlmesh"
215        self.snapshot_ttl = snapshot_ttl or self.config.snapshot_ttl or c.DEFAULT_SNAPSHOT_TTL
216        self.dag: DAG[str] = DAG()
217
218        self._models: UniqueKeyDict[str, Model] = UniqueKeyDict("models")
219        self._audits: UniqueKeyDict[str, Audit] = UniqueKeyDict("audits")
220        self._macros: UniqueKeyDict[str, ExecutableOrMacro] = UniqueKeyDict("macros")
221        self._hooks: UniqueKeyDict[str, hook] = UniqueKeyDict("hooks")
222
223        self.connection = connection
224        connection_config = self.config.get_connection(connection)
225        self.concurrent_tasks = concurrent_tasks or connection_config.concurrent_tasks
226        self._engine_adapter = engine_adapter or connection_config.create_engine_adapter()
227
228        test_connection_config = (
229            self.config.test_connection
230            if test_connection is None
231            else self.config.get_connection(test_connection)
232        )
233        self._test_engine_adapter = test_connection_config.create_engine_adapter()
234
235        self.dialect = dialect or self.config.model_defaults.dialect or self._engine_adapter.dialect
236
237        self.snapshot_evaluator = SnapshotEvaluator(
238            self.engine_adapter, ddl_concurrent_tasks=self.concurrent_tasks
239        )
240
241        self.notification_targets = self.config.notification_targets + (notification_targets or [])
242
243        self._provided_state_sync: t.Optional[StateSync] = state_sync
244        self._state_sync: t.Optional[StateSync] = None
245        self._state_reader: t.Optional[StateReader] = None
246
247        self.users = self.config.users + (users or [])
248
249        self._loader = (loader or self.config.loader or SqlMeshLoader)()
250
251        if load:
252            self.load()
253
254    @property
255    def engine_adapter(self) -> EngineAdapter:
256        """Returns an engine adapter."""
257        return self._engine_adapter
258
259    def upsert_model(self, model: t.Union[str, Model], **kwargs: t.Any) -> Model:
260        """Update or insert a model.
261
262        The context's models dictionary will be updated to include these changes.
263
264        Args:
265            model: Model name or instance to update.
266            kwargs: The kwargs to update the model with.
267
268        Returns:
269            A new instance of the updated or inserted model.
270        """
271        if isinstance(model, str):
272            model = self._models[model]
273
274        path = model._path  # type: ignore
275        # model.copy() can't be used here due to a cached state that can be a part of a model instance.
276        model = t.cast(Model, type(model)(**{**t.cast(Model, model).dict(), **kwargs}))
277        model._path = path
278        self._models.update({model.name: model})
279
280        self._add_model_to_dag(model)
281        update_model_schemas(self.dialect, self.dag, self._models)
282
283        return model
284
285    def scheduler(self, environment: t.Optional[str] = None) -> Scheduler:
286        """Returns the built-in scheduler.
287
288        Args:
289            environment: The target environment to source model snapshots from, or None
290                if snapshots should be sourced from the currently loaded local state.
291
292        Returns:
293            The built-in scheduler instance.
294        """
295        snapshots: t.Iterable[Snapshot]
296        if environment is not None:
297            stored_environment = self.state_sync.get_environment(environment)
298            if stored_environment is None:
299                raise ConfigError(f"Environment '{environment}' was not found.")
300            snapshots = self.state_sync.get_snapshots(stored_environment.snapshots).values()
301        else:
302            snapshots = self.snapshots.values()
303
304        if not snapshots:
305            raise ConfigError("No models were found")
306
307        return Scheduler(
308            snapshots,
309            self.snapshot_evaluator,
310            self.state_sync,
311            max_workers=self.concurrent_tasks,
312            console=self.console,
313        )
314
315    @property
316    def state_sync(self) -> StateSync:
317        if not self._state_sync:
318            self._state_sync = self._provided_state_sync or self.config.scheduler.create_state_sync(
319                self
320            )
321            if not self._state_sync:
322                raise ConfigError(
323                    "The operation is not supported when using a read-only state sync"
324                )
325            self._state_sync.init_schema()
326        return self._state_sync
327
328    @property
329    def state_reader(self) -> StateReader:
330        if not self._state_reader:
331            try:
332                self._state_reader = self.state_sync
333            except ConfigError:
334                self._state_reader = self.config.scheduler.create_state_reader(self)
335            if not self._state_reader:
336                raise ConfigError(
337                    "Invalid configuration: neither State Sync nor Reader has been configured"
338                )
339        return self._state_reader
340
341    @property
342    def sqlmesh_path(self) -> Path:
343        """Path to the SQLMesh home directory."""
344        return Path.home() / ".sqlmesh"
345
346    @property
347    def models_directory_path(self) -> Path:
348        """Path to the directory where the models are defined"""
349        return self.path / "models"
350
351    @property
352    def macro_directory_path(self) -> Path:
353        """Path to the directory where macros are defined"""
354        return self.path / "macros"
355
356    @property
357    def hook_directory_path(self) -> Path:
358        """Path to the directory where hooks are defined"""
359        return self.path / "hooks"
360
361    @property
362    def test_directory_path(self) -> Path:
363        return self.path / "tests"
364
365    @property
366    def audits_directory_path(self) -> Path:
367        return self.path / "audits"
368
369    @property
370    def ignore_patterns(self) -> t.List[str]:
371        return c.IGNORE_PATTERNS + self.config.ignore_patterns
372
373    def refresh(self) -> None:
374        """Refresh all models that have been updated."""
375        if self._loader.reload_needed():
376            self.load()
377
378    def load(self) -> Context:
379        """Load all files in the context's path."""
380        with sys_path(self.path):
381            project = self._loader.load(self)
382            self._hooks = project.hooks
383            self._macros = project.macros
384            self._models = project.models
385            self._audits = project.audits
386            self.dag = project.dag
387
388        return self
389
390    def run(
391        self,
392        environment: t.Optional[str] = None,
393        *,
394        start: t.Optional[TimeLike] = None,
395        end: t.Optional[TimeLike] = None,
396        latest: t.Optional[TimeLike] = None,
397        skip_janitor: bool = False,
398    ) -> None:
399        """Run the entire dag through the scheduler.
400
401        Args:
402            environment: The target environment to source model snapshots from. Default: prod.
403            start: The start of the interval to render.
404            end: The end of the interval to render.
405            latest: The latest time used for non incremental datasets.
406            skip_janitor: Whether to skip the jantitor task.
407        """
408        self.scheduler(environment=environment or c.PROD).run(start, end, latest)
409
410        if not skip_janitor:
411            self._run_janitor()
412
413    def get_model(self, name: str) -> t.Optional[Model]:
414        """Returns a model with the given name or None if a model with such name doesn't exist."""
415        return self._models.get(name)
416
417    @property
418    def models(self) -> MappingProxyType[str, Model]:
419        """Returns all registered models in this context."""
420        return MappingProxyType(self._models)
421
422    @property
423    def macros(self) -> MappingProxyType[str, ExecutableOrMacro]:
424        """Returns all registered macros in this context."""
425        return MappingProxyType(self._macros)
426
427    @property
428    def hooks(self) -> MappingProxyType[str, hook]:
429        """Returns all registered hooks in this context."""
430        return MappingProxyType(self._hooks)
431
432    @property
433    def snapshots(self) -> t.Dict[str, Snapshot]:
434        """Generates and returns snapshots based on models registered in this context.
435
436        If one of the snapshots has been previosly stored in the persisted state, the stored
437        instance will be returned.
438        """
439        local_snapshots = self.local_snapshots
440
441        stored_snapshots = self.state_reader.get_snapshots(
442            [s.snapshot_id for s in local_snapshots.values()]
443        )
444
445        return {name: stored_snapshots.get(s.snapshot_id, s) for name, s in local_snapshots.items()}
446
447    @property
448    def local_snapshots(self) -> t.Dict[str, Snapshot]:
449        """Generates and returns snapshots based on models registered in this context without reconciling them
450        with the persisted state.
451        """
452        local_snapshots = {}
453        fingerprint_cache: t.Dict[str, SnapshotFingerprint] = {}
454        for model in self._models.values():
455            snapshot = Snapshot.from_model(
456                model,
457                physical_schema=self.physical_schema,
458                models=self._models,
459                ttl=self.snapshot_ttl,
460                audits=self._audits,
461                cache=fingerprint_cache,
462            )
463            local_snapshots[model.name] = snapshot
464        return local_snapshots
465
466    def render(
467        self,
468        model_or_snapshot: ModelOrSnapshot,
469        *,
470        start: t.Optional[TimeLike] = None,
471        end: t.Optional[TimeLike] = None,
472        latest: t.Optional[TimeLike] = None,
473        expand: t.Union[bool, t.Iterable[str]] = False,
474        **kwargs: t.Any,
475    ) -> exp.Expression:
476        """Renders a model's query, expanding macros with provided kwargs, and optionally expanding referenced models.
477
478        Args:
479            model_or_snapshot: The model, model name, or snapshot to render.
480            start: The start of the interval to render.
481            end: The end of the interval to render.
482            latest: The latest time used for non incremental datasets.
483            expand: Whether or not to use expand materialized models, defaults to False.
484                If True, all referenced models are expanded as raw queries.
485                If a list, only referenced models are expanded as raw queries.
486
487        Returns:
488            The rendered expression.
489        """
490        latest = latest or yesterday_ds()
491
492        if isinstance(model_or_snapshot, str):
493            model = self._models[model_or_snapshot]
494        elif isinstance(model_or_snapshot, Snapshot):
495            model = model_or_snapshot.model
496        else:
497            model = model_or_snapshot
498
499        expand = self.dag.upstream(model.name) if expand is True else expand or []
500
501        if model.is_seed:
502            df = next(model.render(self, start=start, end=end, latest=latest, **kwargs))
503            return next(pandas_to_sql(df, model.columns_to_types))
504
505        return model.render_query(
506            start=start,
507            end=end,
508            latest=latest,
509            snapshots=self.snapshots,
510            expand=expand,
511            **kwargs,
512        )
513
514    def evaluate(
515        self,
516        model_or_snapshot: ModelOrSnapshot,
517        start: TimeLike,
518        end: TimeLike,
519        latest: TimeLike,
520        limit: t.Optional[int] = None,
521        **kwargs: t.Any,
522    ) -> DF:
523        """Evaluate a model or snapshot (running its query against a DB/Engine).
524
525        This method is used to test or iterate on models without side effects.
526
527        Args:
528            model_or_snapshot: The model, model name, or snapshot to render.
529            start: The start of the interval to evaluate.
530            end: The end of the interval to evaluate.
531            latest: The latest time used for non incremental datasets.
532            limit: A limit applied to the model.
533        """
534        if isinstance(model_or_snapshot, str):
535            snapshot = self.snapshots[model_or_snapshot]
536        elif isinstance(model_or_snapshot, Snapshot):
537            snapshot = model_or_snapshot
538        else:
539            snapshot = self.snapshots[model_or_snapshot.name]
540
541        df = self.snapshot_evaluator.evaluate(
542            snapshot,
543            start,
544            end,
545            latest,
546            snapshots=self.snapshots,
547            limit=limit or c.DEFAULT_MAX_LIMIT,
548        )
549
550        if df is None:
551            raise RuntimeError(f"Error evaluating {snapshot.model.name}")
552
553        return df
554
555    def format(self) -> None:
556        """Format all models in a given directory."""
557        for model in self._models.values():
558            if not model.is_sql:
559                continue
560            with open(model._path, "r+", encoding="utf-8") as file:
561                expressions = parse(file.read(), default_dialect=self.dialect)
562                file.seek(0)
563                file.write(format_model_expressions(expressions, model.dialect))
564                file.truncate()
565
566    def plan(
567        self,
568        environment: t.Optional[str] = None,
569        *,
570        start: t.Optional[TimeLike] = None,
571        end: t.Optional[TimeLike] = None,
572        create_from: t.Optional[str] = None,
573        skip_tests: bool = False,
574        restate_models: t.Optional[t.Iterable[str]] = None,
575        no_gaps: bool = False,
576        skip_backfill: bool = False,
577        forward_only: bool = False,
578        no_prompts: bool = False,
579        auto_apply: bool = False,
580        no_auto_categorization: t.Optional[bool] = None,
581    ) -> Plan:
582        """Interactively create a migration plan.
583
584        This method compares the current context with an environment. It then presents
585        the differences and asks whether to backfill each modified model.
586
587        Args:
588            environment: The environment to diff and plan against.
589            start: The start date of the backfill if there is one.
590            end: The end date of the backfill if there is one.
591            create_from: The environment to create the target environment from if it
592                doesn't exist. If not specified, the "prod" environment will be used.
593            skip_tests: Unit tests are run by default so this will skip them if enabled
594            restate_models: A list of of either internal or external models that need to be restated
595                for the given plan interval. If the target environment is a production environment,
596                ALL snapshots that depended on these upstream tables will have their intervals deleted
597                (even ones not in this current environment). Only the snapshots in this environment will
598                be backfilled whereas others need to be recovered on a future plan application. For development
599                environments only snapshots that are part of this plan will be affected.
600            no_gaps:  Whether to ensure that new snapshots for models that are already a
601                part of the target environment have no data gaps when compared against previous
602                snapshots for same models.
603            skip_backfill: Whether to skip the backfill step. Default: False.
604            forward_only: Whether the purpose of the plan is to make forward only changes.
605            no_prompts: Whether to disable interactive prompts for the backfill time range. Please note that
606                if this flag is set to true and there are uncategorized changes the plan creation will
607                fail. Default: False.
608            auto_apply: Whether to automatically apply the new plan after creation. Default: False.
609            no_auto_categorization: Indicates whether to disable automatic categorization of model
610                changes (breaking / non-breaking). If not provided, then the corresponding configuration
611                option determines the behavior.
612
613        Returns:
614            The populated Plan object.
615        """
616        environment = environment or c.PROD
617        environment = Environment.normalize_name(environment)
618
619        if skip_backfill and not no_gaps and environment == c.PROD:
620            raise ConfigError(
621                "When targeting the production enviornment either the backfill should not be skipped or the lack of data gaps should be enforced (--no-gaps flag)."
622            )
623
624        self._run_plan_tests(skip_tests)
625
626        plan = Plan(
627            context_diff=self._context_diff(environment or c.PROD, create_from=create_from),
628            dag=self.dag,
629            state_reader=self.state_reader,
630            start=start,
631            end=end,
632            apply=self.apply,
633            restate_models=restate_models,
634            no_gaps=no_gaps,
635            skip_backfill=skip_backfill,
636            is_dev=environment != c.PROD,
637            forward_only=forward_only,
638            environment_ttl=self.config.environment_ttl,
639            categorizer_config=self.config.auto_categorize_changes,
640            auto_categorization_enabled=not no_auto_categorization,
641        )
642
643        if not no_prompts:
644            self.console.plan(plan, auto_apply)
645        elif auto_apply:
646            self.apply(plan)
647
648        return plan
649
650    def apply(self, plan: Plan) -> None:
651        """Applies a plan by pushing snapshots and backfilling data.
652
653        Given a plan, it pushes snapshots into the state sync and then uses the scheduler
654        to backfill all models.
655
656        Args:
657            plan: The plan to apply.
658        """
659        if not plan.context_diff.has_changes and not plan.requires_backfill:
660            return
661        if plan.uncategorized:
662            raise PlanError("Can't apply a plan with uncategorized changes.")
663        self.config.scheduler.create_plan_evaluator(self).evaluate(plan)
664
665    def diff(self, environment: t.Optional[str] = None, detailed: bool = False) -> None:
666        """Show a diff of the current context with a given environment.
667
668        Args:
669            environment: The environment to diff against.
670            detailed: Show the actual SQL differences if True.
671        """
672        environment = environment or c.PROD
673        environment = Environment.normalize_name(environment)
674        self.console.show_model_difference_summary(
675            self._context_diff(environment or c.PROD), detailed
676        )
677
678    def get_dag(self, format: str = "svg") -> graphviz.Digraph:
679        """Gets a graphviz dag.
680
681        This method requires installing the graphviz base library through your package manager
682        and the python graphviz library.
683
684        To display within Databricks:
685        displayHTML(context.get_dag().pipe(encoding='utf-8'))
686
687        Args:
688            format: The desired format to use for representing the graph
689        """
690        from sqlmesh import runtime_env
691
692        try:
693            import graphviz  # type: ignore
694        except ModuleNotFoundError as e:
695            if runtime_env.is_databricks:
696                raise MissingDependencyError(
697                    "Rendering a dag requires graphviz. Run `pip install graphviz` and then `sudo apt-get install -y python3-dev graphviz libgraphviz-dev pkg-config`"
698                )
699            raise MissingDependencyError(
700                "Rendering a dag requires a manual install of graphviz. Run `pip install graphviz` and then install graphviz library: https://graphviz.org/download/."
701            ) from e
702
703        graph = graphviz.Digraph(node_attr={"shape": "box"}, format=format)
704
705        for name, upstream in self.dag.graph.items():
706            graph.node(name)
707            for u in upstream:
708                graph.edge(u, name)
709        return graph
710
711    def render_dag(self, path: str, format: str = "jpeg") -> str:
712        """Render the dag using graphviz.
713
714        This method requires installing the graphviz base library through your package manager
715        and the python graphviz library.
716
717        Args:
718            path: filename to save the dag to
719            format: The desired format to use when rending the dag
720        """
721        graph = self.get_dag(format=format)
722
723        try:
724            return graph.render(path, format=format)
725        except graphviz.backend.execute.ExecutableNotFound as e:
726            raise MissingDependencyError(
727                "Graphviz is pip-installed but the system install is missing. Instructions: https://graphviz.org/download/"
728            ) from e
729
730    def test(
731        self,
732        match_patterns: t.Optional[t.List[str]] = None,
733        tests: t.Optional[t.List[str]] = None,
734        verbose: bool = False,
735    ) -> unittest.result.TestResult:
736        """Discover and run model tests"""
737        verbosity = 2 if verbose else 1
738        try:
739            if tests:
740                result = run_model_tests(
741                    tests=tests,
742                    snapshots=self.local_snapshots,
743                    engine_adapter=self._test_engine_adapter,
744                    verbosity=verbosity,
745                    patterns=match_patterns,
746                    ignore_patterns=self.ignore_patterns,
747                )
748            else:
749                result = run_all_model_tests(
750                    path=self.test_directory_path,
751                    snapshots=self.local_snapshots,
752                    engine_adapter=self._test_engine_adapter,
753                    verbosity=verbosity,
754                    patterns=match_patterns,
755                    ignore_patterns=self.ignore_patterns,
756                )
757        finally:
758            self._test_engine_adapter.close()
759        return result
760
761    def audit(
762        self,
763        start: TimeLike,
764        end: TimeLike,
765        *,
766        models: t.Optional[t.Iterator[str]] = None,
767        latest: t.Optional[TimeLike] = None,
768    ) -> None:
769        """Audit models.
770
771        Args:
772            start: The start of the interval to audit.
773            end: The end of the interval to audit.
774            models: The models to audit. All models will be audited if not specified.
775            latest: The latest time used for non incremental datasets.
776
777        """
778
779        snapshots = (
780            [self.snapshots[model] for model in models] if models else self.snapshots.values()
781        )
782
783        num_audits = sum(len(snapshot.model.audits) for snapshot in snapshots)
784        self.console.log_status_update(f"Found {num_audits} audit(s).")
785        errors = []
786        for snapshot in snapshots:
787            for audit_result in self.snapshot_evaluator.audit(
788                snapshot=snapshot,
789                start=start,
790                end=end,
791                snapshots=self.snapshots,
792                raise_exception=False,
793            ):
794                if audit_result.count:
795                    errors.append(audit_result)
796                    self.console.log_status_update(f"{audit_result.audit.name} FAIL.")
797                else:
798                    self.console.log_status_update(f"{audit_result.audit.name} PASS.")
799
800        self.console.log_status_update(f"\nFinished with {len(errors)} audit error(s).")
801        for error in errors:
802            self.console.log_status_update(
803                f"\nFailure in audit {error.audit.name} ({error.audit._path})."
804            )
805            self.console.log_status_update(f"Got {error.count} results, expected 0.")
806            self.console.show_sql(f"{error.query}")
807        self.console.log_status_update("Done.")
808
809    def close(self) -> None:
810        """Releases all resources allocated by this context."""
811        self.snapshot_evaluator.close()
812
813    def _run_plan_tests(
814        self, skip_tests: bool = False
815    ) -> t.Tuple[t.Optional[unittest.result.TestResult], t.Optional[str]]:
816        if self._test_engine_adapter and not skip_tests:
817            test_output_io = StringIO()
818            with contextlib.redirect_stderr(test_output_io):
819                result = self.test()
820            test_output = test_output_io.getvalue()
821            self.console.log_test_results(result, test_output, self._test_engine_adapter.dialect)
822            if not result.wasSuccessful():
823                raise PlanError(
824                    "Cannot generate plan due to failing test(s). Fix test(s) and run again"
825                )
826            return result, test_output
827        return None, None
828
829    @property
830    def _model_tables(self) -> t.Dict[str, str]:
831        """Mapping of model name to physical table name.
832
833        If a snapshot has not been versioned yet, its view name will be returned.
834        """
835        return {
836            name: snapshot.table_name()
837            if snapshot.version
838            else snapshot.qualified_view_name.for_environment(c.PROD)
839            for name, snapshot in self.snapshots.items()
840        }
841
842    def _context_diff(
843        self,
844        environment: str | Environment,
845        snapshots: t.Optional[t.Dict[str, Snapshot]] = None,
846        create_from: t.Optional[str] = None,
847    ) -> ContextDiff:
848        environment = Environment.normalize_name(environment)
849        return ContextDiff.create(
850            environment, snapshots or self.snapshots, create_from or c.PROD, self.state_reader
851        )
852
853    def _load_config(self, config: t.Union[str, Config]) -> Config:
854        if isinstance(config, Config):
855            return config
856
857        lookup_paths = [
858            self.sqlmesh_path / "config.yml",
859            self.sqlmesh_path / "config.yaml",
860            self.path / "config.py",
861            self.path / "config.yml",
862            self.path / "config.yaml",
863        ]
864        return load_config_from_paths(*lookup_paths, config_name=config)
865
866    def _add_model_to_dag(self, model: Model) -> None:
867        self.dag.graph[model.name] = set()
868
869        self.dag.add(model.name, model.depends_on)
870
871    def _run_janitor(self) -> None:
872        expired_environments = self.state_sync.delete_expired_environments()
873        for expired_environment in expired_environments:
874            self.snapshot_evaluator.demote(expired_environment.snapshots, expired_environment.name)
875
876        expired_snapshots = self.state_sync.delete_expired_snapshots()
877        self.snapshot_evaluator.cleanup(expired_snapshots)
class BaseContext(abc.ABC):
 85class BaseContext(abc.ABC):
 86    """The base context which defines methods to execute a model."""
 87
 88    @property
 89    @abc.abstractmethod
 90    def _model_tables(self) -> t.Dict[str, str]:
 91        """Returns a mapping of model names to tables."""
 92
 93    @property
 94    @abc.abstractmethod
 95    def engine_adapter(self) -> EngineAdapter:
 96        """Returns an engine adapter."""
 97
 98    @property
 99    def spark(self) -> t.Optional[pyspark.sql.SparkSession]:
100        """Returns the spark session if it exists."""
101        return self.engine_adapter.spark
102
103    def table(self, model_name: str) -> str:
104        """Gets the physical table name for a given model.
105
106        Args:
107            model_name: The model name.
108
109        Returns:
110            The physical table name.
111        """
112        return self._model_tables[model_name]
113
114    def fetchdf(self, query: t.Union[exp.Expression, str]) -> pd.DataFrame:
115        """Fetches a dataframe given a sql string or sqlglot expression.
116
117        Args:
118            query: SQL string or sqlglot expression.
119
120        Returns:
121            The default dataframe is Pandas, but for Spark a PySpark dataframe is returned.
122        """
123        return self.engine_adapter.fetchdf(query)
124
125    def fetch_pyspark_df(self, query: t.Union[exp.Expression, str]) -> pyspark.sql.DataFrame:
126        """Fetches a PySpark dataframe given a sql string or sqlglot expression.
127
128        Args:
129            query: SQL string or sqlglot expression.
130
131        Returns:
132            A PySpark dataframe.
133        """
134        return self.engine_adapter.fetch_pyspark_df(query)

The base context which defines methods to execute a model.

Returns an engine adapter.

spark: Optional[pyspark.sql.session.SparkSession]

Returns the spark session if it exists.

def table(self, model_name: str) -> str:
103    def table(self, model_name: str) -> str:
104        """Gets the physical table name for a given model.
105
106        Args:
107            model_name: The model name.
108
109        Returns:
110            The physical table name.
111        """
112        return self._model_tables[model_name]

Gets the physical table name for a given model.

Arguments:
  • model_name: The model name.
Returns:

The physical table name.

def fetchdf( self, query: Union[sqlglot.expressions.Expression, str]) -> pandas.core.frame.DataFrame:
114    def fetchdf(self, query: t.Union[exp.Expression, str]) -> pd.DataFrame:
115        """Fetches a dataframe given a sql string or sqlglot expression.
116
117        Args:
118            query: SQL string or sqlglot expression.
119
120        Returns:
121            The default dataframe is Pandas, but for Spark a PySpark dataframe is returned.
122        """
123        return self.engine_adapter.fetchdf(query)

Fetches a dataframe given a sql string or sqlglot expression.

Arguments:
  • query: SQL string or sqlglot expression.
Returns:

The default dataframe is Pandas, but for Spark a PySpark dataframe is returned.

def fetch_pyspark_df( self, query: Union[sqlglot.expressions.Expression, str]) -> pyspark.sql.dataframe.DataFrame:
125    def fetch_pyspark_df(self, query: t.Union[exp.Expression, str]) -> pyspark.sql.DataFrame:
126        """Fetches a PySpark dataframe given a sql string or sqlglot expression.
127
128        Args:
129            query: SQL string or sqlglot expression.
130
131        Returns:
132            A PySpark dataframe.
133        """
134        return self.engine_adapter.fetch_pyspark_df(query)

Fetches a PySpark dataframe given a sql string or sqlglot expression.

Arguments:
  • query: SQL string or sqlglot expression.
Returns:

A PySpark dataframe.

class ExecutionContext(BaseContext):
class ExecutionContext(BaseContext):
    """The smallest context that is sufficient for executing a model.

    Args:
        engine_adapter: The engine adapter to execute queries against.
        snapshots: All upstream snapshots (by model name) to use for expansion and mapping of physical locations.
        is_dev: Indicates whether the evaluation happens in the development mode and temporary
            tables / table clones should be used where applicable.
    """

    def __init__(
        self,
        engine_adapter: EngineAdapter,
        snapshots: t.Dict[str, Snapshot],
        is_dev: bool,
    ):
        self._engine_adapter = engine_adapter
        self.snapshots = snapshots
        self.is_dev = is_dev
        # Built once at construction time and served through the `_model_tables` property.
        self.__model_tables = to_table_mapping(snapshots.values(), is_dev)

    @property
    def engine_adapter(self) -> EngineAdapter:
        """The engine adapter this context was created with."""
        return self._engine_adapter

    @property
    def _model_tables(self) -> t.Dict[str, str]:
        """Mapping of model names to tables, as computed in the constructor."""
        return self.__model_tables

The minimal context needed to execute a model.

Arguments:
  • engine_adapter: The engine adapter to execute queries against.
  • snapshots: All upstream snapshots (by model name) to use for expansion and mapping of physical locations.
  • is_dev: Indicates whether the evaluation happens in the development mode and temporary tables / table clones should be used where applicable.
ExecutionContext( engine_adapter: sqlmesh.core.engine_adapter.base.EngineAdapter, snapshots: Dict[str, sqlmesh.core.snapshot.definition.Snapshot], is_dev: bool)
147    def __init__(
148        self,
149        engine_adapter: EngineAdapter,
150        snapshots: t.Dict[str, Snapshot],
151        is_dev: bool,
152    ):
153        self.snapshots = snapshots
154        self.is_dev = is_dev
155        self._engine_adapter = engine_adapter
156        self.__model_tables = to_table_mapping(snapshots.values(), is_dev)

Returns an engine adapter.

class Context(BaseContext):
169class Context(BaseContext):
170    """Encapsulates a SQLMesh environment supplying convenient functions to perform various tasks.
171
172    Args:
173        engine_adapter: The default engine adapter to use.
174        notification_targets: The notification target to use. Defaults to what is defined in config.
175        dialect: Default dialect of the sql in models.
176        physical_schema: The schema used to store physical materialized tables.
177        snapshot_ttl: Duration before unpromoted snapshots are removed.
178        path: The directory containing SQLMesh files.
179        config: A Config object or the name of a Config object in config.py.
180        connection: The name of the connection. If not specified the first connection as it appears
181            in configuration will be used.
182        test_connection: The name of the connection to use for tests. If not specified the first
183            connection as it appears in configuration will be used.
184        concurrent_tasks: The maximum number of tasks that can use the connection concurrently.
185        load: Whether or not to automatically load all models and macros (default True).
186        console: The rich instance used for printing out CLI command results.
187        users: A list of users to make known to SQLMesh.
188    """
189
    def __init__(
        self,
        engine_adapter: t.Optional[EngineAdapter] = None,
        notification_targets: t.Optional[t.List[NotificationTarget]] = None,
        state_sync: t.Optional[StateSync] = None,
        dialect: str = "",
        physical_schema: str = "",
        snapshot_ttl: str = "",
        path: str = "",
        config: t.Optional[t.Union[Config, str]] = None,
        connection: t.Optional[str] = None,
        test_connection: t.Optional[str] = None,
        concurrent_tasks: t.Optional[int] = None,
        loader: t.Optional[t.Type[Loader]] = None,
        load: bool = True,
        console: t.Optional[Console] = None,
        users: t.Optional[t.List[User]] = None,
    ):
        # Fall back to the default console when none is supplied.
        self.console = console or get_console()
        # The project path is stored as an absolute path and must be an existing directory.
        self.path = Path(path).absolute()
        if not self.path.is_dir():
            raise ConfigError(f"{path} is not a directory")

        # A config name of "config" is looked up when no config is given.
        self.config = self._load_config(config or "config")

        # Explicit arguments win over the config, which wins over the built-in defaults.
        self.physical_schema = physical_schema or self.config.physical_schema or "sqlmesh"
        self.snapshot_ttl = snapshot_ttl or self.config.snapshot_ttl or c.DEFAULT_SNAPSHOT_TTL
        self.dag: DAG[str] = DAG()

        # Empty registries; `load()` replaces them with the loaded project's contents.
        self._models: UniqueKeyDict[str, Model] = UniqueKeyDict("models")
        self._audits: UniqueKeyDict[str, Audit] = UniqueKeyDict("audits")
        self._macros: UniqueKeyDict[str, ExecutableOrMacro] = UniqueKeyDict("macros")
        self._hooks: UniqueKeyDict[str, hook] = UniqueKeyDict("hooks")

        self.connection = connection
        connection_config = self.config.get_connection(connection)
        self.concurrent_tasks = concurrent_tasks or connection_config.concurrent_tasks
        # An explicitly provided adapter takes precedence over one built from the connection config.
        self._engine_adapter = engine_adapter or connection_config.create_engine_adapter()

        # Without an explicit test connection name the config's `test_connection` is used.
        test_connection_config = (
            self.config.test_connection
            if test_connection is None
            else self.config.get_connection(test_connection)
        )
        self._test_engine_adapter = test_connection_config.create_engine_adapter()

        # Dialect precedence: argument, then the config's model defaults, then the engine adapter.
        self.dialect = dialect or self.config.model_defaults.dialect or self._engine_adapter.dialect

        self.snapshot_evaluator = SnapshotEvaluator(
            self.engine_adapter, ddl_concurrent_tasks=self.concurrent_tasks
        )

        # Targets from the config come first, followed by the ones passed in.
        self.notification_targets = self.config.notification_targets + (notification_targets or [])

        # The state sync and state reader are created lazily by the properties of the same name.
        self._provided_state_sync: t.Optional[StateSync] = state_sync
        self._state_sync: t.Optional[StateSync] = None
        self._state_reader: t.Optional[StateReader] = None

        self.users = self.config.users + (users or [])

        # `loader` and `self.config.loader` are classes; the chosen one is instantiated here.
        self._loader = (loader or self.config.loader or SqlMeshLoader)()

        if load:
            self.load()
254
255    @property
256    def engine_adapter(self) -> EngineAdapter:
257        """Returns an engine adapter."""
258        return self._engine_adapter
259
260    def upsert_model(self, model: t.Union[str, Model], **kwargs: t.Any) -> Model:
261        """Update or insert a model.
262
263        The context's models dictionary will be updated to include these changes.
264
265        Args:
266            model: Model name or instance to update.
267            kwargs: The kwargs to update the model with.
268
269        Returns:
270            A new instance of the updated or inserted model.
271        """
272        if isinstance(model, str):
273            model = self._models[model]
274
275        path = model._path  # type: ignore
276        # model.copy() can't be used here due to a cached state that can be a part of a model instance.
277        model = t.cast(Model, type(model)(**{**t.cast(Model, model).dict(), **kwargs}))
278        model._path = path
279        self._models.update({model.name: model})
280
281        self._add_model_to_dag(model)
282        update_model_schemas(self.dialect, self.dag, self._models)
283
284        return model
285
286    def scheduler(self, environment: t.Optional[str] = None) -> Scheduler:
287        """Returns the built-in scheduler.
288
289        Args:
290            environment: The target environment to source model snapshots from, or None
291                if snapshots should be sourced from the currently loaded local state.
292
293        Returns:
294            The built-in scheduler instance.
295        """
296        snapshots: t.Iterable[Snapshot]
297        if environment is not None:
298            stored_environment = self.state_sync.get_environment(environment)
299            if stored_environment is None:
300                raise ConfigError(f"Environment '{environment}' was not found.")
301            snapshots = self.state_sync.get_snapshots(stored_environment.snapshots).values()
302        else:
303            snapshots = self.snapshots.values()
304
305        if not snapshots:
306            raise ConfigError("No models were found")
307
308        return Scheduler(
309            snapshots,
310            self.snapshot_evaluator,
311            self.state_sync,
312            max_workers=self.concurrent_tasks,
313            console=self.console,
314        )
315
316    @property
317    def state_sync(self) -> StateSync:
318        if not self._state_sync:
319            self._state_sync = self._provided_state_sync or self.config.scheduler.create_state_sync(
320                self
321            )
322            if not self._state_sync:
323                raise ConfigError(
324                    "The operation is not supported when using a read-only state sync"
325                )
326            self._state_sync.init_schema()
327        return self._state_sync
328
329    @property
330    def state_reader(self) -> StateReader:
331        if not self._state_reader:
332            try:
333                self._state_reader = self.state_sync
334            except ConfigError:
335                self._state_reader = self.config.scheduler.create_state_reader(self)
336            if not self._state_reader:
337                raise ConfigError(
338                    "Invalid configuration: neither State Sync nor Reader has been configured"
339                )
340        return self._state_reader
341
342    @property
343    def sqlmesh_path(self) -> Path:
344        """Path to the SQLMesh home directory."""
345        return Path.home() / ".sqlmesh"
346
347    @property
348    def models_directory_path(self) -> Path:
349        """Path to the directory where the models are defined"""
350        return self.path / "models"
351
352    @property
353    def macro_directory_path(self) -> Path:
354        """Path to the directory where macros are defined"""
355        return self.path / "macros"
356
357    @property
358    def hook_directory_path(self) -> Path:
359        """Path to the directory where hooks are defined"""
360        return self.path / "hooks"
361
362    @property
363    def test_directory_path(self) -> Path:
364        return self.path / "tests"
365
366    @property
367    def audits_directory_path(self) -> Path:
368        return self.path / "audits"
369
370    @property
371    def ignore_patterns(self) -> t.List[str]:
372        return c.IGNORE_PATTERNS + self.config.ignore_patterns
373
374    def refresh(self) -> None:
375        """Refresh all models that have been updated."""
376        if self._loader.reload_needed():
377            self.load()
378
379    def load(self) -> Context:
380        """Load all files in the context's path."""
381        with sys_path(self.path):
382            project = self._loader.load(self)
383            self._hooks = project.hooks
384            self._macros = project.macros
385            self._models = project.models
386            self._audits = project.audits
387            self.dag = project.dag
388
389        return self
390
391    def run(
392        self,
393        environment: t.Optional[str] = None,
394        *,
395        start: t.Optional[TimeLike] = None,
396        end: t.Optional[TimeLike] = None,
397        latest: t.Optional[TimeLike] = None,
398        skip_janitor: bool = False,
399    ) -> None:
400        """Run the entire dag through the scheduler.
401
402        Args:
403            environment: The target environment to source model snapshots from. Default: prod.
404            start: The start of the interval to render.
405            end: The end of the interval to render.
406            latest: The latest time used for non incremental datasets.
407            skip_janitor: Whether to skip the jantitor task.
408        """
409        self.scheduler(environment=environment or c.PROD).run(start, end, latest)
410
411        if not skip_janitor:
412            self._run_janitor()
413
414    def get_model(self, name: str) -> t.Optional[Model]:
415        """Returns a model with the given name or None if a model with such name doesn't exist."""
416        return self._models.get(name)
417
418    @property
419    def models(self) -> MappingProxyType[str, Model]:
420        """Returns all registered models in this context."""
421        return MappingProxyType(self._models)
422
423    @property
424    def macros(self) -> MappingProxyType[str, ExecutableOrMacro]:
425        """Returns all registered macros in this context."""
426        return MappingProxyType(self._macros)
427
428    @property
429    def hooks(self) -> MappingProxyType[str, hook]:
430        """Returns all registered hooks in this context."""
431        return MappingProxyType(self._hooks)
432
433    @property
434    def snapshots(self) -> t.Dict[str, Snapshot]:
435        """Generates and returns snapshots based on models registered in this context.
436
437        If one of the snapshots has been previosly stored in the persisted state, the stored
438        instance will be returned.
439        """
440        local_snapshots = self.local_snapshots
441
442        stored_snapshots = self.state_reader.get_snapshots(
443            [s.snapshot_id for s in local_snapshots.values()]
444        )
445
446        return {name: stored_snapshots.get(s.snapshot_id, s) for name, s in local_snapshots.items()}
447
448    @property
449    def local_snapshots(self) -> t.Dict[str, Snapshot]:
450        """Generates and returns snapshots based on models registered in this context without reconciling them
451        with the persisted state.
452        """
453        local_snapshots = {}
454        fingerprint_cache: t.Dict[str, SnapshotFingerprint] = {}
455        for model in self._models.values():
456            snapshot = Snapshot.from_model(
457                model,
458                physical_schema=self.physical_schema,
459                models=self._models,
460                ttl=self.snapshot_ttl,
461                audits=self._audits,
462                cache=fingerprint_cache,
463            )
464            local_snapshots[model.name] = snapshot
465        return local_snapshots
466
467    def render(
468        self,
469        model_or_snapshot: ModelOrSnapshot,
470        *,
471        start: t.Optional[TimeLike] = None,
472        end: t.Optional[TimeLike] = None,
473        latest: t.Optional[TimeLike] = None,
474        expand: t.Union[bool, t.Iterable[str]] = False,
475        **kwargs: t.Any,
476    ) -> exp.Expression:
477        """Renders a model's query, expanding macros with provided kwargs, and optionally expanding referenced models.
478
479        Args:
480            model_or_snapshot: The model, model name, or snapshot to render.
481            start: The start of the interval to render.
482            end: The end of the interval to render.
483            latest: The latest time used for non incremental datasets.
484            expand: Whether or not to use expand materialized models, defaults to False.
485                If True, all referenced models are expanded as raw queries.
486                If a list, only referenced models are expanded as raw queries.
487
488        Returns:
489            The rendered expression.
490        """
491        latest = latest or yesterday_ds()
492
493        if isinstance(model_or_snapshot, str):
494            model = self._models[model_or_snapshot]
495        elif isinstance(model_or_snapshot, Snapshot):
496            model = model_or_snapshot.model
497        else:
498            model = model_or_snapshot
499
500        expand = self.dag.upstream(model.name) if expand is True else expand or []
501
502        if model.is_seed:
503            df = next(model.render(self, start=start, end=end, latest=latest, **kwargs))
504            return next(pandas_to_sql(df, model.columns_to_types))
505
506        return model.render_query(
507            start=start,
508            end=end,
509            latest=latest,
510            snapshots=self.snapshots,
511            expand=expand,
512            **kwargs,
513        )
514
515    def evaluate(
516        self,
517        model_or_snapshot: ModelOrSnapshot,
518        start: TimeLike,
519        end: TimeLike,
520        latest: TimeLike,
521        limit: t.Optional[int] = None,
522        **kwargs: t.Any,
523    ) -> DF:
524        """Evaluate a model or snapshot (running its query against a DB/Engine).
525
526        This method is used to test or iterate on models without side effects.
527
528        Args:
529            model_or_snapshot: The model, model name, or snapshot to render.
530            start: The start of the interval to evaluate.
531            end: The end of the interval to evaluate.
532            latest: The latest time used for non incremental datasets.
533            limit: A limit applied to the model.
534        """
535        if isinstance(model_or_snapshot, str):
536            snapshot = self.snapshots[model_or_snapshot]
537        elif isinstance(model_or_snapshot, Snapshot):
538            snapshot = model_or_snapshot
539        else:
540            snapshot = self.snapshots[model_or_snapshot.name]
541
542        df = self.snapshot_evaluator.evaluate(
543            snapshot,
544            start,
545            end,
546            latest,
547            snapshots=self.snapshots,
548            limit=limit or c.DEFAULT_MAX_LIMIT,
549        )
550
551        if df is None:
552            raise RuntimeError(f"Error evaluating {snapshot.model.name}")
553
554        return df
555
556    def format(self) -> None:
557        """Format all models in a given directory."""
558        for model in self._models.values():
559            if not model.is_sql:
560                continue
561            with open(model._path, "r+", encoding="utf-8") as file:
562                expressions = parse(file.read(), default_dialect=self.dialect)
563                file.seek(0)
564                file.write(format_model_expressions(expressions, model.dialect))
565                file.truncate()
566
    def plan(
        self,
        environment: t.Optional[str] = None,
        *,
        start: t.Optional[TimeLike] = None,
        end: t.Optional[TimeLike] = None,
        create_from: t.Optional[str] = None,
        skip_tests: bool = False,
        restate_models: t.Optional[t.Iterable[str]] = None,
        no_gaps: bool = False,
        skip_backfill: bool = False,
        forward_only: bool = False,
        no_prompts: bool = False,
        auto_apply: bool = False,
        no_auto_categorization: t.Optional[bool] = None,
    ) -> Plan:
        """Interactively create a migration plan.

        This method compares the current context with an environment. It then presents
        the differences and asks whether to backfill each modified model.

        Args:
            environment: The environment to diff and plan against. Defaults to prod.
            start: The start date of the backfill if there is one.
            end: The end date of the backfill if there is one.
            create_from: The environment to create the target environment from if it
                doesn't exist. If not specified, the "prod" environment will be used.
            skip_tests: Unit tests are run by default so this will skip them if enabled
            restate_models: A list of either internal or external models that need to be restated
                for the given plan interval. If the target environment is a production environment,
                ALL snapshots that depended on these upstream tables will have their intervals deleted
                (even ones not in this current environment). Only the snapshots in this environment will
                be backfilled whereas others need to be recovered on a future plan application. For development
                environments only snapshots that are part of this plan will be affected.
            no_gaps:  Whether to ensure that new snapshots for models that are already a
                part of the target environment have no data gaps when compared against previous
                snapshots for same models.
            skip_backfill: Whether to skip the backfill step. Default: False.
            forward_only: Whether the purpose of the plan is to make forward only changes.
            no_prompts: Whether to disable interactive prompts for the backfill time range. Please note that
                if this flag is set to true and there are uncategorized changes the plan creation will
                fail. Default: False.
            auto_apply: Whether to automatically apply the new plan after creation. Default: False.
            no_auto_categorization: Indicates whether to disable automatic categorization of model
                changes (breaking / non-breaking). If not provided, then the corresponding configuration
                option determines the behavior.

        Returns:
            The populated Plan object.

        Raises:
            ConfigError: If the backfill is skipped for the production environment without
                enforcing the lack of data gaps.
            PlanError: If the model tests are run and any of them fails.
        """
        environment = environment or c.PROD
        environment = Environment.normalize_name(environment)

        # Skipping the backfill in prod is only accepted together with `no_gaps`.
        if skip_backfill and not no_gaps and environment == c.PROD:
            raise ConfigError(
                "When targeting the production enviornment either the backfill should not be skipped or the lack of data gaps should be enforced (--no-gaps flag)."
            )

        # Tests run before the plan is built; a failing test aborts plan creation.
        self._run_plan_tests(skip_tests)

        plan = Plan(
            context_diff=self._context_diff(environment or c.PROD, create_from=create_from),
            dag=self.dag,
            state_reader=self.state_reader,
            start=start,
            end=end,
            apply=self.apply,
            restate_models=restate_models,
            no_gaps=no_gaps,
            skip_backfill=skip_backfill,
            is_dev=environment != c.PROD,
            forward_only=forward_only,
            environment_ttl=self.config.environment_ttl,
            categorizer_config=self.config.auto_categorize_changes,
            # NOTE(review): `not None` is True, so leaving the flag unset enables auto
            # categorization here - confirm this matches the documented fallback to the config.
            auto_categorization_enabled=not no_auto_categorization,
        )

        # With prompts enabled the console drives the flow and is handed `auto_apply`;
        # without prompts the plan is applied directly when `auto_apply` is set.
        if not no_prompts:
            self.console.plan(plan, auto_apply)
        elif auto_apply:
            self.apply(plan)

        return plan
650
651    def apply(self, plan: Plan) -> None:
652        """Applies a plan by pushing snapshots and backfilling data.
653
654        Given a plan, it pushes snapshots into the state sync and then uses the scheduler
655        to backfill all models.
656
657        Args:
658            plan: The plan to apply.
659        """
660        if not plan.context_diff.has_changes and not plan.requires_backfill:
661            return
662        if plan.uncategorized:
663            raise PlanError("Can't apply a plan with uncategorized changes.")
664        self.config.scheduler.create_plan_evaluator(self).evaluate(plan)
665
666    def diff(self, environment: t.Optional[str] = None, detailed: bool = False) -> None:
667        """Show a diff of the current context with a given environment.
668
669        Args:
670            environment: The environment to diff against.
671            detailed: Show the actual SQL differences if True.
672        """
673        environment = environment or c.PROD
674        environment = Environment.normalize_name(environment)
675        self.console.show_model_difference_summary(
676            self._context_diff(environment or c.PROD), detailed
677        )
678
679    def get_dag(self, format: str = "svg") -> graphviz.Digraph:
680        """Gets a graphviz dag.
681
682        This method requires installing the graphviz base library through your package manager
683        and the python graphviz library.
684
685        To display within Databricks:
686        displayHTML(context.get_dag().pipe(encoding='utf-8'))
687
688        Args:
689            format: The desired format to use for representing the graph
690        """
691        from sqlmesh import runtime_env
692
693        try:
694            import graphviz  # type: ignore
695        except ModuleNotFoundError as e:
696            if runtime_env.is_databricks:
697                raise MissingDependencyError(
698                    "Rendering a dag requires graphviz. Run `pip install graphviz` and then `sudo apt-get install -y python3-dev graphviz libgraphviz-dev pkg-config`"
699                )
700            raise MissingDependencyError(
701                "Rendering a dag requires a manual install of graphviz. Run `pip install graphviz` and then install graphviz library: https://graphviz.org/download/."
702            ) from e
703
704        graph = graphviz.Digraph(node_attr={"shape": "box"}, format=format)
705
706        for name, upstream in self.dag.graph.items():
707            graph.node(name)
708            for u in upstream:
709                graph.edge(u, name)
710        return graph
711
712    def render_dag(self, path: str, format: str = "jpeg") -> str:
713        """Render the dag using graphviz.
714
715        This method requires installing the graphviz base library through your package manager
716        and the python graphviz library.
717
718        Args:
719            path: filename to save the dag to
720            format: The desired format to use when rending the dag
721        """
722        graph = self.get_dag(format=format)
723
724        try:
725            return graph.render(path, format=format)
726        except graphviz.backend.execute.ExecutableNotFound as e:
727            raise MissingDependencyError(
728                "Graphviz is pip-installed but the system install is missing. Instructions: https://graphviz.org/download/"
729            ) from e
730
731    def test(
732        self,
733        match_patterns: t.Optional[t.List[str]] = None,
734        tests: t.Optional[t.List[str]] = None,
735        verbose: bool = False,
736    ) -> unittest.result.TestResult:
737        """Discover and run model tests"""
738        verbosity = 2 if verbose else 1
739        try:
740            if tests:
741                result = run_model_tests(
742                    tests=tests,
743                    snapshots=self.local_snapshots,
744                    engine_adapter=self._test_engine_adapter,
745                    verbosity=verbosity,
746                    patterns=match_patterns,
747                    ignore_patterns=self.ignore_patterns,
748                )
749            else:
750                result = run_all_model_tests(
751                    path=self.test_directory_path,
752                    snapshots=self.local_snapshots,
753                    engine_adapter=self._test_engine_adapter,
754                    verbosity=verbosity,
755                    patterns=match_patterns,
756                    ignore_patterns=self.ignore_patterns,
757                )
758        finally:
759            self._test_engine_adapter.close()
760        return result
761
762    def audit(
763        self,
764        start: TimeLike,
765        end: TimeLike,
766        *,
767        models: t.Optional[t.Iterator[str]] = None,
768        latest: t.Optional[TimeLike] = None,
769    ) -> None:
770        """Audit models.
771
772        Args:
773            start: The start of the interval to audit.
774            end: The end of the interval to audit.
775            models: The models to audit. All models will be audited if not specified.
776            latest: The latest time used for non incremental datasets.
777
778        """
779
780        snapshots = (
781            [self.snapshots[model] for model in models] if models else self.snapshots.values()
782        )
783
784        num_audits = sum(len(snapshot.model.audits) for snapshot in snapshots)
785        self.console.log_status_update(f"Found {num_audits} audit(s).")
786        errors = []
787        for snapshot in snapshots:
788            for audit_result in self.snapshot_evaluator.audit(
789                snapshot=snapshot,
790                start=start,
791                end=end,
792                snapshots=self.snapshots,
793                raise_exception=False,
794            ):
795                if audit_result.count:
796                    errors.append(audit_result)
797                    self.console.log_status_update(f"{audit_result.audit.name} FAIL.")
798                else:
799                    self.console.log_status_update(f"{audit_result.audit.name} PASS.")
800
801        self.console.log_status_update(f"\nFinished with {len(errors)} audit error(s).")
802        for error in errors:
803            self.console.log_status_update(
804                f"\nFailure in audit {error.audit.name} ({error.audit._path})."
805            )
806            self.console.log_status_update(f"Got {error.count} results, expected 0.")
807            self.console.show_sql(f"{error.query}")
808        self.console.log_status_update("Done.")
809
810    def close(self) -> None:
811        """Releases all resources allocated by this context."""
812        self.snapshot_evaluator.close()
813
814    def _run_plan_tests(
815        self, skip_tests: bool = False
816    ) -> t.Tuple[t.Optional[unittest.result.TestResult], t.Optional[str]]:
817        if self._test_engine_adapter and not skip_tests:
818            test_output_io = StringIO()
819            with contextlib.redirect_stderr(test_output_io):
820                result = self.test()
821            test_output = test_output_io.getvalue()
822            self.console.log_test_results(result, test_output, self._test_engine_adapter.dialect)
823            if not result.wasSuccessful():
824                raise PlanError(
825                    "Cannot generate plan due to failing test(s). Fix test(s) and run again"
826                )
827            return result, test_output
828        return None, None
829
830    @property
831    def _model_tables(self) -> t.Dict[str, str]:
832        """Mapping of model name to physical table name.
833
834        If a snapshot has not been versioned yet, its view name will be returned.
835        """
836        return {
837            name: snapshot.table_name()
838            if snapshot.version
839            else snapshot.qualified_view_name.for_environment(c.PROD)
840            for name, snapshot in self.snapshots.items()
841        }
842
843    def _context_diff(
844        self,
845        environment: str | Environment,
846        snapshots: t.Optional[t.Dict[str, Snapshot]] = None,
847        create_from: t.Optional[str] = None,
848    ) -> ContextDiff:
849        environment = Environment.normalize_name(environment)
850        return ContextDiff.create(
851            environment, snapshots or self.snapshots, create_from or c.PROD, self.state_reader
852        )
853
854    def _load_config(self, config: t.Union[str, Config]) -> Config:
855        if isinstance(config, Config):
856            return config
857
858        lookup_paths = [
859            self.sqlmesh_path / "config.yml",
860            self.sqlmesh_path / "config.yaml",
861            self.path / "config.py",
862            self.path / "config.yml",
863            self.path / "config.yaml",
864        ]
865        return load_config_from_paths(*lookup_paths, config_name=config)
866
867    def _add_model_to_dag(self, model: Model) -> None:
868        self.dag.graph[model.name] = set()
869
870        self.dag.add(model.name, model.depends_on)
871
872    def _run_janitor(self) -> None:
873        expired_environments = self.state_sync.delete_expired_environments()
874        for expired_environment in expired_environments:
875            self.snapshot_evaluator.demote(expired_environment.snapshots, expired_environment.name)
876
877        expired_snapshots = self.state_sync.delete_expired_snapshots()
878        self.snapshot_evaluator.cleanup(expired_snapshots)

Encapsulates a SQLMesh environment supplying convenient functions to perform various tasks.

Arguments:
  • engine_adapter: The default engine adapter to use.
  • notification_targets: The notification target to use. Defaults to what is defined in config.
  • dialect: Default dialect of the sql in models.
  • physical_schema: The schema used to store physical materialized tables.
  • snapshot_ttl: Duration before unpromoted snapshots are removed.
  • path: The directory containing SQLMesh files.
  • config: A Config object or the name of a Config object in config.py.
  • connection: The name of the connection. If not specified the first connection as it appears in configuration will be used.
  • test_connection: The name of the connection to use for tests. If not specified the first connection as it appears in configuration will be used.
  • concurrent_tasks: The maximum number of tasks that can use the connection concurrently.
  • load: Whether or not to automatically load all models and macros (default True).
  • console: The rich instance used for printing out CLI command results.
  • users: A list of users to make known to SQLMesh.
Context( engine_adapter: Optional[sqlmesh.core.engine_adapter.base.EngineAdapter] = None, notification_targets: Optional[List[Annotated[Union[sqlmesh.core.notification_target.ConsoleNotificationTarget, sqlmesh.integrations.github.notification_target.GithubNotificationTarget], FieldInfo(default=PydanticUndefined, discriminator='type_', extra={})]]] = None, state_sync: Optional[sqlmesh.core.state_sync.base.StateSync] = None, dialect: str = '', physical_schema: str = '', snapshot_ttl: str = '', path: str = '', config: Union[sqlmesh.core.config.root.Config, str, NoneType] = None, connection: Optional[str] = None, test_connection: Optional[str] = None, concurrent_tasks: Optional[int] = None, loader: Optional[Type[sqlmesh.core.loader.Loader]] = None, load: bool = True, console: Optional[sqlmesh.core.console.Console] = None, users: Optional[List[sqlmesh.core.user.User]] = None)
    def __init__(
        self,
        engine_adapter: t.Optional[EngineAdapter] = None,
        notification_targets: t.Optional[t.List[NotificationTarget]] = None,
        state_sync: t.Optional[StateSync] = None,
        dialect: str = "",
        physical_schema: str = "",
        snapshot_ttl: str = "",
        path: str = "",
        config: t.Optional[t.Union[Config, str]] = None,
        connection: t.Optional[str] = None,
        test_connection: t.Optional[str] = None,
        concurrent_tasks: t.Optional[int] = None,
        loader: t.Optional[t.Type[Loader]] = None,
        load: bool = True,
        console: t.Optional[Console] = None,
        users: t.Optional[t.List[User]] = None,
    ):
        # Explicit arguments take precedence over the loaded configuration throughout
        # this constructor; the configuration in turn takes precedence over built-in
        # defaults.
        self.console = console or get_console()
        # The project path is made absolute and must be an existing directory.
        self.path = Path(path).absolute()
        if not self.path.is_dir():
            raise ConfigError(f"{path} is not a directory")

        # With no config given, the config named "config" is looked up.
        self.config = self._load_config(config or "config")

        self.physical_schema = physical_schema or self.config.physical_schema or "sqlmesh"
        self.snapshot_ttl = snapshot_ttl or self.config.snapshot_ttl or c.DEFAULT_SNAPSHOT_TTL
        self.dag: DAG[str] = DAG()

        # Registries start empty; load() replaces them with the loaded project's contents.
        self._models: UniqueKeyDict[str, Model] = UniqueKeyDict("models")
        self._audits: UniqueKeyDict[str, Audit] = UniqueKeyDict("audits")
        self._macros: UniqueKeyDict[str, ExecutableOrMacro] = UniqueKeyDict("macros")
        self._hooks: UniqueKeyDict[str, hook] = UniqueKeyDict("hooks")

        self.connection = connection
        connection_config = self.config.get_connection(connection)
        self.concurrent_tasks = concurrent_tasks or connection_config.concurrent_tasks
        # An adapter is only created from the connection config when none was passed in.
        self._engine_adapter = engine_adapter or connection_config.create_engine_adapter()

        # The test adapter always comes from configuration: the config's test connection
        # by default, or the named connection when test_connection is given.
        test_connection_config = (
            self.config.test_connection
            if test_connection is None
            else self.config.get_connection(test_connection)
        )
        self._test_engine_adapter = test_connection_config.create_engine_adapter()

        self.dialect = dialect or self.config.model_defaults.dialect or self._engine_adapter.dialect

        # NOTE(review): self.engine_adapter is not defined in this method; presumably a
        # property exposing self._engine_adapter — confirm against the class body.
        self.snapshot_evaluator = SnapshotEvaluator(
            self.engine_adapter, ddl_concurrent_tasks=self.concurrent_tasks
        )

        # Targets passed in are appended to the ones from the configuration.
        self.notification_targets = self.config.notification_targets + (notification_targets or [])

        # The provided state sync is only stored here; _state_sync and _state_reader
        # start out unset.
        self._provided_state_sync: t.Optional[StateSync] = state_sync
        self._state_sync: t.Optional[StateSync] = None
        self._state_reader: t.Optional[StateReader] = None

        # Users passed in are appended to the ones from the configuration.
        self.users = self.config.users + (users or [])

        # The chosen loader class is instantiated with no arguments.
        self._loader = (loader or self.config.loader or SqlMeshLoader)()

        if load:
            self.load()

Returns an engine adapter.

def upsert_model( self, model: Union[str, Annotated[Union[sqlmesh.core.model.definition.SqlModel, sqlmesh.core.model.definition.SeedModel, sqlmesh.core.model.definition.PythonModel], FieldInfo(default=PydanticUndefined, discriminator='source_type', extra={})]], **kwargs: Any) -> Annotated[Union[sqlmesh.core.model.definition.SqlModel, sqlmesh.core.model.definition.SeedModel, sqlmesh.core.model.definition.PythonModel], FieldInfo(default=PydanticUndefined, discriminator='source_type', extra={})]:
260    def upsert_model(self, model: t.Union[str, Model], **kwargs: t.Any) -> Model:
261        """Update or insert a model.
262
263        The context's models dictionary will be updated to include these changes.
264
265        Args:
266            model: Model name or instance to update.
267            kwargs: The kwargs to update the model with.
268
269        Returns:
270            A new instance of the updated or inserted model.
271        """
272        if isinstance(model, str):
273            model = self._models[model]
274
275        path = model._path  # type: ignore
276        # model.copy() can't be used here due to a cached state that can be a part of a model instance.
277        model = t.cast(Model, type(model)(**{**t.cast(Model, model).dict(), **kwargs}))
278        model._path = path
279        self._models.update({model.name: model})
280
281        self._add_model_to_dag(model)
282        update_model_schemas(self.dialect, self.dag, self._models)
283
284        return model

Update or insert a model.

The context's models dictionary will be updated to include these changes.

Arguments:
  • model: Model name or instance to update.
  • kwargs: The kwargs to update the model with.
Returns:

A new instance of the updated or inserted model.

def scheduler( self, environment: Optional[str] = None) -> sqlmesh.core.scheduler.Scheduler:
286    def scheduler(self, environment: t.Optional[str] = None) -> Scheduler:
287        """Returns the built-in scheduler.
288
289        Args:
290            environment: The target environment to source model snapshots from, or None
291                if snapshots should be sourced from the currently loaded local state.
292
293        Returns:
294            The built-in scheduler instance.
295        """
296        snapshots: t.Iterable[Snapshot]
297        if environment is not None:
298            stored_environment = self.state_sync.get_environment(environment)
299            if stored_environment is None:
300                raise ConfigError(f"Environment '{environment}' was not found.")
301            snapshots = self.state_sync.get_snapshots(stored_environment.snapshots).values()
302        else:
303            snapshots = self.snapshots.values()
304
305        if not snapshots:
306            raise ConfigError("No models were found")
307
308        return Scheduler(
309            snapshots,
310            self.snapshot_evaluator,
311            self.state_sync,
312            max_workers=self.concurrent_tasks,
313            console=self.console,
314        )

Returns the built-in scheduler.

Arguments:
  • environment: The target environment to source model snapshots from, or None if snapshots should be sourced from the currently loaded local state.
Returns:

The built-in scheduler instance.

sqlmesh_path: pathlib.Path

Path to the SQLMesh home directory.

models_directory_path: pathlib.Path

Path to the directory where the models are defined

macro_directory_path: pathlib.Path

Path to the directory where macros are defined

hook_directory_path: pathlib.Path

Path to the directory where hooks are defined

def refresh(self) -> None:
374    def refresh(self) -> None:
375        """Refresh all models that have been updated."""
376        if self._loader.reload_needed():
377            self.load()

Refresh all models that have been updated.

def load(self) -> sqlmesh.core.context.Context:
379    def load(self) -> Context:
380        """Load all files in the context's path."""
381        with sys_path(self.path):
382            project = self._loader.load(self)
383            self._hooks = project.hooks
384            self._macros = project.macros
385            self._models = project.models
386            self._audits = project.audits
387            self.dag = project.dag
388
389        return self

Load all files in the context's path.

def run( self, environment: Optional[str] = None, *, start: Union[datetime.date, datetime.datetime, str, int, float, NoneType] = None, end: Union[datetime.date, datetime.datetime, str, int, float, NoneType] = None, latest: Union[datetime.date, datetime.datetime, str, int, float, NoneType] = None, skip_janitor: bool = False) -> None:
391    def run(
392        self,
393        environment: t.Optional[str] = None,
394        *,
395        start: t.Optional[TimeLike] = None,
396        end: t.Optional[TimeLike] = None,
397        latest: t.Optional[TimeLike] = None,
398        skip_janitor: bool = False,
399    ) -> None:
400        """Run the entire dag through the scheduler.
401
402        Args:
403            environment: The target environment to source model snapshots from. Default: prod.
404            start: The start of the interval to render.
405            end: The end of the interval to render.
406            latest: The latest time used for non incremental datasets.
407            skip_janitor: Whether to skip the jantitor task.
408        """
409        self.scheduler(environment=environment or c.PROD).run(start, end, latest)
410
411        if not skip_janitor:
412            self._run_janitor()

Run the entire dag through the scheduler.

Arguments:
  • environment: The target environment to source model snapshots from. Default: prod.
  • start: The start of the interval to render.
  • end: The end of the interval to render.
  • latest: The latest time used for non incremental datasets.
  • skip_janitor: Whether to skip the janitor task.
def get_model( self, name: str) -> Optional[Annotated[Union[sqlmesh.core.model.definition.SqlModel, sqlmesh.core.model.definition.SeedModel, sqlmesh.core.model.definition.PythonModel], FieldInfo(default=PydanticUndefined, discriminator='source_type', extra={})]]:
414    def get_model(self, name: str) -> t.Optional[Model]:
415        """Returns a model with the given name or None if a model with such name doesn't exist."""
416        return self._models.get(name)

Returns a model with the given name or None if a model with such name doesn't exist.

models: mappingproxy[str, typing.Annotated[typing.Union[sqlmesh.core.model.definition.SqlModel, sqlmesh.core.model.definition.SeedModel, sqlmesh.core.model.definition.PythonModel], FieldInfo(default=PydanticUndefined, discriminator='source_type', extra={})]]

Returns all registered models in this context.

macros: mappingproxy[str, typing.Union[sqlmesh.utils.metaprogramming.Executable, sqlmesh.core.macros.macro]]

Returns all registered macros in this context.

hooks: mappingproxy[str, sqlmesh.core.hooks.hook]

Returns all registered hooks in this context.

Generates and returns snapshots based on models registered in this context.

If one of the snapshots has been previously stored in the persisted state, the stored instance will be returned.

local_snapshots: Dict[str, sqlmesh.core.snapshot.definition.Snapshot]

Generates and returns snapshots based on models registered in this context without reconciling them with the persisted state.

def render( self, model_or_snapshot: <MagicMock id='5983644704'>, *, start: Union[datetime.date, datetime.datetime, str, int, float, NoneType] = None, end: Union[datetime.date, datetime.datetime, str, int, float, NoneType] = None, latest: Union[datetime.date, datetime.datetime, str, int, float, NoneType] = None, expand: Union[bool, Iterable[str]] = False, **kwargs: Any) -> sqlglot.expressions.Expression:
467    def render(
468        self,
469        model_or_snapshot: ModelOrSnapshot,
470        *,
471        start: t.Optional[TimeLike] = None,
472        end: t.Optional[TimeLike] = None,
473        latest: t.Optional[TimeLike] = None,
474        expand: t.Union[bool, t.Iterable[str]] = False,
475        **kwargs: t.Any,
476    ) -> exp.Expression:
477        """Renders a model's query, expanding macros with provided kwargs, and optionally expanding referenced models.
478
479        Args:
480            model_or_snapshot: The model, model name, or snapshot to render.
481            start: The start of the interval to render.
482            end: The end of the interval to render.
483            latest: The latest time used for non incremental datasets.
484            expand: Whether or not to use expand materialized models, defaults to False.
485                If True, all referenced models are expanded as raw queries.
486                If a list, only referenced models are expanded as raw queries.
487
488        Returns:
489            The rendered expression.
490        """
491        latest = latest or yesterday_ds()
492
493        if isinstance(model_or_snapshot, str):
494            model = self._models[model_or_snapshot]
495        elif isinstance(model_or_snapshot, Snapshot):
496            model = model_or_snapshot.model
497        else:
498            model = model_or_snapshot
499
500        expand = self.dag.upstream(model.name) if expand is True else expand or []
501
502        if model.is_seed:
503            df = next(model.render(self, start=start, end=end, latest=latest, **kwargs))
504            return next(pandas_to_sql(df, model.columns_to_types))
505
506        return model.render_query(
507            start=start,
508            end=end,
509            latest=latest,
510            snapshots=self.snapshots,
511            expand=expand,
512            **kwargs,
513        )

Renders a model's query, expanding macros with provided kwargs, and optionally expanding referenced models.

Arguments:
  • model_or_snapshot: The model, model name, or snapshot to render.
  • start: The start of the interval to render.
  • end: The end of the interval to render.
  • latest: The latest time used for non incremental datasets.
  • expand: Whether or not to expand materialized models, defaults to False. If True, all referenced models are expanded as raw queries. If a list, only referenced models are expanded as raw queries.
Returns:

The rendered expression.

def evaluate( self, model_or_snapshot: <MagicMock id='5983717744'>, start: Union[datetime.date, datetime.datetime, str, int, float], end: Union[datetime.date, datetime.datetime, str, int, float], latest: Union[datetime.date, datetime.datetime, str, int, float], limit: Optional[int] = None, **kwargs: Any) -> <MagicMock id='5983709952'>:
515    def evaluate(
516        self,
517        model_or_snapshot: ModelOrSnapshot,
518        start: TimeLike,
519        end: TimeLike,
520        latest: TimeLike,
521        limit: t.Optional[int] = None,
522        **kwargs: t.Any,
523    ) -> DF:
524        """Evaluate a model or snapshot (running its query against a DB/Engine).
525
526        This method is used to test or iterate on models without side effects.
527
528        Args:
529            model_or_snapshot: The model, model name, or snapshot to render.
530            start: The start of the interval to evaluate.
531            end: The end of the interval to evaluate.
532            latest: The latest time used for non incremental datasets.
533            limit: A limit applied to the model.
534        """
535        if isinstance(model_or_snapshot, str):
536            snapshot = self.snapshots[model_or_snapshot]
537        elif isinstance(model_or_snapshot, Snapshot):
538            snapshot = model_or_snapshot
539        else:
540            snapshot = self.snapshots[model_or_snapshot.name]
541
542        df = self.snapshot_evaluator.evaluate(
543            snapshot,
544            start,
545            end,
546            latest,
547            snapshots=self.snapshots,
548            limit=limit or c.DEFAULT_MAX_LIMIT,
549        )
550
551        if df is None:
552            raise RuntimeError(f"Error evaluating {snapshot.model.name}")
553
554        return df

Evaluate a model or snapshot (running its query against a DB/Engine).

This method is used to test or iterate on models without side effects.

Arguments:
  • model_or_snapshot: The model, model name, or snapshot to render.
  • start: The start of the interval to evaluate.
  • end: The end of the interval to evaluate.
  • latest: The latest time used for non incremental datasets.
  • limit: A limit applied to the model.
def format(self) -> None:
556    def format(self) -> None:
557        """Format all models in a given directory."""
558        for model in self._models.values():
559            if not model.is_sql:
560                continue
561            with open(model._path, "r+", encoding="utf-8") as file:
562                expressions = parse(file.read(), default_dialect=self.dialect)
563                file.seek(0)
564                file.write(format_model_expressions(expressions, model.dialect))
565                file.truncate()

Format all models in a given directory.

def plan( self, environment: Optional[str] = None, *, start: Union[datetime.date, datetime.datetime, str, int, float, NoneType] = None, end: Union[datetime.date, datetime.datetime, str, int, float, NoneType] = None, create_from: Optional[str] = None, skip_tests: bool = False, restate_models: Optional[Iterable[str]] = None, no_gaps: bool = False, skip_backfill: bool = False, forward_only: bool = False, no_prompts: bool = False, auto_apply: bool = False, no_auto_categorization: Optional[bool] = None) -> sqlmesh.core.plan.definition.Plan:
567    def plan(
568        self,
569        environment: t.Optional[str] = None,
570        *,
571        start: t.Optional[TimeLike] = None,
572        end: t.Optional[TimeLike] = None,
573        create_from: t.Optional[str] = None,
574        skip_tests: bool = False,
575        restate_models: t.Optional[t.Iterable[str]] = None,
576        no_gaps: bool = False,
577        skip_backfill: bool = False,
578        forward_only: bool = False,
579        no_prompts: bool = False,
580        auto_apply: bool = False,
581        no_auto_categorization: t.Optional[bool] = None,
582    ) -> Plan:
583        """Interactively create a migration plan.
584
585        This method compares the current context with an environment. It then presents
586        the differences and asks whether to backfill each modified model.
587
588        Args:
589            environment: The environment to diff and plan against.
590            start: The start date of the backfill if there is one.
591            end: The end date of the backfill if there is one.
592            create_from: The environment to create the target environment from if it
593                doesn't exist. If not specified, the "prod" environment will be used.
594            skip_tests: Unit tests are run by default so this will skip them if enabled
595            restate_models: A list of of either internal or external models that need to be restated
596                for the given plan interval. If the target environment is a production environment,
597                ALL snapshots that depended on these upstream tables will have their intervals deleted
598                (even ones not in this current environment). Only the snapshots in this environment will
599                be backfilled whereas others need to be recovered on a future plan application. For development
600                environments only snapshots that are part of this plan will be affected.
601            no_gaps:  Whether to ensure that new snapshots for models that are already a
602                part of the target environment have no data gaps when compared against previous
603                snapshots for same models.
604            skip_backfill: Whether to skip the backfill step. Default: False.
605            forward_only: Whether the purpose of the plan is to make forward only changes.
606            no_prompts: Whether to disable interactive prompts for the backfill time range. Please note that
607                if this flag is set to true and there are uncategorized changes the plan creation will
608                fail. Default: False.
609            auto_apply: Whether to automatically apply the new plan after creation. Default: False.
610            no_auto_categorization: Indicates whether to disable automatic categorization of model
611                changes (breaking / non-breaking). If not provided, then the corresponding configuration
612                option determines the behavior.
613
614        Returns:
615            The populated Plan object.
616        """
617        environment = environment or c.PROD
618        environment = Environment.normalize_name(environment)
619
620        if skip_backfill and not no_gaps and environment == c.PROD:
621            raise ConfigError(
622                "When targeting the production enviornment either the backfill should not be skipped or the lack of data gaps should be enforced (--no-gaps flag)."
623            )
624
625        self._run_plan_tests(skip_tests)
626
627        plan = Plan(
628            context_diff=self._context_diff(environment or c.PROD, create_from=create_from),
629            dag=self.dag,
630            state_reader=self.state_reader,
631            start=start,
632            end=end,
633            apply=self.apply,
634            restate_models=restate_models,
635            no_gaps=no_gaps,
636            skip_backfill=skip_backfill,
637            is_dev=environment != c.PROD,
638            forward_only=forward_only,
639            environment_ttl=self.config.environment_ttl,
640            categorizer_config=self.config.auto_categorize_changes,
641            auto_categorization_enabled=not no_auto_categorization,
642        )
643
644        if not no_prompts:
645            self.console.plan(plan, auto_apply)
646        elif auto_apply:
647            self.apply(plan)
648
649        return plan

Interactively create a migration plan.

This method compares the current context with an environment. It then presents the differences and asks whether to backfill each modified model.

Arguments:
  • environment: The environment to diff and plan against.
  • start: The start date of the backfill if there is one.
  • end: The end date of the backfill if there is one.
  • create_from: The environment to create the target environment from if it doesn't exist. If not specified, the "prod" environment will be used.
  • skip_tests: Unit tests are run by default so this will skip them if enabled
  • restate_models: A list of either internal or external models that need to be restated for the given plan interval. If the target environment is a production environment, ALL snapshots that depended on these upstream tables will have their intervals deleted (even ones not in this current environment). Only the snapshots in this environment will be backfilled whereas others need to be recovered on a future plan application. For development environments only snapshots that are part of this plan will be affected.
  • no_gaps: Whether to ensure that new snapshots for models that are already a part of the target environment have no data gaps when compared against previous snapshots for same models.
  • skip_backfill: Whether to skip the backfill step. Default: False.
  • forward_only: Whether the purpose of the plan is to make forward only changes.
  • no_prompts: Whether to disable interactive prompts for the backfill time range. Please note that if this flag is set to true and there are uncategorized changes the plan creation will fail. Default: False.
  • auto_apply: Whether to automatically apply the new plan after creation. Default: False.
  • no_auto_categorization: Indicates whether to disable automatic categorization of model changes (breaking / non-breaking). If not provided, then the corresponding configuration option determines the behavior.
Returns:

The populated Plan object.

def apply(self, plan: sqlmesh.core.plan.definition.Plan) -> None:
651    def apply(self, plan: Plan) -> None:
652        """Applies a plan by pushing snapshots and backfilling data.
653
654        Given a plan, it pushes snapshots into the state sync and then uses the scheduler
655        to backfill all models.
656
657        Args:
658            plan: The plan to apply.
659        """
660        if not plan.context_diff.has_changes and not plan.requires_backfill:
661            return
662        if plan.uncategorized:
663            raise PlanError("Can't apply a plan with uncategorized changes.")
664        self.config.scheduler.create_plan_evaluator(self).evaluate(plan)

Applies a plan by pushing snapshots and backfilling data.

Given a plan, it pushes snapshots into the state sync and then uses the scheduler to backfill all models.

Arguments:
  • plan: The plan to apply.
def diff(self, environment: Optional[str] = None, detailed: bool = False) -> None:
666    def diff(self, environment: t.Optional[str] = None, detailed: bool = False) -> None:
667        """Show a diff of the current context with a given environment.
668
669        Args:
670            environment: The environment to diff against.
671            detailed: Show the actual SQL differences if True.
672        """
673        environment = environment or c.PROD
674        environment = Environment.normalize_name(environment)
675        self.console.show_model_difference_summary(
676            self._context_diff(environment or c.PROD), detailed
677        )

Show a diff of the current context with a given environment.

Arguments:
  • environment: The environment to diff against.
  • detailed: Show the actual SQL differences if True.
def get_dag(self, format: str = 'svg') -> graphviz.graphs.Digraph:
679    def get_dag(self, format: str = "svg") -> graphviz.Digraph:
680        """Gets a graphviz dag.
681
682        This method requires installing the graphviz base library through your package manager
683        and the python graphviz library.
684
685        To display within Databricks:
686        displayHTML(context.get_dag().pipe(encoding='utf-8'))
687
688        Args:
689            format: The desired format to use for representing the graph
690        """
691        from sqlmesh import runtime_env
692
693        try:
694            import graphviz  # type: ignore
695        except ModuleNotFoundError as e:
696            if runtime_env.is_databricks:
697                raise MissingDependencyError(
698                    "Rendering a dag requires graphviz. Run `pip install graphviz` and then `sudo apt-get install -y python3-dev graphviz libgraphviz-dev pkg-config`"
699                )
700            raise MissingDependencyError(
701                "Rendering a dag requires a manual install of graphviz. Run `pip install graphviz` and then install graphviz library: https://graphviz.org/download/."
702            ) from e
703
704        graph = graphviz.Digraph(node_attr={"shape": "box"}, format=format)
705
706        for name, upstream in self.dag.graph.items():
707            graph.node(name)
708            for u in upstream:
709                graph.edge(u, name)
710        return graph

Gets a graphviz dag.

This method requires installing the graphviz base library through your package manager and the python graphviz library.

To display within Databricks: displayHTML(context.get_dag().pipe(encoding='utf-8'))

Arguments:
  • format: The desired format to use for representing the graph
def render_dag(self, path: str, format: str = 'jpeg') -> str:
712    def render_dag(self, path: str, format: str = "jpeg") -> str:
713        """Render the dag using graphviz.
714
715        This method requires installing the graphviz base library through your package manager
716        and the python graphviz library.
717
718        Args:
719            path: filename to save the dag to
720            format: The desired format to use when rending the dag
721        """
722        graph = self.get_dag(format=format)
723
724        try:
725            return graph.render(path, format=format)
726        except graphviz.backend.execute.ExecutableNotFound as e:
727            raise MissingDependencyError(
728                "Graphviz is pip-installed but the system install is missing. Instructions: https://graphviz.org/download/"
729            ) from e

Render the dag using graphviz.

This method requires installing the graphviz base library through your package manager and the python graphviz library.

Arguments:
  • path: filename to save the dag to
  • format: The desired format to use when rendering the dag
def test( self, match_patterns: Optional[List[str]] = None, tests: Optional[List[str]] = None, verbose: bool = False) -> unittest.result.TestResult:
731    def test(
732        self,
733        match_patterns: t.Optional[t.List[str]] = None,
734        tests: t.Optional[t.List[str]] = None,
735        verbose: bool = False,
736    ) -> unittest.result.TestResult:
737        """Discover and run model tests"""
738        verbosity = 2 if verbose else 1
739        try:
740            if tests:
741                result = run_model_tests(
742                    tests=tests,
743                    snapshots=self.local_snapshots,
744                    engine_adapter=self._test_engine_adapter,
745                    verbosity=verbosity,
746                    patterns=match_patterns,
747                    ignore_patterns=self.ignore_patterns,
748                )
749            else:
750                result = run_all_model_tests(
751                    path=self.test_directory_path,
752                    snapshots=self.local_snapshots,
753                    engine_adapter=self._test_engine_adapter,
754                    verbosity=verbosity,
755                    patterns=match_patterns,
756                    ignore_patterns=self.ignore_patterns,
757                )
758        finally:
759            self._test_engine_adapter.close()
760        return result

Discover and run model tests

def audit( self, start: Union[datetime.date, datetime.datetime, str, int, float], end: Union[datetime.date, datetime.datetime, str, int, float], *, models: Optional[Iterator[str]] = None, latest: Union[datetime.date, datetime.datetime, str, int, float, NoneType] = None) -> None:
762    def audit(
763        self,
764        start: TimeLike,
765        end: TimeLike,
766        *,
767        models: t.Optional[t.Iterator[str]] = None,
768        latest: t.Optional[TimeLike] = None,
769    ) -> None:
770        """Audit models.
771
772        Args:
773            start: The start of the interval to audit.
774            end: The end of the interval to audit.
775            models: The models to audit. All models will be audited if not specified.
776            latest: The latest time used for non incremental datasets.
777
778        """
779
780        snapshots = (
781            [self.snapshots[model] for model in models] if models else self.snapshots.values()
782        )
783
784        num_audits = sum(len(snapshot.model.audits) for snapshot in snapshots)
785        self.console.log_status_update(f"Found {num_audits} audit(s).")
786        errors = []
787        for snapshot in snapshots:
788            for audit_result in self.snapshot_evaluator.audit(
789                snapshot=snapshot,
790                start=start,
791                end=end,
792                snapshots=self.snapshots,
793                raise_exception=False,
794            ):
795                if audit_result.count:
796                    errors.append(audit_result)
797                    self.console.log_status_update(f"{audit_result.audit.name} FAIL.")
798                else:
799                    self.console.log_status_update(f"{audit_result.audit.name} PASS.")
800
801        self.console.log_status_update(f"\nFinished with {len(errors)} audit error(s).")
802        for error in errors:
803            self.console.log_status_update(
804                f"\nFailure in audit {error.audit.name} ({error.audit._path})."
805            )
806            self.console.log_status_update(f"Got {error.count} results, expected 0.")
807            self.console.show_sql(f"{error.query}")
808        self.console.log_status_update("Done.")

Audit models.

Arguments:
  • start: The start of the interval to audit.
  • end: The end of the interval to audit.
  • models: The models to audit. All models will be audited if not specified.
  • latest: The latest time used for non incremental datasets.
def close(self) -> None:
810    def close(self) -> None:
811        """Releases all resources allocated by this context."""
812        self.snapshot_evaluator.close()

Releases all resources allocated by this context.