# Context

A SQLMesh context encapsulates a SQLMesh environment. When you create a new context, it will discover and
load your project's models, macros, and audits. Afterwards, you can use the context to create and apply
plans, visualize your models' lineage, run your audits and model tests, and perform various other tasks.
For more information regarding what a context can do, see `sqlmesh.core.context.Context`.
# Examples:

Creating and applying a plan against the staging environment.
```python
from sqlmesh.core.context import Context
context = Context(path="example", config="local_config")
plan = context.plan("staging")
context.apply(plan)
```
Running audits on your data.
```python
from sqlmesh.core.context import Context
context = Context(path="example", config="local_config")
context.audit("yesterday", "now")
```
Running tests on your models.
```python
from sqlmesh.core.context import Context
context = Context(path="example")
context.test()
```
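
The context can also render a model's query for inspection. A minimal sketch, assuming the example project defines a model named `example.model` (a hypothetical name):
```python
from sqlmesh.core.context import Context

context = Context(path="example")
# render() returns a sqlglot expression; "example.model" is an assumed model name.
print(context.render("example.model").sql(pretty=True))
```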
1""" 2# Context 3 4A SQLMesh context encapsulates a SQLMesh environment. When you create a new context, it will discover and 5load your project's models, macros, and audits. Afterwards, you can use the context to create and apply 6plans, visualize your model's lineage, run your audits and model tests, and perform various other tasks. 7For more information regarding what a context can do, see `sqlmesh.core.context.Context`. 8 9# Examples: 10 11Creating and applying a plan against the staging environment. 12```python 13from sqlmesh.core.context import Context 14context = Context(path="example", config="local_config") 15plan = context.plan("staging") 16context.apply(plan) 17``` 18 19Running audits on your data. 20```python 21from sqlmesh.core.context import Context 22context = Context(path="example", config="local_config") 23context.audit("yesterday", "now") 24``` 25 26Running tests on your models. 27```python 28from sqlmesh.core.context import Context 29context = Context(path="example") 30context.run_tests() 31``` 32""" 33from __future__ import annotations 34 35import abc 36import contextlib 37import typing as t 38import unittest.result 39from io import StringIO 40from pathlib import Path 41from types import MappingProxyType 42 43import pandas as pd 44from sqlglot import exp 45 46from sqlmesh.core import constants as c 47from sqlmesh.core._typing import NotificationTarget 48from sqlmesh.core.audit import Audit 49from sqlmesh.core.config import Config, load_config_from_paths 50from sqlmesh.core.console import Console, get_console 51from sqlmesh.core.context_diff import ContextDiff 52from sqlmesh.core.dialect import format_model_expressions, pandas_to_sql, parse 53from sqlmesh.core.engine_adapter import EngineAdapter 54from sqlmesh.core.environment import Environment 55from sqlmesh.core.hooks import hook 56from sqlmesh.core.loader import Loader, SqlMeshLoader, update_model_schemas 57from sqlmesh.core.macros import ExecutableOrMacro 58from sqlmesh.core.model import Model 59from sqlmesh.core.plan import Plan 60from sqlmesh.core.scheduler import Scheduler 61from sqlmesh.core.snapshot import ( 62 Snapshot, 63 SnapshotEvaluator, 64 SnapshotFingerprint, 65 to_table_mapping, 66) 67from sqlmesh.core.state_sync import StateReader, StateSync 68from sqlmesh.core.test import run_all_model_tests, run_model_tests 69from sqlmesh.core.user import User 70from sqlmesh.utils import UniqueKeyDict, sys_path 71from sqlmesh.utils.dag import DAG 72from sqlmesh.utils.date import TimeLike, yesterday_ds 73from sqlmesh.utils.errors import ConfigError, MissingDependencyError, PlanError 74 75if t.TYPE_CHECKING: 76 import graphviz 77 import pyspark 78 79 from sqlmesh.core.engine_adapter._typing import DF 80 81 ModelOrSnapshot = t.Union[str, Model, Snapshot] 82 83 84class BaseContext(abc.ABC): 85 """The base context which defines methods to execute a model.""" 86 87 @property 88 @abc.abstractmethod 89 def _model_tables(self) -> t.Dict[str, str]: 90 """Returns a mapping of model names to tables.""" 91 92 @property 93 @abc.abstractmethod 94 def engine_adapter(self) -> EngineAdapter: 95 """Returns an engine adapter.""" 96 97 @property 98 def spark(self) -> t.Optional[pyspark.sql.SparkSession]: 99 """Returns the spark session if it exists.""" 100 return self.engine_adapter.spark 101 102 def table(self, model_name: str) -> str: 103 """Gets the physical table name for a given model. 104 105 Args: 106 model_name: The model name. 107 108 Returns: 109 The physical table name. 
110 """ 111 return self._model_tables[model_name] 112 113 def fetchdf(self, query: t.Union[exp.Expression, str]) -> pd.DataFrame: 114 """Fetches a dataframe given a sql string or sqlglot expression. 115 116 Args: 117 query: SQL string or sqlglot expression. 118 119 Returns: 120 The default dataframe is Pandas, but for Spark a PySpark dataframe is returned. 121 """ 122 return self.engine_adapter.fetchdf(query) 123 124 def fetch_pyspark_df(self, query: t.Union[exp.Expression, str]) -> pyspark.sql.DataFrame: 125 """Fetches a PySpark dataframe given a sql string or sqlglot expression. 126 127 Args: 128 query: SQL string or sqlglot expression. 129 130 Returns: 131 A PySpark dataframe. 132 """ 133 return self.engine_adapter.fetch_pyspark_df(query) 134 135 136class ExecutionContext(BaseContext): 137 """The minimal context needed to execute a model. 138 139 Args: 140 engine_adapter: The engine adapter to execute queries against. 141 snapshots: All upstream snapshots (by model name) to use for expansion and mapping of physical locations. 142 is_dev: Indicates whether the evaluation happens in the development mode and temporary 143 tables / table clones should be used where applicable. 144 """ 145 146 def __init__( 147 self, 148 engine_adapter: EngineAdapter, 149 snapshots: t.Dict[str, Snapshot], 150 is_dev: bool, 151 ): 152 self.snapshots = snapshots 153 self.is_dev = is_dev 154 self._engine_adapter = engine_adapter 155 self.__model_tables = to_table_mapping(snapshots.values(), is_dev) 156 157 @property 158 def engine_adapter(self) -> EngineAdapter: 159 """Returns an engine adapter.""" 160 return self._engine_adapter 161 162 @property 163 def _model_tables(self) -> t.Dict[str, str]: 164 """Returns a mapping of model names to tables.""" 165 return self.__model_tables 166 167 168class Context(BaseContext): 169 """Encapsulates a SQLMesh environment supplying convenient functions to perform various tasks. 170 171 Args: 172 engine_adapter: The default engine adapter to use. 173 notification_targets: The notification target to use. Defaults to what is defined in config. 174 dialect: Default dialect of the sql in models. 175 physical_schema: The schema used to store physical materialized tables. 176 snapshot_ttl: Duration before unpromoted snapshots are removed. 177 path: The directory containing SQLMesh files. 178 config: A Config object or the name of a Config object in config.py. 179 connection: The name of the connection. If not specified the first connection as it appears 180 in configuration will be used. 181 test_connection: The name of the connection to use for tests. If not specified the first 182 connection as it appears in configuration will be used. 183 concurrent_tasks: The maximum number of tasks that can use the connection concurrently. 184 load: Whether or not to automatically load all models and macros (default True). 185 console: The rich instance used for printing out CLI command results. 186 users: A list of users to make known to SQLMesh. 
187 """ 188 189 def __init__( 190 self, 191 engine_adapter: t.Optional[EngineAdapter] = None, 192 notification_targets: t.Optional[t.List[NotificationTarget]] = None, 193 state_sync: t.Optional[StateSync] = None, 194 dialect: str = "", 195 physical_schema: str = "", 196 snapshot_ttl: str = "", 197 path: str = "", 198 config: t.Optional[t.Union[Config, str]] = None, 199 connection: t.Optional[str] = None, 200 test_connection: t.Optional[str] = None, 201 concurrent_tasks: t.Optional[int] = None, 202 loader: t.Optional[t.Type[Loader]] = None, 203 load: bool = True, 204 console: t.Optional[Console] = None, 205 users: t.Optional[t.List[User]] = None, 206 ): 207 self.console = console or get_console() 208 self.path = Path(path).absolute() 209 if not self.path.is_dir(): 210 raise ConfigError(f"{path} is not a directory") 211 212 self.config = self._load_config(config or "config") 213 214 self.physical_schema = physical_schema or self.config.physical_schema or "sqlmesh" 215 self.snapshot_ttl = snapshot_ttl or self.config.snapshot_ttl or c.DEFAULT_SNAPSHOT_TTL 216 self.dag: DAG[str] = DAG() 217 218 self._models: UniqueKeyDict[str, Model] = UniqueKeyDict("models") 219 self._audits: UniqueKeyDict[str, Audit] = UniqueKeyDict("audits") 220 self._macros: UniqueKeyDict[str, ExecutableOrMacro] = UniqueKeyDict("macros") 221 self._hooks: UniqueKeyDict[str, hook] = UniqueKeyDict("hooks") 222 223 self.connection = connection 224 connection_config = self.config.get_connection(connection) 225 self.concurrent_tasks = concurrent_tasks or connection_config.concurrent_tasks 226 self._engine_adapter = engine_adapter or connection_config.create_engine_adapter() 227 228 test_connection_config = ( 229 self.config.test_connection 230 if test_connection is None 231 else self.config.get_connection(test_connection) 232 ) 233 self._test_engine_adapter = test_connection_config.create_engine_adapter() 234 235 self.dialect = dialect or self.config.model_defaults.dialect or self._engine_adapter.dialect 236 237 self.snapshot_evaluator = SnapshotEvaluator( 238 self.engine_adapter, ddl_concurrent_tasks=self.concurrent_tasks 239 ) 240 241 self.notification_targets = self.config.notification_targets + (notification_targets or []) 242 243 self._provided_state_sync: t.Optional[StateSync] = state_sync 244 self._state_sync: t.Optional[StateSync] = None 245 self._state_reader: t.Optional[StateReader] = None 246 247 self.users = self.config.users + (users or []) 248 249 self._loader = (loader or self.config.loader or SqlMeshLoader)() 250 251 if load: 252 self.load() 253 254 @property 255 def engine_adapter(self) -> EngineAdapter: 256 """Returns an engine adapter.""" 257 return self._engine_adapter 258 259 def upsert_model(self, model: t.Union[str, Model], **kwargs: t.Any) -> Model: 260 """Update or insert a model. 261 262 The context's models dictionary will be updated to include these changes. 263 264 Args: 265 model: Model name or instance to update. 266 kwargs: The kwargs to update the model with. 267 268 Returns: 269 A new instance of the updated or inserted model. 270 """ 271 if isinstance(model, str): 272 model = self._models[model] 273 274 path = model._path # type: ignore 275 # model.copy() can't be used here due to a cached state that can be a part of a model instance. 
276 model = t.cast(Model, type(model)(**{**t.cast(Model, model).dict(), **kwargs})) 277 model._path = path 278 self._models.update({model.name: model}) 279 280 self._add_model_to_dag(model) 281 update_model_schemas(self.dialect, self.dag, self._models) 282 283 return model 284 285 def scheduler(self, environment: t.Optional[str] = None) -> Scheduler: 286 """Returns the built-in scheduler. 287 288 Args: 289 environment: The target environment to source model snapshots from, or None 290 if snapshots should be sourced from the currently loaded local state. 291 292 Returns: 293 The built-in scheduler instance. 294 """ 295 snapshots: t.Iterable[Snapshot] 296 if environment is not None: 297 stored_environment = self.state_sync.get_environment(environment) 298 if stored_environment is None: 299 raise ConfigError(f"Environment '{environment}' was not found.") 300 snapshots = self.state_sync.get_snapshots(stored_environment.snapshots).values() 301 else: 302 snapshots = self.snapshots.values() 303 304 if not snapshots: 305 raise ConfigError("No models were found") 306 307 return Scheduler( 308 snapshots, 309 self.snapshot_evaluator, 310 self.state_sync, 311 max_workers=self.concurrent_tasks, 312 console=self.console, 313 ) 314 315 @property 316 def state_sync(self) -> StateSync: 317 if not self._state_sync: 318 self._state_sync = self._provided_state_sync or self.config.scheduler.create_state_sync( 319 self 320 ) 321 if not self._state_sync: 322 raise ConfigError( 323 "The operation is not supported when using a read-only state sync" 324 ) 325 self._state_sync.init_schema() 326 return self._state_sync 327 328 @property 329 def state_reader(self) -> StateReader: 330 if not self._state_reader: 331 try: 332 self._state_reader = self.state_sync 333 except ConfigError: 334 self._state_reader = self.config.scheduler.create_state_reader(self) 335 if not self._state_reader: 336 raise ConfigError( 337 "Invalid configuration: neither State Sync nor Reader has been configured" 338 ) 339 return self._state_reader 340 341 @property 342 def sqlmesh_path(self) -> Path: 343 """Path to the SQLMesh home directory.""" 344 return Path.home() / ".sqlmesh" 345 346 @property 347 def models_directory_path(self) -> Path: 348 """Path to the directory where the models are defined""" 349 return self.path / "models" 350 351 @property 352 def macro_directory_path(self) -> Path: 353 """Path to the directory where macros are defined""" 354 return self.path / "macros" 355 356 @property 357 def hook_directory_path(self) -> Path: 358 """Path to the directory where hooks are defined""" 359 return self.path / "hooks" 360 361 @property 362 def test_directory_path(self) -> Path: 363 return self.path / "tests" 364 365 @property 366 def audits_directory_path(self) -> Path: 367 return self.path / "audits" 368 369 @property 370 def ignore_patterns(self) -> t.List[str]: 371 return c.IGNORE_PATTERNS + self.config.ignore_patterns 372 373 def refresh(self) -> None: 374 """Refresh all models that have been updated.""" 375 if self._loader.reload_needed(): 376 self.load() 377 378 def load(self) -> Context: 379 """Load all files in the context's path.""" 380 with sys_path(self.path): 381 project = self._loader.load(self) 382 self._hooks = project.hooks 383 self._macros = project.macros 384 self._models = project.models 385 self._audits = project.audits 386 self.dag = project.dag 387 388 return self 389 390 def run( 391 self, 392 environment: t.Optional[str] = None, 393 *, 394 start: t.Optional[TimeLike] = None, 395 end: t.Optional[TimeLike] = None, 
396 latest: t.Optional[TimeLike] = None, 397 skip_janitor: bool = False, 398 ) -> None: 399 """Run the entire dag through the scheduler. 400 401 Args: 402 environment: The target environment to source model snapshots from. Default: prod. 403 start: The start of the interval to render. 404 end: The end of the interval to render. 405 latest: The latest time used for non incremental datasets. 406 skip_janitor: Whether to skip the jantitor task. 407 """ 408 self.scheduler(environment=environment or c.PROD).run(start, end, latest) 409 410 if not skip_janitor: 411 self._run_janitor() 412 413 def get_model(self, name: str) -> t.Optional[Model]: 414 """Returns a model with the given name or None if a model with such name doesn't exist.""" 415 return self._models.get(name) 416 417 @property 418 def models(self) -> MappingProxyType[str, Model]: 419 """Returns all registered models in this context.""" 420 return MappingProxyType(self._models) 421 422 @property 423 def macros(self) -> MappingProxyType[str, ExecutableOrMacro]: 424 """Returns all registered macros in this context.""" 425 return MappingProxyType(self._macros) 426 427 @property 428 def hooks(self) -> MappingProxyType[str, hook]: 429 """Returns all registered hooks in this context.""" 430 return MappingProxyType(self._hooks) 431 432 @property 433 def snapshots(self) -> t.Dict[str, Snapshot]: 434 """Generates and returns snapshots based on models registered in this context. 435 436 If one of the snapshots has been previosly stored in the persisted state, the stored 437 instance will be returned. 438 """ 439 local_snapshots = self.local_snapshots 440 441 stored_snapshots = self.state_reader.get_snapshots( 442 [s.snapshot_id for s in local_snapshots.values()] 443 ) 444 445 return {name: stored_snapshots.get(s.snapshot_id, s) for name, s in local_snapshots.items()} 446 447 @property 448 def local_snapshots(self) -> t.Dict[str, Snapshot]: 449 """Generates and returns snapshots based on models registered in this context without reconciling them 450 with the persisted state. 451 """ 452 local_snapshots = {} 453 fingerprint_cache: t.Dict[str, SnapshotFingerprint] = {} 454 for model in self._models.values(): 455 snapshot = Snapshot.from_model( 456 model, 457 physical_schema=self.physical_schema, 458 models=self._models, 459 ttl=self.snapshot_ttl, 460 audits=self._audits, 461 cache=fingerprint_cache, 462 ) 463 local_snapshots[model.name] = snapshot 464 return local_snapshots 465 466 def render( 467 self, 468 model_or_snapshot: ModelOrSnapshot, 469 *, 470 start: t.Optional[TimeLike] = None, 471 end: t.Optional[TimeLike] = None, 472 latest: t.Optional[TimeLike] = None, 473 expand: t.Union[bool, t.Iterable[str]] = False, 474 **kwargs: t.Any, 475 ) -> exp.Expression: 476 """Renders a model's query, expanding macros with provided kwargs, and optionally expanding referenced models. 477 478 Args: 479 model_or_snapshot: The model, model name, or snapshot to render. 480 start: The start of the interval to render. 481 end: The end of the interval to render. 482 latest: The latest time used for non incremental datasets. 483 expand: Whether or not to use expand materialized models, defaults to False. 484 If True, all referenced models are expanded as raw queries. 485 If a list, only referenced models are expanded as raw queries. 486 487 Returns: 488 The rendered expression. 
489 """ 490 latest = latest or yesterday_ds() 491 492 if isinstance(model_or_snapshot, str): 493 model = self._models[model_or_snapshot] 494 elif isinstance(model_or_snapshot, Snapshot): 495 model = model_or_snapshot.model 496 else: 497 model = model_or_snapshot 498 499 expand = self.dag.upstream(model.name) if expand is True else expand or [] 500 501 if model.is_seed: 502 df = next(model.render(self, start=start, end=end, latest=latest, **kwargs)) 503 return next(pandas_to_sql(df, model.columns_to_types)) 504 505 return model.render_query( 506 start=start, 507 end=end, 508 latest=latest, 509 snapshots=self.snapshots, 510 expand=expand, 511 **kwargs, 512 ) 513 514 def evaluate( 515 self, 516 model_or_snapshot: ModelOrSnapshot, 517 start: TimeLike, 518 end: TimeLike, 519 latest: TimeLike, 520 limit: t.Optional[int] = None, 521 **kwargs: t.Any, 522 ) -> DF: 523 """Evaluate a model or snapshot (running its query against a DB/Engine). 524 525 This method is used to test or iterate on models without side effects. 526 527 Args: 528 model_or_snapshot: The model, model name, or snapshot to render. 529 start: The start of the interval to evaluate. 530 end: The end of the interval to evaluate. 531 latest: The latest time used for non incremental datasets. 532 limit: A limit applied to the model. 533 """ 534 if isinstance(model_or_snapshot, str): 535 snapshot = self.snapshots[model_or_snapshot] 536 elif isinstance(model_or_snapshot, Snapshot): 537 snapshot = model_or_snapshot 538 else: 539 snapshot = self.snapshots[model_or_snapshot.name] 540 541 df = self.snapshot_evaluator.evaluate( 542 snapshot, 543 start, 544 end, 545 latest, 546 snapshots=self.snapshots, 547 limit=limit or c.DEFAULT_MAX_LIMIT, 548 ) 549 550 if df is None: 551 raise RuntimeError(f"Error evaluating {snapshot.model.name}") 552 553 return df 554 555 def format(self) -> None: 556 """Format all models in a given directory.""" 557 for model in self._models.values(): 558 if not model.is_sql: 559 continue 560 with open(model._path, "r+", encoding="utf-8") as file: 561 expressions = parse(file.read(), default_dialect=self.dialect) 562 file.seek(0) 563 file.write(format_model_expressions(expressions, model.dialect)) 564 file.truncate() 565 566 def plan( 567 self, 568 environment: t.Optional[str] = None, 569 *, 570 start: t.Optional[TimeLike] = None, 571 end: t.Optional[TimeLike] = None, 572 create_from: t.Optional[str] = None, 573 skip_tests: bool = False, 574 restate_models: t.Optional[t.Iterable[str]] = None, 575 no_gaps: bool = False, 576 skip_backfill: bool = False, 577 forward_only: bool = False, 578 no_prompts: bool = False, 579 auto_apply: bool = False, 580 no_auto_categorization: t.Optional[bool] = None, 581 ) -> Plan: 582 """Interactively create a migration plan. 583 584 This method compares the current context with an environment. It then presents 585 the differences and asks whether to backfill each modified model. 586 587 Args: 588 environment: The environment to diff and plan against. 589 start: The start date of the backfill if there is one. 590 end: The end date of the backfill if there is one. 591 create_from: The environment to create the target environment from if it 592 doesn't exist. If not specified, the "prod" environment will be used. 593 skip_tests: Unit tests are run by default so this will skip them if enabled 594 restate_models: A list of of either internal or external models that need to be restated 595 for the given plan interval. 
If the target environment is a production environment, 596 ALL snapshots that depended on these upstream tables will have their intervals deleted 597 (even ones not in this current environment). Only the snapshots in this environment will 598 be backfilled whereas others need to be recovered on a future plan application. For development 599 environments only snapshots that are part of this plan will be affected. 600 no_gaps: Whether to ensure that new snapshots for models that are already a 601 part of the target environment have no data gaps when compared against previous 602 snapshots for same models. 603 skip_backfill: Whether to skip the backfill step. Default: False. 604 forward_only: Whether the purpose of the plan is to make forward only changes. 605 no_prompts: Whether to disable interactive prompts for the backfill time range. Please note that 606 if this flag is set to true and there are uncategorized changes the plan creation will 607 fail. Default: False. 608 auto_apply: Whether to automatically apply the new plan after creation. Default: False. 609 no_auto_categorization: Indicates whether to disable automatic categorization of model 610 changes (breaking / non-breaking). If not provided, then the corresponding configuration 611 option determines the behavior. 612 613 Returns: 614 The populated Plan object. 615 """ 616 environment = environment or c.PROD 617 environment = Environment.normalize_name(environment) 618 619 if skip_backfill and not no_gaps and environment == c.PROD: 620 raise ConfigError( 621 "When targeting the production enviornment either the backfill should not be skipped or the lack of data gaps should be enforced (--no-gaps flag)." 622 ) 623 624 self._run_plan_tests(skip_tests) 625 626 plan = Plan( 627 context_diff=self._context_diff(environment or c.PROD, create_from=create_from), 628 dag=self.dag, 629 state_reader=self.state_reader, 630 start=start, 631 end=end, 632 apply=self.apply, 633 restate_models=restate_models, 634 no_gaps=no_gaps, 635 skip_backfill=skip_backfill, 636 is_dev=environment != c.PROD, 637 forward_only=forward_only, 638 environment_ttl=self.config.environment_ttl, 639 categorizer_config=self.config.auto_categorize_changes, 640 auto_categorization_enabled=not no_auto_categorization, 641 ) 642 643 if not no_prompts: 644 self.console.plan(plan, auto_apply) 645 elif auto_apply: 646 self.apply(plan) 647 648 return plan 649 650 def apply(self, plan: Plan) -> None: 651 """Applies a plan by pushing snapshots and backfilling data. 652 653 Given a plan, it pushes snapshots into the state sync and then uses the scheduler 654 to backfill all models. 655 656 Args: 657 plan: The plan to apply. 658 """ 659 if not plan.context_diff.has_changes and not plan.requires_backfill: 660 return 661 if plan.uncategorized: 662 raise PlanError("Can't apply a plan with uncategorized changes.") 663 self.config.scheduler.create_plan_evaluator(self).evaluate(plan) 664 665 def diff(self, environment: t.Optional[str] = None, detailed: bool = False) -> None: 666 """Show a diff of the current context with a given environment. 667 668 Args: 669 environment: The environment to diff against. 670 detailed: Show the actual SQL differences if True. 671 """ 672 environment = environment or c.PROD 673 environment = Environment.normalize_name(environment) 674 self.console.show_model_difference_summary( 675 self._context_diff(environment or c.PROD), detailed 676 ) 677 678 def get_dag(self, format: str = "svg") -> graphviz.Digraph: 679 """Gets a graphviz dag. 
680 681 This method requires installing the graphviz base library through your package manager 682 and the python graphviz library. 683 684 To display within Databricks: 685 displayHTML(context.get_dag().pipe(encoding='utf-8')) 686 687 Args: 688 format: The desired format to use for representing the graph 689 """ 690 from sqlmesh import runtime_env 691 692 try: 693 import graphviz # type: ignore 694 except ModuleNotFoundError as e: 695 if runtime_env.is_databricks: 696 raise MissingDependencyError( 697 "Rendering a dag requires graphviz. Run `pip install graphviz` and then `sudo apt-get install -y python3-dev graphviz libgraphviz-dev pkg-config`" 698 ) 699 raise MissingDependencyError( 700 "Rendering a dag requires a manual install of graphviz. Run `pip install graphviz` and then install graphviz library: https://graphviz.org/download/." 701 ) from e 702 703 graph = graphviz.Digraph(node_attr={"shape": "box"}, format=format) 704 705 for name, upstream in self.dag.graph.items(): 706 graph.node(name) 707 for u in upstream: 708 graph.edge(u, name) 709 return graph 710 711 def render_dag(self, path: str, format: str = "jpeg") -> str: 712 """Render the dag using graphviz. 713 714 This method requires installing the graphviz base library through your package manager 715 and the python graphviz library. 716 717 Args: 718 path: filename to save the dag to 719 format: The desired format to use when rending the dag 720 """ 721 graph = self.get_dag(format=format) 722 723 try: 724 return graph.render(path, format=format) 725 except graphviz.backend.execute.ExecutableNotFound as e: 726 raise MissingDependencyError( 727 "Graphviz is pip-installed but the system install is missing. Instructions: https://graphviz.org/download/" 728 ) from e 729 730 def test( 731 self, 732 match_patterns: t.Optional[t.List[str]] = None, 733 tests: t.Optional[t.List[str]] = None, 734 verbose: bool = False, 735 ) -> unittest.result.TestResult: 736 """Discover and run model tests""" 737 verbosity = 2 if verbose else 1 738 try: 739 if tests: 740 result = run_model_tests( 741 tests=tests, 742 snapshots=self.local_snapshots, 743 engine_adapter=self._test_engine_adapter, 744 verbosity=verbosity, 745 patterns=match_patterns, 746 ignore_patterns=self.ignore_patterns, 747 ) 748 else: 749 result = run_all_model_tests( 750 path=self.test_directory_path, 751 snapshots=self.local_snapshots, 752 engine_adapter=self._test_engine_adapter, 753 verbosity=verbosity, 754 patterns=match_patterns, 755 ignore_patterns=self.ignore_patterns, 756 ) 757 finally: 758 self._test_engine_adapter.close() 759 return result 760 761 def audit( 762 self, 763 start: TimeLike, 764 end: TimeLike, 765 *, 766 models: t.Optional[t.Iterator[str]] = None, 767 latest: t.Optional[TimeLike] = None, 768 ) -> None: 769 """Audit models. 770 771 Args: 772 start: The start of the interval to audit. 773 end: The end of the interval to audit. 774 models: The models to audit. All models will be audited if not specified. 775 latest: The latest time used for non incremental datasets. 
776 777 """ 778 779 snapshots = ( 780 [self.snapshots[model] for model in models] if models else self.snapshots.values() 781 ) 782 783 num_audits = sum(len(snapshot.model.audits) for snapshot in snapshots) 784 self.console.log_status_update(f"Found {num_audits} audit(s).") 785 errors = [] 786 for snapshot in snapshots: 787 for audit_result in self.snapshot_evaluator.audit( 788 snapshot=snapshot, 789 start=start, 790 end=end, 791 snapshots=self.snapshots, 792 raise_exception=False, 793 ): 794 if audit_result.count: 795 errors.append(audit_result) 796 self.console.log_status_update(f"{audit_result.audit.name} FAIL.") 797 else: 798 self.console.log_status_update(f"{audit_result.audit.name} PASS.") 799 800 self.console.log_status_update(f"\nFinished with {len(errors)} audit error(s).") 801 for error in errors: 802 self.console.log_status_update( 803 f"\nFailure in audit {error.audit.name} ({error.audit._path})." 804 ) 805 self.console.log_status_update(f"Got {error.count} results, expected 0.") 806 self.console.show_sql(f"{error.query}") 807 self.console.log_status_update("Done.") 808 809 def close(self) -> None: 810 """Releases all resources allocated by this context.""" 811 self.snapshot_evaluator.close() 812 813 def _run_plan_tests( 814 self, skip_tests: bool = False 815 ) -> t.Tuple[t.Optional[unittest.result.TestResult], t.Optional[str]]: 816 if self._test_engine_adapter and not skip_tests: 817 test_output_io = StringIO() 818 with contextlib.redirect_stderr(test_output_io): 819 result = self.test() 820 test_output = test_output_io.getvalue() 821 self.console.log_test_results(result, test_output, self._test_engine_adapter.dialect) 822 if not result.wasSuccessful(): 823 raise PlanError( 824 "Cannot generate plan due to failing test(s). Fix test(s) and run again" 825 ) 826 return result, test_output 827 return None, None 828 829 @property 830 def _model_tables(self) -> t.Dict[str, str]: 831 """Mapping of model name to physical table name. 832 833 If a snapshot has not been versioned yet, its view name will be returned. 834 """ 835 return { 836 name: snapshot.table_name() 837 if snapshot.version 838 else snapshot.qualified_view_name.for_environment(c.PROD) 839 for name, snapshot in self.snapshots.items() 840 } 841 842 def _context_diff( 843 self, 844 environment: str | Environment, 845 snapshots: t.Optional[t.Dict[str, Snapshot]] = None, 846 create_from: t.Optional[str] = None, 847 ) -> ContextDiff: 848 environment = Environment.normalize_name(environment) 849 return ContextDiff.create( 850 environment, snapshots or self.snapshots, create_from or c.PROD, self.state_reader 851 ) 852 853 def _load_config(self, config: t.Union[str, Config]) -> Config: 854 if isinstance(config, Config): 855 return config 856 857 lookup_paths = [ 858 self.sqlmesh_path / "config.yml", 859 self.sqlmesh_path / "config.yaml", 860 self.path / "config.py", 861 self.path / "config.yml", 862 self.path / "config.yaml", 863 ] 864 return load_config_from_paths(*lookup_paths, config_name=config) 865 866 def _add_model_to_dag(self, model: Model) -> None: 867 self.dag.graph[model.name] = set() 868 869 self.dag.add(model.name, model.depends_on) 870 871 def _run_janitor(self) -> None: 872 expired_environments = self.state_sync.delete_expired_environments() 873 for expired_environment in expired_environments: 874 self.snapshot_evaluator.demote(expired_environment.snapshots, expired_environment.name) 875 876 expired_snapshots = self.state_sync.delete_expired_snapshots() 877 self.snapshot_evaluator.cleanup(expired_snapshots)
```python
class BaseContext(abc.ABC):
    """The base context which defines methods to execute a model."""

    @property
    @abc.abstractmethod
    def _model_tables(self) -> t.Dict[str, str]:
        """Returns a mapping of model names to tables."""

    @property
    @abc.abstractmethod
    def engine_adapter(self) -> EngineAdapter:
        """Returns an engine adapter."""

    @property
    def spark(self) -> t.Optional[pyspark.sql.SparkSession]:
        """Returns the spark session if it exists."""
        return self.engine_adapter.spark

    def table(self, model_name: str) -> str:
        """Gets the physical table name for a given model.

        Args:
            model_name: The model name.

        Returns:
            The physical table name.
        """
        return self._model_tables[model_name]

    def fetchdf(self, query: t.Union[exp.Expression, str]) -> pd.DataFrame:
        """Fetches a dataframe given a sql string or sqlglot expression.

        Args:
            query: SQL string or sqlglot expression.

        Returns:
            The default dataframe is Pandas, but for Spark a PySpark dataframe is returned.
        """
        return self.engine_adapter.fetchdf(query)

    def fetch_pyspark_df(self, query: t.Union[exp.Expression, str]) -> pyspark.sql.DataFrame:
        """Fetches a PySpark dataframe given a sql string or sqlglot expression.

        Args:
            query: SQL string or sqlglot expression.

        Returns:
            A PySpark dataframe.
        """
        return self.engine_adapter.fetch_pyspark_df(query)
```
The base context which defines methods to execute a model.
```python
def table(self, model_name: str) -> str:
    """Gets the physical table name for a given model.

    Args:
        model_name: The model name.

    Returns:
        The physical table name.
    """
    return self._model_tables[model_name]
```
Gets the physical table name for a given model.
Arguments:
- model_name: The model name.
Returns:
The physical table name.
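
The returned name can be interpolated into ad-hoc queries. A minimal sketch, continuing with the `context` from the examples above and an assumed model name `example.model`:
```python
# "example.model" is a hypothetical model name; table() maps it to its physical table.
table_name = context.table("example.model")
df = context.fetchdf(f"SELECT COUNT(*) AS cnt FROM {table_name}")
```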
```python
def fetchdf(self, query: t.Union[exp.Expression, str]) -> pd.DataFrame:
    """Fetches a dataframe given a sql string or sqlglot expression.

    Args:
        query: SQL string or sqlglot expression.

    Returns:
        The default dataframe is Pandas, but for Spark a PySpark dataframe is returned.
    """
    return self.engine_adapter.fetchdf(query)
```
Fetches a dataframe given a sql string or sqlglot expression.
Arguments:
- query: SQL string or sqlglot expression.
Returns:
A Pandas dataframe by default; when running on the Spark engine, a PySpark dataframe is returned.
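
A quick smoke test; a minimal sketch using the `context` from the examples above:
```python
df = context.fetchdf("SELECT 1 AS one")
print(df)  # a single-row Pandas dataframe
```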
```python
def fetch_pyspark_df(self, query: t.Union[exp.Expression, str]) -> pyspark.sql.DataFrame:
    """Fetches a PySpark dataframe given a sql string or sqlglot expression.

    Args:
        query: SQL string or sqlglot expression.

    Returns:
        A PySpark dataframe.
    """
    return self.engine_adapter.fetch_pyspark_df(query)
```
Fetches a PySpark dataframe given a sql string or sqlglot expression.
Arguments:
- query: SQL string or sqlglot expression.
Returns:
A PySpark dataframe.
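
A minimal sketch, assuming the context's engine adapter is backed by Spark:
```python
# Requires a Spark-backed engine adapter.
spark_df = context.fetch_pyspark_df("SELECT 1 AS one")
spark_df.show()
```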
```python
class ExecutionContext(BaseContext):
    """The minimal context needed to execute a model.

    Args:
        engine_adapter: The engine adapter to execute queries against.
        snapshots: All upstream snapshots (by model name) to use for expansion and mapping of physical locations.
        is_dev: Indicates whether the evaluation happens in the development mode and temporary
            tables / table clones should be used where applicable.
    """

    def __init__(
        self,
        engine_adapter: EngineAdapter,
        snapshots: t.Dict[str, Snapshot],
        is_dev: bool,
    ):
        self.snapshots = snapshots
        self.is_dev = is_dev
        self._engine_adapter = engine_adapter
        self.__model_tables = to_table_mapping(snapshots.values(), is_dev)

    @property
    def engine_adapter(self) -> EngineAdapter:
        """Returns an engine adapter."""
        return self._engine_adapter

    @property
    def _model_tables(self) -> t.Dict[str, str]:
        """Returns a mapping of model names to tables."""
        return self.__model_tables
```
The minimal context needed to execute a model.
Arguments:
- engine_adapter: The engine adapter to execute queries against.
- snapshots: All upstream snapshots (by model name) to use for expansion and mapping of physical locations.
- is_dev: Indicates whether the evaluation happens in development mode, meaning temporary tables / table clones should be used where applicable.
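
A minimal sketch that builds an `ExecutionContext` by hand from a loaded `Context` (in practice SQLMesh constructs one for you when executing a model); `example.model` is an assumed model name:
```python
from sqlmesh.core.context import Context, ExecutionContext

context = Context(path="example")
exec_ctx = ExecutionContext(
    engine_adapter=context.engine_adapter,
    snapshots=context.snapshots,
    is_dev=False,
)
# Resolve the model's physical table and query it.
df = exec_ctx.fetchdf(f"SELECT * FROM {exec_ctx.table('example.model')} LIMIT 5")
```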
```python
class Context(BaseContext):
    """Encapsulates a SQLMesh environment supplying convenient functions to perform various tasks."""
```
Encapsulates a SQLMesh environment, supplying convenient functions to perform various tasks.
Arguments:
- engine_adapter: The default engine adapter to use.
- notification_targets: The notification targets to use. Defaults to what is defined in config.
- dialect: Default dialect of the sql in models.
- physical_schema: The schema used to store physical materialized tables.
- snapshot_ttl: Duration before unpromoted snapshots are removed.
- path: The directory containing SQLMesh files.
- config: A Config object or the name of a Config object in config.py.
- connection: The name of the connection. If not specified, the first connection in the configuration will be used.
- test_connection: The name of the connection to use for tests. If not specified, the first connection in the configuration will be used.
- concurrent_tasks: The maximum number of tasks that can use the connection concurrently.
- load: Whether or not to automatically load all models and macros (default True).
- console: The rich instance used for printing out CLI command results.
- users: A list of users to make known to SQLMesh.
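For illustration, a minimal sketch of constructing a context with several of these options set explicitly; the project path and connection names are hypothetical, and any of these keyword arguments can be omitted to fall back to the values in the configuration.
```python
from sqlmesh.core.context import Context

context = Context(
    path="example",
    config="local_config",
    connection="dev_connection",        # hypothetical connection name
    test_connection="test_connection",  # hypothetical connection name
    concurrent_tasks=4,
)
```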
    def __init__(
        self,
        engine_adapter: t.Optional[EngineAdapter] = None,
        notification_targets: t.Optional[t.List[NotificationTarget]] = None,
        state_sync: t.Optional[StateSync] = None,
        dialect: str = "",
        physical_schema: str = "",
        snapshot_ttl: str = "",
        path: str = "",
        config: t.Optional[t.Union[Config, str]] = None,
        connection: t.Optional[str] = None,
        test_connection: t.Optional[str] = None,
        concurrent_tasks: t.Optional[int] = None,
        loader: t.Optional[t.Type[Loader]] = None,
        load: bool = True,
        console: t.Optional[Console] = None,
        users: t.Optional[t.List[User]] = None,
    ):
        self.console = console or get_console()
        self.path = Path(path).absolute()
        if not self.path.is_dir():
            raise ConfigError(f"{path} is not a directory")

        self.config = self._load_config(config or "config")

        self.physical_schema = physical_schema or self.config.physical_schema or "sqlmesh"
        self.snapshot_ttl = snapshot_ttl or self.config.snapshot_ttl or c.DEFAULT_SNAPSHOT_TTL
        self.dag: DAG[str] = DAG()

        self._models: UniqueKeyDict[str, Model] = UniqueKeyDict("models")
        self._audits: UniqueKeyDict[str, Audit] = UniqueKeyDict("audits")
        self._macros: UniqueKeyDict[str, ExecutableOrMacro] = UniqueKeyDict("macros")
        self._hooks: UniqueKeyDict[str, hook] = UniqueKeyDict("hooks")

        self.connection = connection
        connection_config = self.config.get_connection(connection)
        self.concurrent_tasks = concurrent_tasks or connection_config.concurrent_tasks
        self._engine_adapter = engine_adapter or connection_config.create_engine_adapter()

        test_connection_config = (
            self.config.test_connection
            if test_connection is None
            else self.config.get_connection(test_connection)
        )
        self._test_engine_adapter = test_connection_config.create_engine_adapter()

        self.dialect = dialect or self.config.model_defaults.dialect or self._engine_adapter.dialect

        self.snapshot_evaluator = SnapshotEvaluator(
            self.engine_adapter, ddl_concurrent_tasks=self.concurrent_tasks
        )

        self.notification_targets = self.config.notification_targets + (notification_targets or [])

        self._provided_state_sync: t.Optional[StateSync] = state_sync
        self._state_sync: t.Optional[StateSync] = None
        self._state_reader: t.Optional[StateReader] = None

        self.users = self.config.users + (users or [])

        self._loader = (loader or self.config.loader or SqlMeshLoader)()

        if load:
            self.load()
    def upsert_model(self, model: t.Union[str, Model], **kwargs: t.Any) -> Model:
        """Update or insert a model.

        The context's models dictionary will be updated to include these changes.

        Args:
            model: Model name or instance to update.
            kwargs: The kwargs to update the model with.

        Returns:
            A new instance of the updated or inserted model.
        """
        if isinstance(model, str):
            model = self._models[model]

        path = model._path  # type: ignore
        # model.copy() can't be used here due to a cached state that can be a part of a model instance.
        model = t.cast(Model, type(model)(**{**t.cast(Model, model).dict(), **kwargs}))
        model._path = path
        self._models.update({model.name: model})

        self._add_model_to_dag(model)
        update_model_schemas(self.dialect, self.dag, self._models)

        return model
Update or insert a model.
The context's models dictionary will be updated to include these changes.
Arguments:
- model: Model name or instance to update.
- kwargs: The kwargs to update the model with.
Returns:
A new instance of the updated or inserted model.
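For example, a minimal sketch of updating a single field on an existing model; the model name and the `owner` field are hypothetical stand-ins for a model and field in your project.
```python
from sqlmesh.core.context import Context

context = Context(path="example")

# Returns a new model instance; the context's model registry and DAG
# are updated to reflect the change.
updated = context.upsert_model("db.customers", owner="data_team")
```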
    def scheduler(self, environment: t.Optional[str] = None) -> Scheduler:
        """Returns the built-in scheduler.

        Args:
            environment: The target environment to source model snapshots from, or None
                if snapshots should be sourced from the currently loaded local state.

        Returns:
            The built-in scheduler instance.
        """
        snapshots: t.Iterable[Snapshot]
        if environment is not None:
            stored_environment = self.state_sync.get_environment(environment)
            if stored_environment is None:
                raise ConfigError(f"Environment '{environment}' was not found.")
            snapshots = self.state_sync.get_snapshots(stored_environment.snapshots).values()
        else:
            snapshots = self.snapshots.values()

        if not snapshots:
            raise ConfigError("No models were found")

        return Scheduler(
            snapshots,
            self.snapshot_evaluator,
            self.state_sync,
            max_workers=self.concurrent_tasks,
            console=self.console,
        )
Returns the built-in scheduler.
Arguments:
- environment: The target environment to source model snapshots from, or None if snapshots should be sourced from the currently loaded local state.
Returns:
The built-in scheduler instance.
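A minimal sketch of obtaining a scheduler, assuming a persisted environment named "staging" exists in the state sync.
```python
from sqlmesh.core.context import Context

context = Context(path="example")

# Snapshots are sourced from the persisted "staging" environment.
scheduler = context.scheduler("staging")

# With no environment, snapshots come from the loaded local state.
local_scheduler = context.scheduler()
```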
    def refresh(self) -> None:
        """Refresh all models that have been updated."""
        if self._loader.reload_needed():
            self.load()
Refresh all models that have been updated.
    def load(self) -> Context:
        """Load all files in the context's path."""
        with sys_path(self.path):
            project = self._loader.load(self)
            self._hooks = project.hooks
            self._macros = project.macros
            self._models = project.models
            self._audits = project.audits
            self.dag = project.dag

        return self
Load all files in the context's path.
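Because load() returns the context itself, construction with `load=False` can be chained with an explicit load, for example:
```python
from sqlmesh.core.context import Context

# Skip the automatic load on construction, then load explicitly.
context = Context(path="example", load=False).load()
```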
    def run(
        self,
        environment: t.Optional[str] = None,
        *,
        start: t.Optional[TimeLike] = None,
        end: t.Optional[TimeLike] = None,
        latest: t.Optional[TimeLike] = None,
        skip_janitor: bool = False,
    ) -> None:
        """Run the entire dag through the scheduler.

        Args:
            environment: The target environment to source model snapshots from. Default: prod.
            start: The start of the interval to render.
            end: The end of the interval to render.
            latest: The latest time used for non incremental datasets.
            skip_janitor: Whether to skip the janitor task.
        """
        self.scheduler(environment=environment or c.PROD).run(start, end, latest)

        if not skip_janitor:
            self._run_janitor()
Run the entire dag through the scheduler.
Arguments:
- environment: The target environment to source model snapshots from. Default: prod.
- start: The start of the interval to render.
- end: The end of the interval to render.
- latest: The latest time used for non incremental datasets.
- skip_janitor: Whether to skip the janitor task.
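For example, a minimal sketch of running the dag against prod for a fixed interval; the dates are arbitrary.
```python
from sqlmesh.core.context import Context

context = Context(path="example")

# Runs all models for the interval. Afterwards the janitor cleans up
# expired environments and snapshots unless skip_janitor=True.
context.run("prod", start="2023-01-01", end="2023-01-07")
```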
    def get_model(self, name: str) -> t.Optional[Model]:
        """Returns a model with the given name or None if a model with such a name doesn't exist."""
        return self._models.get(name)
Returns a model with the given name or None if a model with such a name doesn't exist.
Returns all registered models in this context.
Returns all registered macros in this context.
Generates and returns snapshots based on models registered in this context.
If one of the snapshots has been previously stored in the persisted state, the stored instance will be returned.
Generates and returns snapshots based on models registered in this context without reconciling them with the persisted state.
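A minimal sketch of these lookups; the model name is hypothetical.
```python
from sqlmesh.core.context import Context

context = Context(path="example")

# None is returned if no model with this name exists.
model = context.get_model("db.customers")

# Snapshots generated from all models registered in this context.
for name, snapshot in context.snapshots.items():
    print(name, snapshot.fingerprint)
```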
    def render(
        self,
        model_or_snapshot: ModelOrSnapshot,
        *,
        start: t.Optional[TimeLike] = None,
        end: t.Optional[TimeLike] = None,
        latest: t.Optional[TimeLike] = None,
        expand: t.Union[bool, t.Iterable[str]] = False,
        **kwargs: t.Any,
    ) -> exp.Expression:
        """Renders a model's query, expanding macros with provided kwargs, and optionally expanding referenced models.

        Args:
            model_or_snapshot: The model, model name, or snapshot to render.
            start: The start of the interval to render.
            end: The end of the interval to render.
            latest: The latest time used for non incremental datasets.
            expand: Whether or not to expand materialized models, defaults to False.
                If True, all referenced models are expanded as raw queries.
                If a list, only the models in the list are expanded as raw queries.

        Returns:
            The rendered expression.
        """
        latest = latest or yesterday_ds()

        if isinstance(model_or_snapshot, str):
            model = self._models[model_or_snapshot]
        elif isinstance(model_or_snapshot, Snapshot):
            model = model_or_snapshot.model
        else:
            model = model_or_snapshot

        expand = self.dag.upstream(model.name) if expand is True else expand or []

        if model.is_seed:
            df = next(model.render(self, start=start, end=end, latest=latest, **kwargs))
            return next(pandas_to_sql(df, model.columns_to_types))

        return model.render_query(
            start=start,
            end=end,
            latest=latest,
            snapshots=self.snapshots,
            expand=expand,
            **kwargs,
        )
Renders a model's query, expanding macros with provided kwargs, and optionally expanding referenced models.
Arguments:
- model_or_snapshot: The model, model name, or snapshot to render.
- start: The start of the interval to render.
- end: The end of the interval to render.
- latest: The latest time used for non incremental datasets.
- expand: Whether or not to expand materialized models, defaults to False. If True, all referenced models are expanded as raw queries. If a list, only the models in the list are expanded as raw queries.
Returns:
The rendered expression.
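For example, a minimal sketch of rendering a model's query and printing the resulting SQL; the model name and dates are hypothetical.
```python
from sqlmesh.core.context import Context

context = Context(path="example")

query = context.render(
    "db.customers",
    start="2023-01-01",
    end="2023-01-07",
    expand=True,  # inline all upstream models as raw queries
)

# The result is a sqlglot expression.
print(query.sql(pretty=True))
```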
    def evaluate(
        self,
        model_or_snapshot: ModelOrSnapshot,
        start: TimeLike,
        end: TimeLike,
        latest: TimeLike,
        limit: t.Optional[int] = None,
        **kwargs: t.Any,
    ) -> DF:
        """Evaluate a model or snapshot (running its query against a DB/Engine).

        This method is used to test or iterate on models without side effects.

        Args:
            model_or_snapshot: The model, model name, or snapshot to evaluate.
            start: The start of the interval to evaluate.
            end: The end of the interval to evaluate.
            latest: The latest time used for non incremental datasets.
            limit: A limit applied to the model.
        """
        if isinstance(model_or_snapshot, str):
            snapshot = self.snapshots[model_or_snapshot]
        elif isinstance(model_or_snapshot, Snapshot):
            snapshot = model_or_snapshot
        else:
            snapshot = self.snapshots[model_or_snapshot.name]

        df = self.snapshot_evaluator.evaluate(
            snapshot,
            start,
            end,
            latest,
            snapshots=self.snapshots,
            limit=limit or c.DEFAULT_MAX_LIMIT,
        )

        if df is None:
            raise RuntimeError(f"Error evaluating {snapshot.model.name}")

        return df
Evaluate a model or snapshot (running its query against a DB/Engine).
This method is used to test or iterate on models without side effects.
Arguments:
- model_or_snapshot: The model, model name, or snapshot to evaluate.
- start: The start of the interval to evaluate.
- end: The end of the interval to evaluate.
- latest: The latest time used for non incremental datasets.
- limit: A limit applied to the model.
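For example, a minimal sketch of evaluating a model against the engine with a row limit; the model name and dates are hypothetical.
```python
from sqlmesh.core.context import Context

context = Context(path="example")

# Returns a dataframe; nothing is persisted to the warehouse.
df = context.evaluate(
    "db.customers",
    start="2023-01-01",
    end="2023-01-07",
    latest="2023-01-07",
    limit=100,
)
print(df)
```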
    def format(self) -> None:
        """Format all models in a given directory."""
        for model in self._models.values():
            if not model.is_sql:
                continue
            with open(model._path, "r+", encoding="utf-8") as file:
                expressions = parse(file.read(), default_dialect=self.dialect)
                file.seek(0)
                file.write(format_model_expressions(expressions, model.dialect))
                file.truncate()
Format all models in a given directory.
    def plan(
        self,
        environment: t.Optional[str] = None,
        *,
        start: t.Optional[TimeLike] = None,
        end: t.Optional[TimeLike] = None,
        create_from: t.Optional[str] = None,
        skip_tests: bool = False,
        restate_models: t.Optional[t.Iterable[str]] = None,
        no_gaps: bool = False,
        skip_backfill: bool = False,
        forward_only: bool = False,
        no_prompts: bool = False,
        auto_apply: bool = False,
        no_auto_categorization: t.Optional[bool] = None,
    ) -> Plan:
        """Interactively create a migration plan.

        This method compares the current context with an environment. It then presents
        the differences and asks whether to backfill each modified model.

        Args:
            environment: The environment to diff and plan against.
            start: The start date of the backfill if there is one.
            end: The end date of the backfill if there is one.
            create_from: The environment to create the target environment from if it
                doesn't exist. If not specified, the "prod" environment will be used.
            skip_tests: Unit tests are run by default; set this to True to skip them.
            restate_models: A list of either internal or external models that need to be restated
                for the given plan interval. If the target environment is a production environment,
                ALL snapshots that depend on these upstream tables will have their intervals deleted
                (even ones not in this current environment). Only the snapshots in this environment will
                be backfilled, whereas others need to be recovered on a future plan application. For development
                environments, only snapshots that are part of this plan will be affected.
            no_gaps: Whether to ensure that new snapshots for models that are already a
                part of the target environment have no data gaps when compared against previous
                snapshots for the same models.
            skip_backfill: Whether to skip the backfill step. Default: False.
            forward_only: Whether the purpose of the plan is to make forward-only changes.
            no_prompts: Whether to disable interactive prompts for the backfill time range. Please note that
                if this flag is set to true and there are uncategorized changes the plan creation will
                fail. Default: False.
            auto_apply: Whether to automatically apply the new plan after creation. Default: False.
            no_auto_categorization: Indicates whether to disable automatic categorization of model
                changes (breaking / non-breaking). If not provided, then the corresponding configuration
                option determines the behavior.

        Returns:
            The populated Plan object.
        """
        environment = environment or c.PROD
        environment = Environment.normalize_name(environment)

        if skip_backfill and not no_gaps and environment == c.PROD:
            raise ConfigError(
                "When targeting the production environment either the backfill should not be skipped or the lack of data gaps should be enforced (--no-gaps flag)."
            )

        self._run_plan_tests(skip_tests)

        plan = Plan(
            context_diff=self._context_diff(environment or c.PROD, create_from=create_from),
            dag=self.dag,
            state_reader=self.state_reader,
            start=start,
            end=end,
            apply=self.apply,
            restate_models=restate_models,
            no_gaps=no_gaps,
            skip_backfill=skip_backfill,
            is_dev=environment != c.PROD,
            forward_only=forward_only,
            environment_ttl=self.config.environment_ttl,
            categorizer_config=self.config.auto_categorize_changes,
            auto_categorization_enabled=not no_auto_categorization,
        )

        if not no_prompts:
            self.console.plan(plan, auto_apply)
        elif auto_apply:
            self.apply(plan)

        return plan
Interactively create a migration plan.
This method compares the current context with an environment. It then presents the differences and asks whether to backfill each modified model.
Arguments:
- environment: The environment to diff and plan against.
- start: The start date of the backfill if there is one.
- end: The end date of the backfill if there is one.
- create_from: The environment to create the target environment from if it doesn't exist. If not specified, the "prod" environment will be used.
- skip_tests: Unit tests are run by default; set this to True to skip them.
- restate_models: A list of either internal or external models that need to be restated for the given plan interval. If the target environment is a production environment, ALL snapshots that depend on these upstream tables will have their intervals deleted (even ones not in this current environment). Only the snapshots in this environment will be backfilled, whereas others need to be recovered on a future plan application. For development environments, only snapshots that are part of this plan will be affected.
- no_gaps: Whether to ensure that new snapshots for models that are already a part of the target environment have no data gaps when compared against previous snapshots for the same models.
- skip_backfill: Whether to skip the backfill step. Default: False.
- forward_only: Whether the purpose of the plan is to make forward-only changes.
- no_prompts: Whether to disable interactive prompts for the backfill time range. Please note that if this flag is set to true and there are uncategorized changes the plan creation will fail. Default: False.
- auto_apply: Whether to automatically apply the new plan after creation. Default: False.
- no_auto_categorization: Indicates whether to disable automatic categorization of model changes (breaking / non-breaking). If not provided, then the corresponding configuration option determines the behavior.
Returns:
The populated Plan object.
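For example, a minimal sketch of creating and applying a plan for a development environment non-interactively; the dates are arbitrary.
```python
from sqlmesh.core.context import Context

context = Context(path="example")

# With no_prompts=True, plan creation fails if any changes remain
# uncategorized; auto_apply=True applies the plan immediately.
plan = context.plan(
    "dev",
    start="2023-01-01",
    end="2023-01-07",
    no_prompts=True,
    auto_apply=True,
)
```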
    def apply(self, plan: Plan) -> None:
        """Applies a plan by pushing snapshots and backfilling data.

        Given a plan, it pushes snapshots into the state sync and then uses the scheduler
        to backfill all models.

        Args:
            plan: The plan to apply.
        """
        if not plan.context_diff.has_changes and not plan.requires_backfill:
            return
        if plan.uncategorized:
            raise PlanError("Can't apply a plan with uncategorized changes.")
        self.config.scheduler.create_plan_evaluator(self).evaluate(plan)
Applies a plan by pushing snapshots and backfilling data.
Given a plan, it pushes snapshots into the state sync and then uses the scheduler to backfill all models.
Arguments:
- plan: The plan to apply.
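A minimal sketch of guarding against the uncategorized-changes failure mode described above.
```python
from sqlmesh.core.context import Context
from sqlmesh.utils.errors import PlanError

context = Context(path="example")

try:
    plan = context.plan("dev", no_prompts=True)
    context.apply(plan)
except PlanError:
    # Raised when changes are left uncategorized, either at plan
    # creation (with no_prompts=True) or when applying the plan.
    raise
```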
    def diff(self, environment: t.Optional[str] = None, detailed: bool = False) -> None:
        """Show a diff of the current context with a given environment.

        Args:
            environment: The environment to diff against.
            detailed: Show the actual SQL differences if True.
        """
        environment = environment or c.PROD
        environment = Environment.normalize_name(environment)
        self.console.show_model_difference_summary(
            self._context_diff(environment or c.PROD), detailed
        )
Show a diff of the current context with a given environment.
Arguments:
- environment: The environment to diff against.
- detailed: Show the actual SQL differences if True.
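For example:
```python
from sqlmesh.core.context import Context

context = Context(path="example")

# Summarize model differences against staging, including the SQL diffs.
context.diff("staging", detailed=True)
```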
    def get_dag(self, format: str = "svg") -> graphviz.Digraph:
        """Gets a graphviz dag.

        This method requires installing the graphviz base library through your package manager
        and the python graphviz library.

        To display within Databricks:
        displayHTML(context.get_dag().pipe(encoding='utf-8'))

        Args:
            format: The desired format to use for representing the graph
        """
        from sqlmesh import runtime_env

        try:
            import graphviz  # type: ignore
        except ModuleNotFoundError as e:
            if runtime_env.is_databricks:
                raise MissingDependencyError(
                    "Rendering a dag requires graphviz. Run `pip install graphviz` and then `sudo apt-get install -y python3-dev graphviz libgraphviz-dev pkg-config`"
                )
            raise MissingDependencyError(
                "Rendering a dag requires a manual install of graphviz. Run `pip install graphviz` and then install graphviz library: https://graphviz.org/download/."
            ) from e

        graph = graphviz.Digraph(node_attr={"shape": "box"}, format=format)

        for name, upstream in self.dag.graph.items():
            graph.node(name)
            for u in upstream:
                graph.edge(u, name)
        return graph
Gets a graphviz dag.
This method requires installing the graphviz base library through your package manager and the python graphviz library.
To display within Databricks: displayHTML(context.get_dag().pipe(encoding='utf-8'))
Arguments:
- format: The desired format to use for representing the graph
    def render_dag(self, path: str, format: str = "jpeg") -> str:
        """Render the dag using graphviz.

        This method requires installing the graphviz base library through your package manager
        and the python graphviz library.

        Args:
            path: The filename to save the dag to.
            format: The desired format to use when rendering the dag.
        """
        graph = self.get_dag(format=format)

        try:
            return graph.render(path, format=format)
        except graphviz.backend.execute.ExecutableNotFound as e:
            raise MissingDependencyError(
                "Graphviz is pip-installed but the system install is missing. Instructions: https://graphviz.org/download/"
            ) from e
Render the dag using graphviz.
This method requires installing the graphviz base library through your package manager and the python graphviz library.
Arguments:
- path: The filename to save the dag to.
- format: The desired format to use when rendering the dag.
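For example, a minimal sketch that writes the dag to disk; the output filename is hypothetical.
```python
from sqlmesh.core.context import Context

context = Context(path="example")

# Returns the path of the generated file. Requires both the graphviz
# Python package and the system graphviz binaries.
output_path = context.render_dag("dag", format="jpeg")
```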
    def test(
        self,
        match_patterns: t.Optional[t.List[str]] = None,
        tests: t.Optional[t.List[str]] = None,
        verbose: bool = False,
    ) -> unittest.result.TestResult:
        """Discover and run model tests."""
        verbosity = 2 if verbose else 1
        try:
            if tests:
                result = run_model_tests(
                    tests=tests,
                    snapshots=self.local_snapshots,
                    engine_adapter=self._test_engine_adapter,
                    verbosity=verbosity,
                    patterns=match_patterns,
                    ignore_patterns=self.ignore_patterns,
                )
            else:
                result = run_all_model_tests(
                    path=self.test_directory_path,
                    snapshots=self.local_snapshots,
                    engine_adapter=self._test_engine_adapter,
                    verbosity=verbosity,
                    patterns=match_patterns,
                    ignore_patterns=self.ignore_patterns,
                )
        finally:
            self._test_engine_adapter.close()
        return result
Discover and run model tests.
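For example, a minimal sketch of running a subset of tests; the pattern is hypothetical.
```python
from sqlmesh.core.context import Context

context = Context(path="example")

# The result is a standard unittest TestResult.
result = context.test(match_patterns=["*customer*"], verbose=True)
assert result.wasSuccessful()
```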
    def audit(
        self,
        start: TimeLike,
        end: TimeLike,
        *,
        models: t.Optional[t.Iterator[str]] = None,
        latest: t.Optional[TimeLike] = None,
    ) -> None:
        """Audit models.

        Args:
            start: The start of the interval to audit.
            end: The end of the interval to audit.
            models: The models to audit. All models will be audited if not specified.
            latest: The latest time used for non incremental datasets.
        """
        snapshots = (
            [self.snapshots[model] for model in models] if models else self.snapshots.values()
        )

        num_audits = sum(len(snapshot.model.audits) for snapshot in snapshots)
        self.console.log_status_update(f"Found {num_audits} audit(s).")
        errors = []
        for snapshot in snapshots:
            for audit_result in self.snapshot_evaluator.audit(
                snapshot=snapshot,
                start=start,
                end=end,
                snapshots=self.snapshots,
                raise_exception=False,
            ):
                if audit_result.count:
                    errors.append(audit_result)
                    self.console.log_status_update(f"{audit_result.audit.name} FAIL.")
                else:
                    self.console.log_status_update(f"{audit_result.audit.name} PASS.")

        self.console.log_status_update(f"\nFinished with {len(errors)} audit error(s).")
        for error in errors:
            self.console.log_status_update(
                f"\nFailure in audit {error.audit.name} ({error.audit._path})."
            )
            self.console.log_status_update(f"Got {error.count} results, expected 0.")
            self.console.show_sql(f"{error.query}")
        self.console.log_status_update("Done.")
Audit models.
Arguments:
- start: The start of the interval to audit.
- end: The end of the interval to audit.
- models: The models to audit. All models will be audited if not specified.
- latest: The latest time used for non incremental datasets.
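For example, a minimal sketch of auditing a single model over a one-day interval; the model name is hypothetical.
```python
from sqlmesh.core.context import Context

context = Context(path="example")

# Omitting models audits every model in the context.
context.audit("yesterday", "now", models=iter(["db.customers"]))
```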
    def close(self) -> None:
        """Releases all resources allocated by this context."""
        self.snapshot_evaluator.close()
Releases all resources allocated by this context.
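A minimal sketch of scoping a context so its resources are always released, using contextlib.closing to call close() on exit.
```python
import contextlib

from sqlmesh.core.context import Context

with contextlib.closing(Context(path="example")) as context:
    context.run("prod")
```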