SnapshotEvaluator
A snapshot evaluator is responsible for evaluating a snapshot given some runtime arguments, e.g. start and end timestamps.
Evaluation
Snapshot evaluation involves determining the queries necessary to evaluate a snapshot and using sqlmesh.core.engine_adapter to execute them. Schemas, tables, and views are created if they don't exist, and data is inserted when applicable.
A snapshot evaluator also promotes snapshots to and demotes them from a given environment.
Audits
A snapshot evaluator can also run the audits for a snapshot's model. This is often done after a snapshot has been evaluated to check for data quality issues.
For more information about audits, see sqlmesh.core.audit.
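Putting the pieces together, the typical lifecycle is: create physical objects, evaluate an interval, then promote the results into an environment. Below is a minimal sketch of that flow; it assumes adapter is an already-configured EngineAdapter, that the snapshot collections come from a SQLMesh plan, and that the import path matches this module. All names here are illustrative, not a fixed recipe.

from sqlmesh.core.snapshot_evaluator import SnapshotEvaluator  # import path assumed

evaluator = SnapshotEvaluator(adapter, ddl_concurrent_tasks=4)

# Create physical schemas and tables for the new snapshots.
evaluator.create(new_snapshots, snapshots_by_id)

# Materialize one day of data for a single snapshot.
evaluator.evaluate(
    snapshot,
    start="2023-01-01",
    end="2023-01-02",
    latest="2023-01-02",
    snapshots=snapshots_by_name,  # upstream snapshots keyed by model name
)

# Point the "prod" environment views at the new physical tables.
evaluator.promote(new_snapshots, environment="prod")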
1""" 2# SnapshotEvaluator 3 4A snapshot evaluator is responsible for evaluating a snapshot given some runtime arguments, e.g. start 5and end timestamps. 6 7# Evaluation 8 9Snapshot evaluation involves determining the queries necessary to evaluate a snapshot and using 10`sqlmesh.core.engine_adapter` to execute the queries. Schemas, tables, and views are created if 11they don't exist and data is inserted when applicable. 12 13A snapshot evaluator also promotes and demotes snapshots to a given environment. 14 15# Audits 16 17A snapshot evaluator can also run the audits for a snapshot's model. This is often done after a snapshot 18has been evaluated to check for data quality issues. 19 20For more information about audits, see `sqlmesh.core.audit`. 21""" 22from __future__ import annotations 23 24import logging 25import typing as t 26from contextlib import contextmanager 27 28from sqlglot import exp, select 29from sqlglot.executor import execute 30 31from sqlmesh.core.audit import BUILT_IN_AUDITS, AuditResult 32from sqlmesh.core.engine_adapter import EngineAdapter, TransactionType 33from sqlmesh.core.schema_diff import SchemaDeltaOp, SchemaDiffCalculator 34from sqlmesh.core.snapshot import Snapshot, SnapshotId, SnapshotInfoLike 35from sqlmesh.utils.concurrency import concurrent_apply_to_snapshots 36from sqlmesh.utils.date import TimeLike 37from sqlmesh.utils.errors import AuditError, ConfigError, SQLMeshError 38 39if t.TYPE_CHECKING: 40 from sqlmesh.core.engine_adapter._typing import DF, QueryOrDF 41 42logger = logging.getLogger(__name__) 43 44 45class SnapshotEvaluator: 46 """Evaluates a snapshot given runtime arguments through an arbitrary EngineAdapter. 47 48 The SnapshotEvaluator contains the business logic to generically evaluate a snapshot. 49 It is responsible for delegating queries to the EngineAdapter. The SnapshotEvaluator 50 does not directly communicate with the underlying execution engine. 51 52 Args: 53 adapter: The adapter that interfaces with the execution engine. 54 ddl_concurrent_task: The number of concurrent tasks used for DDL 55 operations (table / view creation, deletion, etc). Default: 1. 56 """ 57 58 def __init__(self, adapter: EngineAdapter, ddl_concurrent_tasks: int = 1): 59 self.adapter = adapter 60 self.ddl_concurrent_tasks = ddl_concurrent_tasks 61 self._schema_diff_calculator = SchemaDiffCalculator(self.adapter) 62 63 def evaluate( 64 self, 65 snapshot: Snapshot, 66 start: TimeLike, 67 end: TimeLike, 68 latest: TimeLike, 69 snapshots: t.Dict[str, Snapshot], 70 limit: t.Optional[int] = None, 71 is_dev: bool = False, 72 **kwargs: t.Any, 73 ) -> t.Optional[DF]: 74 """Evaluate a snapshot, creating its schema and table if it doesn't exist and then inserting it. 75 76 Args: 77 snapshot: Snapshot to evaluate. 78 start: The start datetime to render. 79 end: The end datetime to render. 80 latest: The latest datetime to use for non-incremental queries. 81 snapshots: All upstream snapshots (by model name) to use for expansion and mapping of physical locations. 82 limit: If limit is > 0, the query will not be persisted but evaluated and returned as a dataframe. 83 is_dev: Indicates whether the evaluation happens in the development mode and temporary 84 tables / table clones should be used where applicable. 85 kwargs: Additional kwargs to pass to the renderer. 
86 """ 87 if snapshot.is_embedded_kind: 88 return None 89 90 if not limit and not snapshot.is_forward_only: 91 self._ensure_no_paused_forward_only_upstream(snapshot, snapshots) 92 93 logger.info("Evaluating snapshot %s", snapshot.snapshot_id) 94 95 model = snapshot.model 96 columns_to_types = model.columns_to_types 97 table_name = "" if limit else snapshot.table_name(is_dev=is_dev) 98 99 def apply(query_or_df: QueryOrDF, index: int = 0) -> None: 100 if snapshot.is_view_kind: 101 if index > 0: 102 raise ConfigError("Cannot batch view creation.") 103 logger.info("Replacing view '%s'", table_name) 104 self.adapter.create_view(table_name, query_or_df, columns_to_types) 105 elif index > 0: 106 self.adapter.insert_append( 107 table_name, query_or_df, columns_to_types=columns_to_types 108 ) 109 elif snapshot.is_full_kind or snapshot.is_seed_kind: 110 self.adapter.replace_query(table_name, query_or_df, columns_to_types) 111 else: 112 logger.info("Inserting batch (%s, %s) into %s'", start, end, table_name) 113 if snapshot.is_incremental_by_time_range_kind: 114 # A model's time_column could be None but 115 # it shouldn't be for an incremental by time range model 116 assert model.time_column 117 self.adapter.insert_overwrite_by_time_partition( 118 table_name, 119 query_or_df, 120 start=start, 121 end=end, 122 time_formatter=model.convert_to_time_column, 123 time_column=model.time_column, 124 columns_to_types=columns_to_types, 125 ) 126 elif snapshot.is_incremental_by_unique_key_kind: 127 self.adapter.merge( 128 table_name, 129 query_or_df, 130 column_names=columns_to_types.keys(), 131 unique_key=model.unique_key, 132 ) 133 else: 134 raise SQLMeshError(f"Unexpected SnapshotKind: {snapshot.model.kind}") 135 136 for sql_statement in model.sql_statements: 137 self.adapter.execute(sql_statement) 138 139 from sqlmesh.core.context import ExecutionContext 140 141 context = ExecutionContext(self.adapter, snapshots, is_dev) 142 143 model.run_pre_hooks( 144 context=context, 145 start=start, 146 end=end, 147 latest=latest, 148 **kwargs, 149 ) 150 151 queries_or_dfs = model.render( 152 context, 153 start=start, 154 end=end, 155 latest=latest, 156 engine_adapter=self.adapter, 157 **kwargs, 158 ) 159 160 with self.adapter.transaction( 161 transaction_type=TransactionType.DDL 162 if model.kind.is_view or model.kind.is_full 163 else TransactionType.DML 164 ): 165 for index, query_or_df in enumerate(queries_or_dfs): 166 if limit and limit > 0: 167 if isinstance(query_or_df, exp.Select): 168 existing_limit = query_or_df.args.get("limit") 169 if existing_limit: 170 limit = min( 171 limit, 172 execute(exp.select(existing_limit.expression)).rows[0][0], 173 ) 174 return query_or_df.head(limit) if hasattr(query_or_df, "head") else self.adapter._fetch_native_df(query_or_df.limit(limit)) # type: ignore 175 176 apply(query_or_df, index) 177 178 model.run_post_hooks( 179 context=context, 180 start=start, 181 end=end, 182 latest=latest, 183 **kwargs, 184 ) 185 return None 186 187 def promote( 188 self, 189 target_snapshots: t.Iterable[SnapshotInfoLike], 190 environment: str, 191 is_dev: bool = False, 192 on_complete: t.Optional[t.Callable[[SnapshotInfoLike], None]] = None, 193 ) -> None: 194 """Promotes the given collection of snapshots in the target environment by replacing a corresponding 195 view with a physical table associated with the given snapshot. 196 197 Args: 198 target_snapshots: Snapshots to promote. 199 environment: The target environment. 
200 is_dev: Indicates whether the promotion happens in the development mode and temporary 201 tables / table clones should be used where applicable. 202 on_complete: a callback to call on each successfully promoted snapshot. 203 """ 204 with self.concurrent_context(): 205 concurrent_apply_to_snapshots( 206 target_snapshots, 207 lambda s: self._promote_snapshot(s, environment, is_dev, on_complete), 208 self.ddl_concurrent_tasks, 209 ) 210 211 def demote( 212 self, 213 target_snapshots: t.Iterable[SnapshotInfoLike], 214 environment: str, 215 on_complete: t.Optional[t.Callable[[SnapshotInfoLike], None]] = None, 216 ) -> None: 217 """Demotes the given collection of snapshots in the target environment by removing its view. 218 219 Args: 220 target_snapshots: Snapshots to demote. 221 environment: The target environment. 222 on_complete: a callback to call on each successfully demoted snapshot. 223 """ 224 with self.concurrent_context(): 225 concurrent_apply_to_snapshots( 226 target_snapshots, 227 lambda s: self._demote_snapshot(s, environment, on_complete), 228 self.ddl_concurrent_tasks, 229 ) 230 231 def create( 232 self, 233 target_snapshots: t.Iterable[Snapshot], 234 snapshots: t.Dict[SnapshotId, Snapshot], 235 ) -> None: 236 """Creates a physical snapshot schema and table for the given collection of snapshots. 237 238 Args: 239 target_snapshots: Target snapshosts. 240 """ 241 with self.concurrent_context(): 242 concurrent_apply_to_snapshots( 243 target_snapshots, 244 lambda s: self._create_snapshot(s, snapshots), 245 self.ddl_concurrent_tasks, 246 ) 247 248 def migrate(self, target_snapshots: t.Iterable[SnapshotInfoLike]) -> None: 249 """Alters a physical snapshot table to match its snapshot's schema for the given collection of snapshots. 250 251 Args: 252 target_snapshots: Target snapshosts. 253 """ 254 with self.concurrent_context(): 255 concurrent_apply_to_snapshots( 256 target_snapshots, 257 lambda s: self._migrate_snapshot(s), 258 self.ddl_concurrent_tasks, 259 ) 260 261 def cleanup(self, target_snapshots: t.Iterable[SnapshotInfoLike]) -> None: 262 """Cleans up the given snapshots by removing its table 263 264 Args: 265 target_snapshots: Snapshots to cleanup. 266 """ 267 with self.concurrent_context(): 268 concurrent_apply_to_snapshots( 269 target_snapshots, 270 self._cleanup_snapshot, 271 self.ddl_concurrent_tasks, 272 reverse_order=True, 273 ) 274 275 def audit( 276 self, 277 *, 278 snapshot: Snapshot, 279 snapshots: t.Dict[str, Snapshot], 280 start: t.Optional[TimeLike] = None, 281 end: t.Optional[TimeLike] = None, 282 latest: t.Optional[TimeLike] = None, 283 raise_exception: bool = True, 284 is_dev: bool = False, 285 **kwargs: t.Any, 286 ) -> t.List[AuditResult]: 287 """Execute a snapshot's model's audit queries. 288 289 Args: 290 snapshot: Snapshot to evaluate. start: The start datetime to audit. Defaults to epoch start. 291 snapshots: All upstream snapshots (by model name) to use for expansion and mapping of physical locations. 292 start: The start datetime to audit. Defaults to epoch start. 293 end: The end datetime to audit. Defaults to epoch start. 294 latest: The latest datetime to use for non-incremental queries. Defaults to epoch start. 295 raise_exception: Whether to raise an exception if the audit fails. Blocking rules determine if an 296 AuditError is thrown or if we just warn with logger 297 is_dev: Indicates whether the auditing happens in the development mode and temporary 298 tables / table clones should be used where applicable. 
299 kwargs: Additional kwargs to pass to the renderer. 300 """ 301 if snapshot.is_temporary_table(is_dev): 302 # We can't audit a temporary table. 303 return [] 304 305 if not snapshot.version: 306 raise ConfigError( 307 f"Cannot audit '{snapshot.name}' because it has not been versioned yet. Apply a plan first." 308 ) 309 310 logger.info("Auditing snapshot %s", snapshot.snapshot_id) 311 312 audits_by_name = {**BUILT_IN_AUDITS, **{a.name: a for a in snapshot.audits}} 313 314 results = [] 315 for audit_name, audit_args in snapshot.model.audits: 316 audit = audits_by_name[audit_name] 317 query = audit.render_query( 318 snapshot, 319 start=start, 320 end=end, 321 latest=latest, 322 snapshots=snapshots, 323 is_dev=is_dev, 324 engine_adapter=self.adapter, 325 **audit_args, 326 **kwargs, 327 ) 328 count, *_ = self.adapter.fetchone(select("COUNT(*)").from_(query.subquery())) 329 if count and raise_exception: 330 message = f"Audit '{audit_name}' for model '{snapshot.model.name}' failed.\nGot {count} results, expected 0.\n{query}" 331 if audit.blocking: 332 raise AuditError(message) 333 else: 334 logger.warning(f"{message}\nAudit is warn only so proceeding with execution.") 335 results.append(AuditResult(audit=audit, count=count, query=query)) 336 return results 337 338 @contextmanager 339 def concurrent_context(self) -> t.Generator[None, None, None]: 340 try: 341 yield 342 finally: 343 self.recycle() 344 345 def recycle(self) -> None: 346 """Closes all open connections and releases all allocated resources associated with any thread 347 except the calling one.""" 348 try: 349 self.adapter.recycle() 350 except Exception: 351 logger.exception("Failed to recycle Snapshot Evaluator") 352 353 def close(self) -> None: 354 """Closes all open connections and releases all allocated resources.""" 355 try: 356 self.adapter.close() 357 except Exception: 358 logger.exception("Failed to close Snapshot Evaluator") 359 360 def _create_snapshot(self, snapshot: Snapshot, snapshots: t.Dict[SnapshotId, Snapshot]) -> None: 361 if snapshot.is_embedded_kind: 362 return 363 364 self.adapter.create_schema(snapshot.physical_schema) 365 366 # If a snapshot reuses an existing version we assume that the table for that version 367 # has already been created, so we only need to create a temporary table or a clone. 
368 is_dev = not snapshot.is_new_version 369 table_name = snapshot.table_name(is_dev=is_dev) 370 371 parent_snapshots_by_name = { 372 snapshots[p_sid].name: snapshots[p_sid] for p_sid in snapshot.parents 373 } 374 375 if snapshot.is_view_kind: 376 logger.info("Creating view '%s'", table_name) 377 self.adapter.create_view( 378 table_name, 379 snapshot.model.render_query(snapshots=parent_snapshots_by_name, is_dev=is_dev), 380 ) 381 else: 382 logger.info("Creating table '%s'", table_name) 383 self.adapter.create_table( 384 table_name, 385 query_or_columns_to_types=snapshot.model.columns_to_types 386 if snapshot.model.annotated 387 else snapshot.model.ctas_query(parent_snapshots_by_name, is_dev=is_dev), 388 storage_format=snapshot.model.storage_format, 389 partitioned_by=snapshot.model.partitioned_by, 390 partition_interval_unit=snapshot.model.interval_unit(), 391 ) 392 393 def _migrate_snapshot(self, snapshot: SnapshotInfoLike) -> None: 394 if not snapshot.is_materialized or snapshot.is_new_version: 395 return 396 397 tmp_table_name = snapshot.table_name(is_dev=True) 398 target_table_name = snapshot.table_name() 399 400 schema_deltas = self._schema_diff_calculator.calculate(target_table_name, tmp_table_name) 401 if not schema_deltas: 402 return 403 404 added_columns = {} 405 dropped_columns = [] 406 for delta in schema_deltas: 407 if delta.op == SchemaDeltaOp.ADD: 408 added_columns[delta.column_name] = delta.column_type 409 elif delta.op == SchemaDeltaOp.DROP: 410 dropped_columns.append(delta.column_name) 411 else: 412 raise ConfigError(f"Unsupported schema delta operation: {delta.op}") 413 414 logger.info( 415 "Altering table '%s'. Added columns: %s; dropped columns: %s", 416 target_table_name, 417 added_columns, 418 dropped_columns, 419 ) 420 self.adapter.alter_table(target_table_name, added_columns, dropped_columns) 421 422 def _promote_snapshot( 423 self, 424 snapshot: SnapshotInfoLike, 425 environment: str, 426 is_dev: bool, 427 on_complete: t.Optional[t.Callable[[SnapshotInfoLike], None]], 428 ) -> None: 429 qualified_view_name = snapshot.qualified_view_name 430 schema = qualified_view_name.schema_for_environment(environment=environment) 431 if schema is not None: 432 self.adapter.create_schema(schema) 433 434 view_name = qualified_view_name.for_environment(environment=environment) 435 if not snapshot.is_embedded_kind: 436 table_name = snapshot.table_name(is_dev=is_dev, for_read=True) 437 logger.info("Updating view '%s' to point at table '%s'", view_name, table_name) 438 self.adapter.create_view(view_name, exp.select("*").from_(table_name)) 439 else: 440 logger.info("Dropping view '%s' for non-materialized table", view_name) 441 self.adapter.drop_view(view_name) 442 443 if on_complete is not None: 444 on_complete(snapshot) 445 446 def _demote_snapshot( 447 self, 448 snapshot: SnapshotInfoLike, 449 environment: str, 450 on_complete: t.Optional[t.Callable[[SnapshotInfoLike], None]], 451 ) -> None: 452 view_name = snapshot.qualified_view_name.for_environment(environment=environment) 453 logger.info("Dropping view '%s'", view_name) 454 self.adapter.drop_view(view_name) 455 456 if on_complete is not None: 457 on_complete(snapshot) 458 459 def _cleanup_snapshot(self, snapshot: SnapshotInfoLike) -> None: 460 if snapshot.is_embedded_kind: 461 return 462 463 snapshot = snapshot.table_info 464 table_names = [snapshot.table_name()] 465 if snapshot.version != snapshot.fingerprint: 466 table_names.append(snapshot.table_name(is_dev=True)) 467 468 for table_name in table_names: 469 if 
snapshot.is_materialized: 470 self.adapter.drop_table(table_name) 471 logger.info("Dropped table '%s'", table_name) 472 else: 473 self.adapter.drop_view(table_name) 474 logger.info("Dropped view '%s'", table_name) 475 476 def _ensure_no_paused_forward_only_upstream( 477 self, snapshot: Snapshot, parent_snapshots: t.Dict[str, Snapshot] 478 ) -> None: 479 for p in parent_snapshots.values(): 480 if p.is_forward_only and p.is_paused: 481 raise SQLMeshError( 482 f"Snapshot {snapshot.snapshot_id} depends on a paused forward-only snapshot {p.snapshot_id}. Create and apply a new plan to fix this issue." 483 )
class SnapshotEvaluator(adapter: EngineAdapter, ddl_concurrent_tasks: int = 1)
Evaluates a snapshot given runtime arguments through an arbitrary EngineAdapter.
The SnapshotEvaluator contains the business logic to generically evaluate a snapshot. It is responsible for delegating queries to the EngineAdapter. The SnapshotEvaluator does not directly communicate with the underlying execution engine.
Arguments:
- adapter: The adapter that interfaces with the execution engine.
- ddl_concurrent_tasks: The number of concurrent tasks used for DDL operations (table / view creation, deletion, etc). Default: 1.
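As a concrete construction sketch: the snippet below assumes create_engine_adapter from sqlmesh.core.engine_adapter (with a connection-factory plus dialect signature) and an in-memory DuckDB connection; treat the exact call as an assumption rather than a fixed recipe.

import duckdb

from sqlmesh.core.engine_adapter import create_engine_adapter
from sqlmesh.core.snapshot_evaluator import SnapshotEvaluator  # import path assumed

# The adapter wraps a connection factory; the evaluator only ever talks to the adapter.
adapter = create_engine_adapter(lambda: duckdb.connect(), dialect="duckdb")
evaluator = SnapshotEvaluator(adapter, ddl_concurrent_tasks=2)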
def evaluate(
    self,
    snapshot: Snapshot,
    start: TimeLike,
    end: TimeLike,
    latest: TimeLike,
    snapshots: t.Dict[str, Snapshot],
    limit: t.Optional[int] = None,
    is_dev: bool = False,
    **kwargs: t.Any,
) -> t.Optional[DF]
Evaluate a snapshot, creating its schema and table if they don't exist and then inserting data.
Arguments:
- snapshot: Snapshot to evaluate.
- start: The start datetime to render.
- end: The end datetime to render.
- latest: The latest datetime to use for non-incremental queries.
- snapshots: All upstream snapshots (by model name) to use for expansion and mapping of physical locations.
- limit: If limit is > 0, the query will not be persisted but evaluated and returned as a dataframe.
- is_dev: Indicates whether the evaluation happens in development mode and temporary tables / table clones should be used where applicable.
- kwargs: Additional kwargs to pass to the renderer.
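For instance, the limit argument makes it possible to preview a model's output without touching its physical table. A sketch, reusing the evaluator and snapshot variables from the construction example above:

# With limit > 0 the rendered query is returned as a dataframe instead of
# being persisted; any LIMIT already present in the query is respected.
preview_df = evaluator.evaluate(
    snapshot,
    start="2023-01-01",
    end="2023-01-02",
    latest="2023-01-02",
    snapshots=snapshots_by_name,
    limit=100,
)
print(preview_df)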
def promote(
    self,
    target_snapshots: t.Iterable[SnapshotInfoLike],
    environment: str,
    is_dev: bool = False,
    on_complete: t.Optional[t.Callable[[SnapshotInfoLike], None]] = None,
) -> None
Promotes the given collection of snapshots in the target environment by pointing each snapshot's view at the physical table associated with that snapshot.
Arguments:
- target_snapshots: Snapshots to promote.
- environment: The target environment.
- is_dev: Indicates whether the promotion happens in development mode and temporary tables / table clones should be used where applicable.
- on_complete: A callback to call on each successfully promoted snapshot.
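A sketch of a promotion call that uses on_complete to report progress; the callback receives each successfully promoted SnapshotInfoLike, and plan_snapshots is an assumed iterable of them:

def report(snapshot) -> None:
    # Invoked once per successfully promoted snapshot.
    print(f"promoted {snapshot.name}")

evaluator.promote(plan_snapshots, environment="prod", on_complete=report)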
def demote(
    self,
    target_snapshots: t.Iterable[SnapshotInfoLike],
    environment: str,
    on_complete: t.Optional[t.Callable[[SnapshotInfoLike], None]] = None,
) -> None
Demotes the given collection of snapshots in the target environment by removing their views.
Arguments:
- target_snapshots: Snapshots to demote.
- environment: The target environment.
- on_complete: A callback to call on each successfully demoted snapshot.
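Demotion is the symmetric operation; a one-line sketch with the same assumed plan_snapshots iterable:

# Drop the environment views for snapshots being removed from "dev".
evaluator.demote(plan_snapshots, environment="dev")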
def create(
    self,
    target_snapshots: t.Iterable[Snapshot],
    snapshots: t.Dict[SnapshotId, Snapshot],
) -> None
Creates a physical snapshot schema and table for the given collection of snapshots.
Arguments:
- target_snapshots: Target snapshots.
- snapshots: All snapshots (by snapshot id), used to resolve each target's parents.
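A sketch of creating physical objects ahead of evaluation; snapshots_by_id is assumed to map each SnapshotId to its Snapshot so parents can be resolved:

# Creates schemas and empty tables (or views) only; no data is inserted.
# evaluate() fills the tables afterwards.
evaluator.create(new_snapshots, snapshots_by_id)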
def migrate(self, target_snapshots: t.Iterable[SnapshotInfoLike]) -> None
Alters a physical snapshot table to match its snapshot's schema for the given collection of snapshots.
Arguments:
- target_snapshots: Target snapshots.
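A sketch of a migration pass. Internally this diffs each snapshot's temporary table against its target table and applies ADD / DROP column operations; reused_version_snapshots is an assumed iterable of materialized snapshots that reuse an existing version, since only those are altered:

evaluator.migrate(reused_version_snapshots)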
def cleanup(self, target_snapshots: t.Iterable[SnapshotInfoLike]) -> None
Cleans up the given snapshots by removing their tables.
Arguments:
- target_snapshots: Snapshots to cleanup.
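A sketch of garbage-collecting snapshots that are no longer referenced; expired_snapshots is an assumed iterable produced by whatever janitor process identifies them:

# Drops each snapshot's table (or view, for non-materialized kinds), plus
# its dev table when the version differs from the fingerprint.
evaluator.cleanup(expired_snapshots)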
def audit(
    self,
    *,
    snapshot: Snapshot,
    snapshots: t.Dict[str, Snapshot],
    start: t.Optional[TimeLike] = None,
    end: t.Optional[TimeLike] = None,
    latest: t.Optional[TimeLike] = None,
    raise_exception: bool = True,
    is_dev: bool = False,
    **kwargs: t.Any,
) -> t.List[AuditResult]
Execute a snapshot's model's audit queries.
Arguments:
- snapshot: Snapshot to audit.
- snapshots: All upstream snapshots (by model name) to use for expansion and mapping of physical locations.
- start: The start datetime to audit. Defaults to epoch start.
- end: The end datetime to audit. Defaults to epoch start.
- latest: The latest datetime to use for non-incremental queries. Defaults to epoch start.
- raise_exception: Whether to raise an exception if an audit fails. Whether an AuditError is raised or only a warning is logged depends on whether the audit is blocking.
- is_dev: Indicates whether the auditing happens in development mode and temporary tables / table clones should be used where applicable.
- kwargs: Additional kwargs to pass to the renderer.
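A sketch of running audits without raising and inspecting the returned AuditResult objects, each of which carries the audit definition, the failing row count, and the rendered query (evaluator and snapshot variables reused from the earlier sketches):

results = evaluator.audit(
    snapshot=snapshot,
    snapshots=snapshots_by_name,
    start="2023-01-01",
    end="2023-01-02",
    raise_exception=False,  # collect results instead of raising AuditError
)
for result in results:
    if result.count:
        print(f"{result.audit.name}: {result.count} rows failed\n{result.query}")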
def recycle(self) -> None
Closes all open connections and releases all allocated resources associated with any thread except the calling one.
def close(self) -> None
Closes all open connections and releases all allocated resources.
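Finally, a sketch of tying resource release to the end of a run, so that connections are closed even when evaluation fails partway (variables assumed from the earlier sketches):

try:
    evaluator.evaluate(
        snapshot,
        start="2023-01-01",
        end="2023-01-02",
        latest="2023-01-02",
        snapshots=snapshots_by_name,
    )
finally:
    evaluator.close()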