"""sqlmesh.core.model.definition."""
1from __future__ import annotations 2 3import ast 4import sys 5import types 6import typing as t 7from difflib import unified_diff 8from itertools import zip_longest 9from pathlib import Path 10 11import numpy as np 12import pandas as pd 13from astor import to_source 14from pandas.core.dtypes.common import is_numeric_dtype 15from pydantic import Field 16from sqlglot import exp 17from sqlglot.diff import ChangeDistiller, Insert, Keep 18from sqlglot.optimizer.scope import traverse_scope 19from sqlglot.schema import MappingSchema 20from sqlglot.time import format_time 21 22from sqlmesh.core import constants as c 23from sqlmesh.core import dialect as d 24from sqlmesh.core.engine_adapter import PySparkDataFrame 25from sqlmesh.core.hooks import HookRegistry, hook 26from sqlmesh.core.macros import MacroEvaluator, MacroRegistry, macro 27from sqlmesh.core.model.common import expression_validator, parse_model_name 28from sqlmesh.core.model.kind import SeedKind 29from sqlmesh.core.model.meta import HookCall, ModelMeta 30from sqlmesh.core.model.seed import Seed, create_seed 31from sqlmesh.core.renderer import ExpressionRenderer, QueryRenderer 32from sqlmesh.utils.date import TimeLike, make_inclusive, to_datetime 33from sqlmesh.utils.errors import ConfigError, SQLMeshError, raise_config_error 34from sqlmesh.utils.jinja import JinjaMacroRegistry 35from sqlmesh.utils.metaprogramming import ( 36 Executable, 37 build_env, 38 prepare_env, 39 print_exception, 40 serialize_env, 41) 42from sqlmesh.utils.pandas import filter_df_by_timelike 43 44if t.TYPE_CHECKING: 45 from sqlmesh.core.audit import Audit 46 from sqlmesh.core.context import ExecutionContext 47 from sqlmesh.core.engine_adapter import EngineAdapter 48 from sqlmesh.core.engine_adapter._typing import DF, QueryOrDF 49 from sqlmesh.core.snapshot import Snapshot 50 51if sys.version_info >= (3, 9): 52 from typing import Annotated, Literal 53else: 54 from typing_extensions import Annotated, Literal 55 56 57class _Model(ModelMeta, 
frozen=True): 58 """Model is the core abstraction for user defined datasets. 59 60 A model consists of logic that fetches the data (a SQL query, a Python script or a seed) and metadata 61 associated with it. Models can be run on arbitrary cadences and support incremental or full refreshes. 62 Models can also be materialized into physical tables or shared across other models as temporary views. 63 64 Example: 65 MODEL ( 66 name sushi.order_items, 67 owner jen, 68 cron '@daily', 69 batch_size 30, 70 start '2020-01-01', 71 partitioned_by ds 72 ); 73 74 @DEF(var, 'my_var'); 75 76 SELECT 77 1 AS column_a # my first column, 78 @var AS my_column #my second column, 79 ; 80 81 Args: 82 name: The name of the model, which is of the form [catalog].[db].table. 83 The catalog and db are optional. 84 dialect: The SQL dialect that the model's query is written in. By default, 85 this is assumed to be the dialect of the context. 86 owner: The owner of the model. 87 cron: A cron string specifying how often the model should be refreshed, leveraging the 88 [croniter](https://github.com/kiorky/croniter) library. 89 description: The optional model description. 90 stamp: An optional arbitrary string sequence used to create new model versions without making 91 changes to any of the functional components of the definition. 92 start: The earliest date that the model will be backfilled for. If this is None, 93 then the date is inferred by taking the most recent start date of its ancestors. 94 The start date can be a static datetime or a relative datetime like "1 year ago" 95 batch_size: The maximum number of intervals that can be run per backfill job. If this is None, 96 then backfilling this model will do all of history in one job. If this is set, a model's backfill 97 will be chunked such that each individual job will only contain jobs with max `batch_size` intervals. 98 storage_format: The storage format used to store the physical table, only applicable in certain engines. 99 (eg. 
'parquet') 100 partitioned_by: The partition columns, only applicable in certain engines. (eg. (ds, hour)) 101 pre: Pre-hooks to run before the model executes. 102 post: Post-hooks to run after the model executes. 103 expressions: All of the expressions between the model definition and final query, used for setting certain variables or environments. 104 python_env: Dictionary containing all global variables needed to render the model's macros. 105 """ 106 107 expressions_: t.Optional[t.List[exp.Expression]] = Field(default=None, alias="expressions") 108 python_env_: t.Optional[t.Dict[str, Executable]] = Field(default=None, alias="python_env") 109 jinja_macros: JinjaMacroRegistry = JinjaMacroRegistry() 110 111 _path: Path = Path() 112 _depends_on: t.Optional[t.Set[str]] = None 113 _column_descriptions: t.Optional[t.Dict[str, str]] = None 114 115 _expressions_validator = expression_validator 116 117 def render( 118 self, 119 context: ExecutionContext, 120 *, 121 start: t.Optional[TimeLike] = None, 122 end: t.Optional[TimeLike] = None, 123 latest: t.Optional[TimeLike] = None, 124 engine_adapter: t.Optional[EngineAdapter] = None, 125 **kwargs: t.Any, 126 ) -> t.Generator[QueryOrDF, None, None]: 127 """Renders the content of this model in a form of either a SELECT query, executing which the data for this model can 128 be fetched, or a dataframe object which contains the data itself. 129 130 The type of the returned object (query or dataframe) depends on whether the model was sourced from a SQL query, 131 a Python script or a pre-built dataset (seed). 132 133 Args: 134 context: The execution context used for fetching data. 135 start: The start date/time of the run. 136 end: The end date/time of the run. 137 latest: The latest date/time to use for the run. 138 139 Returns: 140 A generator which yields eiether a query object or one of the supported dataframe objects. 
141 """ 142 yield self.render_query( 143 start=start, 144 end=end, 145 latest=latest, 146 snapshots=context.snapshots, 147 is_dev=context.is_dev, 148 engine_adapter=engine_adapter, 149 **kwargs, 150 ) 151 152 def render_definition(self, include_python: bool = True) -> t.List[exp.Expression]: 153 """Returns the original list of sql expressions comprising the model definition. 154 155 Args: 156 include_python: Whether or not to include Python code in the rendered definition. 157 """ 158 expressions = [] 159 comment = None 160 for field in ModelMeta.__fields__.values(): 161 field_value = getattr(self, field.name) 162 163 if field_value != field.default: 164 if field.name == "description": 165 comment = field_value 166 elif field.name == "kind": 167 expressions.append( 168 exp.Property( 169 this="kind", 170 value=field_value.to_expression(dialect=self.dialect), 171 ) 172 ) 173 else: 174 expressions.append( 175 exp.Property( 176 this=field.alias or field.name, 177 value=META_FIELD_CONVERTER.get(field.name, exp.to_identifier)( 178 field_value 179 ), 180 ) 181 ) 182 183 model = d.Model(expressions=expressions) 184 model.comments = [comment] if comment else None 185 186 python_expressions = [] 187 if include_python: 188 python_env = d.PythonCode( 189 expressions=[ 190 v.payload if v.is_import or v.is_definition else f"{k} = {v.payload}" 191 for k, v in self.sorted_python_env 192 ] 193 ) 194 if python_env.expressions: 195 python_expressions.append(python_env) 196 197 return [ 198 model, 199 *self.expressions, 200 *python_expressions, 201 ] 202 203 def render_query( 204 self, 205 *, 206 start: t.Optional[TimeLike] = None, 207 end: t.Optional[TimeLike] = None, 208 latest: t.Optional[TimeLike] = None, 209 snapshots: t.Optional[t.Dict[str, Snapshot]] = None, 210 expand: t.Iterable[str] = tuple(), 211 is_dev: bool = False, 212 engine_adapter: t.Optional[EngineAdapter] = None, 213 **kwargs: t.Any, 214 ) -> exp.Subqueryable: 215 """Renders a model's query, expanding macros with 
provided kwargs, and optionally expanding referenced models. 216 217 Args: 218 start: The start datetime to render. Defaults to epoch start. 219 end: The end datetime to render. Defaults to epoch start. 220 latest: The latest datetime to use for non-incremental queries. Defaults to epoch start. 221 snapshots: All upstream snapshots (by model name) to use for expansion and mapping of physical locations. 222 expand: Expand referenced models as subqueries. This is used to bypass backfills when running queries 223 that depend on materialized tables. Model definitions are inlined and can thus be run end to 224 end on the fly. 225 audit_name: The name of audit if the query to render is for an audit. 226 is_dev: Indicates whether the rendering happens in the development mode and temporary 227 tables / table clones should be used where applicable. 228 kwargs: Additional kwargs to pass to the renderer. 229 230 Returns: 231 The rendered expression. 232 """ 233 return exp.select( 234 *( 235 exp.alias_(f"NULL::{column_type}", name) 236 for name, column_type in self.columns_to_types.items() 237 ) 238 ).from_(exp.values([tuple([1])], alias="t", columns=["dummy"])) 239 240 def ctas_query( 241 self, snapshots: t.Dict[str, Snapshot], is_dev: bool = False 242 ) -> exp.Subqueryable: 243 """Return a dummy query to do a CTAS. 244 245 If a model's column types are unknown, the only way to create the table is to 246 run the fully expanded query. This can be expensive so we add a WHERE FALSE to all 247 SELECTS and hopefully the optimizer is smart enough to not do anything. 248 249 Args: 250 snapshots: All upstream snapshots (by model name) to use for expansion and mapping of physical locations. 251 is_dev: Indicates whether the creation happens in the development mode and temporary 252 tables / table clones should be used where applicable. 253 Return: 254 The mocked out ctas query. 
255 """ 256 query = self.render_query(snapshots=snapshots, is_dev=is_dev) 257 # the query is expanded so it's been copied, it's safe to mutate. 258 for select in query.find_all(exp.Select): 259 select.where("FALSE", copy=False) 260 261 return query 262 263 def run_pre_hooks( 264 self, 265 context: ExecutionContext, 266 start: t.Optional[TimeLike] = None, 267 end: t.Optional[TimeLike] = None, 268 latest: t.Optional[TimeLike] = None, 269 **kwargs: t.Any, 270 ) -> None: 271 """Runs all pre hooks. 272 273 Args: 274 context: The execution context used for running the hook. 275 start: The start date/time of the run. 276 end: The end date/time of the run. 277 latest: The latest date/time to use for the run. 278 """ 279 self._run_hooks(self.pre, context=context, start=start, end=end, latest=latest, **kwargs) 280 281 def run_post_hooks( 282 self, 283 context: ExecutionContext, 284 start: t.Optional[TimeLike] = None, 285 end: t.Optional[TimeLike] = None, 286 latest: t.Optional[TimeLike] = None, 287 **kwargs: t.Any, 288 ) -> None: 289 """Runs all pre hooks. 290 291 Args: 292 context: The execution context used for running the hook. 293 start: The start date/time of the run. 294 end: The end date/time of the run. 295 latest: The latest date/time to use for the run. 296 """ 297 self._run_hooks(self.post, context=context, start=start, end=end, latest=latest, **kwargs) 298 299 def referenced_audits(self, audits: t.Dict[str, Audit]) -> t.List[Audit]: 300 """Returns audits referenced in this model. 301 302 Args: 303 audits: Available audits by name. 
304 """ 305 from sqlmesh.core.audit import BUILT_IN_AUDITS 306 307 referenced_audits = [] 308 for audit_name, _ in self.audits: 309 if audit_name in audits: 310 referenced_audits.append(audits[audit_name]) 311 elif audit_name not in BUILT_IN_AUDITS: 312 raise_config_error( 313 f"Unknown audit '{audit_name}' referenced in model '{self.name}'", 314 self._path, 315 ) 316 return referenced_audits 317 318 def update_schema(self, schema: MappingSchema) -> None: 319 """Updates the schema associated with this model. 320 321 Args: 322 schema: The new schema. 323 """ 324 325 def text_diff(self, other: Model) -> str: 326 """Produce a text diff against another model. 327 328 Args: 329 other: The model to diff against. 330 331 Returns: 332 A unified text diff showing additions and deletions. 333 """ 334 meta_a, *statements_a, query_a = self.render_definition() 335 meta_b, *statements_b, query_b = other.render_definition() 336 return "\n".join( 337 ( 338 d.text_diff(meta_a, meta_b, self.dialect), 339 *( 340 d.text_diff(sa, sb, self.dialect) 341 for sa, sb in zip_longest(statements_a, statements_b) 342 ), 343 d.text_diff(query_a, query_b, self.dialect), 344 ) 345 ).strip() 346 347 def set_time_format(self, default_time_format: str = c.DEFAULT_TIME_COLUMN_FORMAT) -> None: 348 """Sets the default time format for a model. 349 350 Args: 351 default_time_format: A python time format used as the default format when none is provided. 
352 """ 353 if not self.time_column: 354 return 355 356 if self.time_column.format: 357 # Transpile the time column format into the generic dialect 358 formatted_time = format_time( 359 self.time_column.format, 360 d.Dialect.get_or_raise(self.dialect).time_mapping, 361 ) 362 assert formatted_time is not None 363 self.time_column.format = formatted_time 364 else: 365 self.time_column.format = default_time_format 366 367 def convert_to_time_column(self, time: TimeLike) -> exp.Expression: 368 """Convert a TimeLike object to the same time format and type as the model's time column.""" 369 if self.time_column: 370 if self.time_column.format: 371 time = to_datetime(time).strftime(self.time_column.format) 372 373 time_column_type = self.columns_to_types[self.time_column.column] 374 if time_column_type.this in exp.DataType.TEXT_TYPES: 375 return exp.Literal.string(time) 376 elif time_column_type.this in exp.DataType.NUMERIC_TYPES: 377 return exp.Literal.number(time) 378 elif time_column_type.this in exp.DataType.TEMPORAL_TYPES: 379 return exp.cast(exp.Literal.string(time), time_column_type) 380 return exp.convert(time) 381 382 @property 383 def depends_on(self) -> t.Set[str]: 384 """All of the upstream dependencies referenced in the model's query, excluding self references. 385 386 Returns: 387 A list of all the upstream table names. 
388 """ 389 if self.depends_on_ is not None: 390 return self.depends_on_ 391 392 if self._depends_on is None: 393 self._depends_on = _find_tables(self.render_query()) - {self.name} 394 return self._depends_on 395 396 @property 397 def columns_to_types(self) -> t.Dict[str, exp.DataType]: 398 """Returns the mapping of column names to types of this model.""" 399 if self.columns_to_types_ is not None: 400 return self.columns_to_types_ 401 raise SQLMeshError(f"Column information has not been provided for model '{self.name}'") 402 403 @property 404 def annotated(self) -> bool: 405 """Checks if all column projection types of this model are known.""" 406 return all( 407 column_type.this != exp.DataType.Type.UNKNOWN 408 for column_type in self.columns_to_types.values() 409 ) 410 411 @property 412 def sorted_python_env(self) -> t.List[t.Tuple[str, Executable]]: 413 """Returns the python env sorted by executable kind and then var name.""" 414 return sorted(self.python_env.items(), key=lambda x: (x[1].kind, x[0])) 415 416 @property 417 def macro_definitions(self) -> t.List[d.MacroDef]: 418 """All macro definitions from the list of expressions.""" 419 return [s for s in self.expressions if isinstance(s, d.MacroDef)] 420 421 @property 422 def sql_statements(self) -> t.Iterator[exp.Expression]: 423 """All sql statements from the list of expressions.""" 424 rendered_statements = ( 425 self._expression_renderer(s).render() 426 for s in self.expressions 427 if not isinstance(s, d.MacroDef) 428 ) 429 return (statement for statement in rendered_statements if statement is not None) 430 431 @property 432 def view_name(self) -> str: 433 return parse_model_name(self.name)[2] 434 435 @property 436 def expressions(self) -> t.List[exp.Expression]: 437 return self.expressions_ or [] 438 439 @property 440 def python_env(self) -> t.Dict[str, Executable]: 441 return self.python_env_ or {} 442 443 @property 444 def contains_star_query(self) -> bool: 445 """Returns True if the model's query 
contains a star projection.""" 446 return False 447 448 @property 449 def is_sql(self) -> bool: 450 return False 451 452 @property 453 def is_python(self) -> bool: 454 return False 455 456 @property 457 def is_seed(self) -> bool: 458 return False 459 460 def validate_definition(self) -> None: 461 """Validates the model's definition. 462 463 Model's are not allowed to have duplicate column names, non-explicitly casted columns, 464 or non infererrable column names. 465 466 Raises: 467 ConfigError 468 """ 469 if self.partitioned_by: 470 unique_partition_keys = {k.strip().lower() for k in self.partitioned_by} 471 if len(self.partitioned_by) != len(unique_partition_keys): 472 raise_config_error( 473 "All partition keys must be unique in the model definition", 474 self._path, 475 ) 476 477 column_names = {c.lower() for c in self.columns_to_types} 478 missing_keys = unique_partition_keys - column_names 479 if missing_keys: 480 missing_keys_str = ", ".join(f"'{k}'" for k in sorted(missing_keys)) 481 raise_config_error( 482 f"Partition keys [{missing_keys_str}] are missing in the model definition", 483 self._path, 484 ) 485 486 if self.kind.is_incremental_by_time_range and not self.time_column: 487 raise_config_error( 488 "Incremental by time range models must have a time_column field.", 489 self._path, 490 ) 491 492 def is_breaking_change(self, previous: Model) -> t.Optional[bool]: 493 """Determines whether this model is a breaking change in relation to the `previous` model. 494 495 Args: 496 previous: The previous model to compare against. 497 498 Returns: 499 True if this model instance represents a breaking change, False if it's a non-breaking change 500 and None if the nature of the change can't be determined. 
501 """ 502 return None 503 504 def _run_hooks( 505 self, 506 hooks: t.List[HookCall], 507 *, 508 context: ExecutionContext, 509 start: t.Optional[TimeLike] = None, 510 end: t.Optional[TimeLike] = None, 511 latest: t.Optional[TimeLike] = None, 512 **kwargs: t.Any, 513 ) -> None: 514 env = prepare_env(self.python_env) 515 start, end = make_inclusive(start or c.EPOCH, end or c.EPOCH) 516 latest = to_datetime(latest or c.EPOCH) 517 518 macro_evaluator = MacroEvaluator() 519 520 for hook in hooks: 521 if isinstance(hook, exp.Expression): 522 rendered = self._expression_renderer(hook).render( 523 start=start, 524 end=end, 525 latest=latest, 526 engine_adapter=context.engine_adapter, 527 **kwargs, 528 ) 529 if rendered: 530 context.engine_adapter.execute(rendered) 531 else: 532 name, hook_kwargs = hook 533 # Evaluate SQL expressions before passing them into a Python 534 # function as arguments. 535 evaluated_hook_kwargs = { 536 key: macro_evaluator.eval_expression(value) 537 if isinstance(value, exp.Expression) 538 else value 539 for key, value in hook_kwargs.items() 540 } 541 env[name]( 542 context=context, 543 start=start, 544 end=end, 545 latest=latest, 546 **{**kwargs, **evaluated_hook_kwargs}, 547 ) 548 549 def _expression_renderer(self, expression: exp.Expression) -> ExpressionRenderer: 550 return ExpressionRenderer( 551 expression, 552 self.dialect, 553 self.macro_definitions, 554 path=self._path, 555 jinja_macro_registry=self.jinja_macros, 556 python_env=self.python_env, 557 ) 558 559 560class SqlModel(_Model): 561 """The model definition which relies on a SQL query to fetch the data. 562 563 Args: 564 query: The main query representing the model. 
565 """ 566 567 query: t.Union[exp.Subqueryable, d.Jinja] 568 source_type: Literal["sql"] = "sql" 569 570 _columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None 571 __query_renderer: t.Optional[QueryRenderer] = None 572 573 _query_validator = expression_validator 574 575 def render_query( 576 self, 577 *, 578 start: t.Optional[TimeLike] = None, 579 end: t.Optional[TimeLike] = None, 580 latest: t.Optional[TimeLike] = None, 581 snapshots: t.Optional[t.Dict[str, Snapshot]] = None, 582 expand: t.Iterable[str] = tuple(), 583 is_dev: bool = False, 584 engine_adapter: t.Optional[EngineAdapter] = None, 585 **kwargs: t.Any, 586 ) -> exp.Subqueryable: 587 return self._query_renderer.render( 588 start=start, 589 end=end, 590 latest=latest, 591 add_incremental_filter=True, 592 snapshots=snapshots, 593 expand=expand, 594 is_dev=is_dev, 595 engine_adapter=engine_adapter, 596 **kwargs, 597 ) 598 599 def render_definition(self, include_python: bool = True) -> t.List[exp.Expression]: 600 result = super().render_definition(include_python=include_python) 601 result.append(self.query) 602 return result 603 604 @property 605 def is_sql(self) -> bool: 606 return True 607 608 @property 609 def contains_star_query(self) -> bool: 610 return self._query_renderer.contains_star_query 611 612 def update_schema(self, schema: MappingSchema) -> None: 613 self._query_renderer.update_schema(schema) 614 615 @property 616 def columns_to_types(self) -> t.Dict[str, exp.DataType]: 617 if self.columns_to_types_ is not None: 618 return self.columns_to_types_ 619 620 if self._columns_to_types is None: 621 self._columns_to_types = { 622 expression.alias_or_name: expression.type 623 for expression in self._query_renderer.render().expressions 624 } 625 626 return self._columns_to_types 627 628 @property 629 def column_descriptions(self) -> t.Dict[str, str]: 630 if self.column_descriptions_ is not None: 631 return self.column_descriptions_ 632 633 if self._column_descriptions is None: 634 
self._column_descriptions = { 635 select.alias: "\n".join(comment.strip() for comment in select.comments) 636 for select in self.render_query().expressions 637 if select.comments 638 } 639 return self._column_descriptions 640 641 def validate_definition(self) -> None: 642 query = self._query_renderer.render() 643 644 if not isinstance(query, exp.Subqueryable): 645 raise_config_error("Missing SELECT query in the model definition", self._path) 646 647 projection_list = ( 648 query.expressions if not isinstance(query, exp.Union) else query.this.expressions 649 ) 650 if not projection_list: 651 raise_config_error("Query missing select statements", self._path) 652 653 name_counts: t.Dict[str, int] = {} 654 for expression in projection_list: 655 alias = expression.alias_or_name 656 if alias == "*": 657 continue 658 if not alias: 659 raise_config_error( 660 f"Outer projection '{expression}' must have inferrable names or explicit aliases.", 661 self._path, 662 ) 663 name_counts[alias] = name_counts.get(alias, 0) + 1 664 665 for name, count in name_counts.items(): 666 if count > 1: 667 raise_config_error(f"Found duplicate outer select name '{name}'", self._path) 668 669 super().validate_definition() 670 671 def is_breaking_change(self, previous: Model) -> t.Optional[bool]: 672 if not isinstance(previous, SqlModel): 673 return None 674 675 edits = ChangeDistiller(t=0.5).diff(previous.render_query(), self.render_query()) 676 inserted_expressions = {e.expression for e in edits if isinstance(e, Insert)} 677 678 for edit in edits: 679 if isinstance(edit, Insert): 680 expr = edit.expression 681 if _is_udtf(expr) or ( 682 not _is_projection(expr) and expr.parent not in inserted_expressions 683 ): 684 return None 685 elif not isinstance(edit, Keep): 686 return None 687 688 return False 689 690 @property 691 def _query_renderer(self) -> QueryRenderer: 692 if self.__query_renderer is None: 693 self.__query_renderer = QueryRenderer( 694 self.query, 695 self.dialect, 696 
self.macro_definitions, 697 path=self._path, 698 jinja_macro_registry=self.jinja_macros, 699 python_env=self.python_env, 700 time_column=self.time_column, 701 time_converter=self.convert_to_time_column, 702 only_latest=self.kind.only_latest, 703 ) 704 return self.__query_renderer 705 706 def __repr__(self) -> str: 707 return f"Model<name: {self.name}, query: {str(self.query)[0:30]}>" 708 709 710class SeedModel(_Model): 711 """The model definition which uses a pre-built static dataset to source the data from. 712 713 Args: 714 seed: The content of a pre-built static dataset. 715 """ 716 717 kind: SeedKind 718 seed: Seed 719 source_type: Literal["seed"] = "seed" 720 721 def render( 722 self, 723 context: ExecutionContext, 724 *, 725 start: t.Optional[TimeLike] = None, 726 end: t.Optional[TimeLike] = None, 727 latest: t.Optional[TimeLike] = None, 728 engine_adapter: t.Optional[EngineAdapter] = None, 729 **kwargs: t.Any, 730 ) -> t.Generator[QueryOrDF, None, None]: 731 yield from self.seed.read(batch_size=self.kind.batch_size) 732 733 def text_diff(self, other: Model) -> str: 734 if not isinstance(other, SeedModel): 735 return super().text_diff(other) 736 737 meta_a = self.render_definition()[0] 738 meta_b = other.render_definition()[0] 739 return "\n".join( 740 ( 741 d.text_diff(meta_a, meta_b, self.dialect), 742 *unified_diff( 743 self.seed.content.split("\n"), 744 other.seed.content.split("\n"), 745 ), 746 ) 747 ).strip() 748 749 @property 750 def columns_to_types(self) -> t.Dict[str, exp.DataType]: 751 if self.columns_to_types_ is not None: 752 return self.columns_to_types_ 753 return self.seed.columns_to_types 754 755 @property 756 def is_seed(self) -> bool: 757 return True 758 759 @property 760 def seed_path(self) -> Path: 761 seed_path = Path(self.kind.path) 762 if not seed_path.is_absolute(): 763 return self._path.parent / seed_path 764 return seed_path 765 766 def is_breaking_change(self, previous: Model) -> t.Optional[bool]: 767 if not isinstance(previous, 
SeedModel): 768 return None 769 770 new_df = pd.concat([df for df in self.seed.read()]) 771 old_df = pd.concat([df for df in previous.seed.read()]) 772 773 new_columns = set(new_df.columns) 774 old_columns = set(old_df.columns) 775 776 if not new_columns.issuperset(old_columns): 777 return None 778 779 for col in old_columns: 780 if new_df[col].dtype != old_df[col].dtype or new_df[col].shape != old_df[col].shape: 781 return None 782 elif is_numeric_dtype(new_df[col]): 783 if not all(np.isclose(new_df[col], old_df[col])): 784 return None 785 else: 786 if not new_df[col].equals(old_df[col]): 787 return None 788 789 return False 790 791 def __repr__(self) -> str: 792 return f"Model<name: {self.name}, seed: {self.kind.path}>" 793 794 795class PythonModel(_Model): 796 """The model definition which relies on a Python script to fetch the data. 797 798 Args: 799 entrypoint: The name of a Python function which contains the data fetching / transformation logic. 800 """ 801 802 entrypoint: str 803 source_type: Literal["python"] = "python" 804 805 def render( 806 self, 807 context: ExecutionContext, 808 *, 809 start: t.Optional[TimeLike] = None, 810 end: t.Optional[TimeLike] = None, 811 latest: t.Optional[TimeLike] = None, 812 engine_adapter: t.Optional[EngineAdapter] = None, 813 **kwargs: t.Any, 814 ) -> t.Generator[DF, None, None]: 815 env = prepare_env(self.python_env) 816 start, end = make_inclusive(start or c.EPOCH, end or c.EPOCH) 817 latest = to_datetime(latest or c.EPOCH) 818 try: 819 df_or_iter = env[self.entrypoint]( 820 context=context, start=start, end=end, latest=latest, **kwargs 821 ) 822 823 if not isinstance(df_or_iter, types.GeneratorType): 824 df_or_iter = [df_or_iter] 825 826 for df in df_or_iter: 827 if self.kind.is_incremental_by_time_range: 828 assert self.time_column 829 830 if PySparkDataFrame is not None and isinstance(df, PySparkDataFrame): 831 import pyspark 832 833 df = df.where( 834 pyspark.sql.functions.col(self.time_column.column).between( 835 
pyspark.sql.functions.lit( 836 self.convert_to_time_column(start).sql("spark") 837 ), 838 pyspark.sql.functions.lit( 839 self.convert_to_time_column(end).sql("spark") 840 ), 841 ) 842 ) 843 else: 844 assert self.time_column.format, "Time column format is required." 845 df = filter_df_by_timelike( 846 df, self.time_column.column, self.time_column.format, start, end 847 ) 848 yield df 849 except Exception as e: 850 print_exception(e, self.python_env) 851 raise SQLMeshError(f"Error executing Python model '{self.name}'") 852 853 def render_definition(self, include_python: bool = True) -> t.List[exp.Expression]: 854 # Ignore the provided value for the include_python flag, since the Pyhon model's 855 # definition without Python code is meaningless. 856 return super().render_definition(include_python=True) 857 858 @property 859 def is_python(self) -> bool: 860 return True 861 862 def __repr__(self) -> str: 863 return f"Model<name: {self.name}, entrypoint: {self.entrypoint}>" 864 865 866Model = Annotated[t.Union[SqlModel, SeedModel, PythonModel], Field(discriminator="source_type")] 867 868 869def load_model( 870 expressions: t.List[exp.Expression], 871 *, 872 defaults: t.Optional[t.Dict[str, t.Any]] = None, 873 path: Path = Path(), 874 module_path: Path = Path(), 875 time_column_format: str = c.DEFAULT_TIME_COLUMN_FORMAT, 876 macros: t.Optional[MacroRegistry] = None, 877 hooks: t.Optional[HookRegistry] = None, 878 python_env: t.Optional[t.Dict[str, Executable]] = None, 879 dialect: t.Optional[str] = None, 880 **kwargs: t.Any, 881) -> Model: 882 """Load a model from a parsed SQLMesh model file. 883 884 Args: 885 expressions: Model, *Statements, Query. 886 defaults: Definition default values. 887 path: An optional path to the model definition file. 888 module_path: The python module path to serialize macros for. 889 time_column_format: The default time column format to use if no model time column is configured. 890 macros: The custom registry of macros. 
            If not provided the default registry will be used.
        hooks: The custom registry of hooks. If not provided the default registry will be used.
        python_env: The custom Python environment for hooks/macros. If not provided the environment will be constructed
            from the macro registry.
        dialect: The default dialect if no model dialect is configured.
        kwargs: Additional kwargs to pass to the loader.
    """
    if not expressions:
        raise_config_error("Incomplete model definition, missing MODEL statement", path)

    dialect = dialect or ""
    # Layout of a parsed model file: MODEL statement, optional statements, final query.
    meta = expressions[0]
    query = expressions[-1] if len(expressions) > 1 else None
    statements = expressions[1:-1]

    if not isinstance(meta, d.Model):
        raise_config_error(
            "MODEL statement is required as the first statement in the definition",
            path,
        )

    meta_fields: t.Dict[str, t.Any] = {
        "dialect": dialect,
        "description": "\n".join(comment.strip() for comment in meta.comments)
        if meta.comments
        else None,
        **{prop.name.lower(): prop.args.get("value") for prop in meta.expressions},
        **kwargs,
    }

    name = meta_fields.pop("name", "")
    if not name:
        raise_config_error("Model must have a name", path)

    if isinstance(query, d.MacroVar):
        # A bare macro variable as the "query" identifies a Python model entrypoint.
        if python_env is None:
            raise_config_error("The python environment must be provided for Python models", path)
            # Unreachable: raise_config_error always raises. Kept for type narrowing.
            raise

        return create_python_model(
            name,
            query.name,
            python_env,
            defaults=defaults,
            path=path,
            time_column_format=time_column_format,
            **meta_fields,
        )
    elif query is not None:
        return create_sql_model(
            name,
            query,
            statements=statements,
            defaults=defaults,
            path=path,
            module_path=module_path,
            time_column_format=time_column_format,
            macros=macros,
            hooks=hooks,
            python_env=python_env,
            **meta_fields,
        )
    else:
        # No query at all: the definition is only valid if it describes a seed.
        try:
            seed_properties = {
                p.name.lower(): p.args.get("value") for p in meta_fields.pop("kind").expressions
            }
            return create_seed_model(
                name,
                SeedKind(**seed_properties),
                defaults=defaults,
                path=path,
                **meta_fields,
            )
        except Exception:
            raise_config_error(
                "The model definition must either have a SELECT query or a valid Seed kind",
                path,
            )
            # Unreachable: raise_config_error always raises. Kept for type narrowing.
            raise


def create_sql_model(
    name: str,
    query: exp.Expression,
    *,
    statements: t.Optional[t.List[exp.Expression]] = None,
    defaults: t.Optional[t.Dict[str, t.Any]] = None,
    path: Path = Path(),
    module_path: Path = Path(),
    time_column_format: str = c.DEFAULT_TIME_COLUMN_FORMAT,
    macros: t.Optional[MacroRegistry] = None,
    hooks: t.Optional[HookRegistry] = None,
    python_env: t.Optional[t.Dict[str, Executable]] = None,
    dialect: t.Optional[str] = None,
    **kwargs: t.Any,
) -> Model:
    """Creates a SQL model.

    Args:
        name: The name of the model, which is of the form [catalog].[db].table.
            The catalog and db are optional.
        query: The model's logic in a form of a SELECT query.
        statements: The list of all SQL statements that are not a query or a model definition.
        defaults: Definition default values.
        path: An optional path to the model definition file.
        module_path: The python module path to serialize macros for.
        time_column_format: The default time column format to use if no model time column is configured.
        macros: The custom registry of macros. If not provided the default registry will be used.
        hooks: The custom registry of hooks. If not provided the default registry will be used.
        python_env: The custom Python environment for hooks/macros. If not provided the environment will be constructed
            from the macro registry.
        dialect: The default dialect if no model dialect is configured.
1005 """ 1006 if not isinstance(query, (exp.Subqueryable, d.Jinja)): 1007 raise_config_error( 1008 "A query is required and must be a SELECT or UNION statement.", 1009 path, 1010 ) 1011 1012 if not python_env: 1013 python_env = _python_env( 1014 query, 1015 _extract_hooks(kwargs), 1016 module_path, 1017 macros or macro.get_registry(), 1018 hooks or hook.get_registry(), 1019 ) 1020 1021 return _create_model( 1022 SqlModel, 1023 name, 1024 defaults=defaults, 1025 path=path, 1026 time_column_format=time_column_format, 1027 python_env=python_env, 1028 dialect=dialect, 1029 expressions=statements or [], 1030 query=query, 1031 **kwargs, 1032 ) 1033 1034 1035def create_seed_model( 1036 name: str, 1037 seed_kind: SeedKind, 1038 *, 1039 defaults: t.Optional[t.Dict[str, t.Any]] = None, 1040 path: Path = Path(), 1041 **kwargs: t.Any, 1042) -> Model: 1043 """Creates a Seed model. 1044 1045 Args: 1046 name: The name of the model, which is of the form [catalog].[db].table. 1047 The catalog and db are optional. 1048 seed_kind: The information about the location of a seed and other related configuration. 1049 defaults: Definition default values. 1050 path: An optional path to the model definition file. 1051 """ 1052 seed_path = Path(seed_kind.path) 1053 if not seed_path.is_absolute(): 1054 seed_path = path / seed_path if path.is_dir() else path.parents[0] / seed_path 1055 seed = create_seed(seed_path) 1056 return _create_model( 1057 SeedModel, 1058 name, 1059 defaults=defaults, 1060 path=path, 1061 seed=seed, 1062 kind=seed_kind, 1063 **kwargs, 1064 ) 1065 1066 1067def create_python_model( 1068 name: str, 1069 entrypoint: str, 1070 python_env: t.Dict[str, Executable], 1071 *, 1072 defaults: t.Optional[t.Dict[str, t.Any]] = None, 1073 path: Path = Path(), 1074 time_column_format: str = c.DEFAULT_TIME_COLUMN_FORMAT, 1075 depends_on: t.Optional[t.Set[str]] = None, 1076 **kwargs: t.Any, 1077) -> Model: 1078 """Creates a Python model. 
1079 1080 Args: 1081 name: The name of the model, which is of the form [catalog].[db].table. 1082 The catalog and db are optional. 1083 entrypoint: The name of a Python function which contains the data fetching / transformation logic. 1084 python_env: The Python environment of all objects referenced by the model implementation. 1085 defaults: Definition default values. 1086 path: An optional path to the model definition file. 1087 time_column_format: The default time column format to use if no model time column is configured. 1088 depends_on: The custom set of model's upstream dependencies. 1089 """ 1090 # Find dependencies for python models by parsing code if they are not explicitly defined 1091 depends_on = ( 1092 _parse_depends_on(entrypoint, python_env) 1093 if depends_on is None and python_env is not None 1094 else None 1095 ) 1096 return _create_model( 1097 PythonModel, 1098 name, 1099 defaults=defaults, 1100 path=path, 1101 time_column_format=time_column_format, 1102 depends_on=depends_on, 1103 entrypoint=entrypoint, 1104 python_env=python_env, 1105 **kwargs, 1106 ) 1107 1108 1109def _create_model( 1110 klass: t.Type[_Model], 1111 name: str, 1112 *, 1113 defaults: t.Optional[t.Dict[str, t.Any]] = None, 1114 path: Path = Path(), 1115 time_column_format: str = c.DEFAULT_TIME_COLUMN_FORMAT, 1116 depends_on: t.Optional[t.Set[str]] = None, 1117 dialect: t.Optional[str] = None, 1118 expressions: t.Optional[t.List[exp.Expression]] = None, 1119 **kwargs: t.Any, 1120) -> Model: 1121 _validate_model_fields(klass, {"name", *kwargs}, path) 1122 1123 dialect = dialect or "" 1124 1125 try: 1126 model = klass( 1127 name=name, 1128 expressions=expressions or [], 1129 **{ 1130 **(defaults or {}), 1131 "dialect": dialect, 1132 "depends_on": depends_on, 1133 **kwargs, 1134 }, 1135 ) 1136 except Exception as ex: 1137 raise_config_error(str(ex), location=path) 1138 raise 1139 1140 model._path = path 1141 model.set_time_format(time_column_format) 1142 model.validate_definition() 
1143 1144 return t.cast(Model, model) 1145 1146 1147def _validate_model_fields(klass: t.Type[_Model], provided_fields: t.Set[str], path: Path) -> None: 1148 missing_required_fields = klass.missing_required_fields(provided_fields) 1149 if missing_required_fields: 1150 raise_config_error( 1151 f"Missing required fields {missing_required_fields} in the model definition", 1152 path, 1153 ) 1154 1155 extra_fields = klass.extra_fields(provided_fields) 1156 if extra_fields: 1157 raise_config_error(f"Invalid extra fields {extra_fields} in the model definition", path) 1158 1159 1160def _find_tables(query: exp.Expression) -> t.Set[str]: 1161 """Find all tables referenced in a query. 1162 1163 Args: 1164 query: The expression to find tables for. 1165 1166 Returns: 1167 A Set of all the table names. 1168 """ 1169 return { 1170 exp.table_name(table) 1171 for scope in traverse_scope(query) 1172 for table in scope.tables 1173 if isinstance(table.this, exp.Identifier) and exp.table_name(table) not in scope.cte_sources 1174 } 1175 1176 1177def _python_env( 1178 query: exp.Expression, 1179 hook_calls: t.List[HookCall], 1180 module_path: Path, 1181 macros: MacroRegistry, 1182 hooks: HookRegistry, 1183) -> t.Dict[str, Executable]: 1184 python_env: t.Dict[str, Executable] = {} 1185 1186 used_macros = {} 1187 1188 def _capture_expression_macros(expression: exp.Expression) -> None: 1189 if isinstance(expression, d.Jinja): 1190 for var in expression.expressions: 1191 if var in macros: 1192 used_macros[var] = macros[var] 1193 else: 1194 for macro_func in expression.find_all(d.MacroFunc): 1195 if macro_func.__class__ is d.MacroFunc: 1196 name = macro_func.this.name.lower() 1197 used_macros[name] = macros[name] 1198 1199 _capture_expression_macros(query) 1200 1201 for hook in hook_calls: 1202 if isinstance(hook, exp.Expression): 1203 _capture_expression_macros(hook) 1204 else: 1205 name = hook[0] 1206 build_env( 1207 hooks[name].func, 1208 env=python_env, 1209 name=name, 1210 
path=module_path, 1211 ) 1212 1213 for name, macro in used_macros.items(): 1214 if not macro.func.__module__.startswith("sqlmesh."): 1215 build_env( 1216 macro.func, 1217 env=python_env, 1218 name=name, 1219 path=module_path, 1220 ) 1221 1222 return serialize_env(python_env, path=module_path) 1223 1224 1225def _parse_depends_on(model_func: str, python_env: t.Dict[str, Executable]) -> t.Set[str]: 1226 """Parses the source of a model function and finds upstream dependencies based on calls to context.""" 1227 env = prepare_env(python_env) 1228 depends_on = set() 1229 executable = python_env[model_func] 1230 1231 for node in ast.walk(ast.parse(executable.payload)): 1232 if not isinstance(node, ast.Call): 1233 continue 1234 1235 func = node.func 1236 1237 if ( 1238 isinstance(func, ast.Attribute) 1239 and isinstance(func.value, ast.Name) 1240 and func.value.id == "context" 1241 and func.attr == "table" 1242 ): 1243 if node.args: 1244 table: t.Optional[ast.expr] = node.args[0] 1245 else: 1246 table = next( 1247 (keyword.value for keyword in node.keywords if keyword.arg == "model_name"), 1248 None, 1249 ) 1250 1251 try: 1252 expression = to_source(table) 1253 depends_on.add(eval(expression, env)) 1254 except Exception: 1255 raise ConfigError( 1256 f"Error resolving dependencies for '{executable.path}'. 
References to context must be resolvable at parse time.\n\n{expression}" 1257 ) 1258 1259 return depends_on 1260 1261 1262def _extract_hooks(kwargs: t.Dict[str, t.Any]) -> t.List[HookCall]: 1263 return (ModelMeta._value_or_tuple_with_args_validator(kwargs.get("pre")) or []) + ( 1264 ModelMeta._value_or_tuple_with_args_validator(kwargs.get("post")) or [] 1265 ) 1266 1267 1268def _list_of_calls_to_exp(value: t.List[t.Tuple[str, t.Dict[str, t.Any]]]) -> exp.Expression: 1269 return exp.Tuple( 1270 expressions=[ 1271 exp.Anonymous( 1272 this=v[0], 1273 expressions=[ 1274 exp.EQ(this=exp.convert(left), expression=exp.convert(right)) 1275 for left, right in v[1].items() 1276 ], 1277 ) 1278 for v in value 1279 ] 1280 ) 1281 1282 1283def _is_projection(expr: exp.Expression) -> bool: 1284 parent = expr.parent 1285 return isinstance(parent, exp.Select) and expr in parent.expressions 1286 1287 1288def _is_udtf(expr: exp.Expression) -> bool: 1289 return isinstance(expr, (exp.Explode, exp.Posexplode, exp.Unnest)) or ( 1290 isinstance(expr, exp.Anonymous) 1291 and expr.this.upper() in ("EXPLODE_OUTER", "POSEXPLODE_OUTER", "UNNEST") 1292 ) 1293 1294 1295META_FIELD_CONVERTER: t.Dict[str, t.Callable] = { 1296 "name": lambda value: exp.to_table(value), 1297 "start": lambda value: exp.Literal.string(value), 1298 "cron": lambda value: exp.Literal.string(value), 1299 "batch_size": lambda value: exp.Literal.number(value), 1300 "partitioned_by_": lambda value: ( 1301 exp.to_identifier(value[0]) if len(value) == 1 else exp.Tuple(expressions=value) 1302 ), 1303 "depends_on_": lambda value: exp.Tuple(expressions=value), 1304 "pre": _list_of_calls_to_exp, 1305 "post": _list_of_calls_to_exp, 1306 "audits": _list_of_calls_to_exp, 1307 "columns_to_types_": lambda value: exp.Schema( 1308 expressions=[exp.ColumnDef(this=exp.to_column(c), kind=t) for c, t in value.items()] 1309 ), 1310}
class SqlModel(_Model):
    """The model definition which relies on a SQL query to fetch the data.

    Args:
        query: The main query representing the model.
    """

    query: t.Union[exp.Subqueryable, d.Jinja]
    source_type: Literal["sql"] = "sql"

    # Lazily computed cache of inferred column types.
    _columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None
    # Lazily constructed renderer for this model's query.
    __query_renderer: t.Optional[QueryRenderer] = None

    _query_validator = expression_validator

    def render_query(
        self,
        *,
        start: t.Optional[TimeLike] = None,
        end: t.Optional[TimeLike] = None,
        latest: t.Optional[TimeLike] = None,
        snapshots: t.Optional[t.Dict[str, Snapshot]] = None,
        expand: t.Iterable[str] = tuple(),
        is_dev: bool = False,
        engine_adapter: t.Optional[EngineAdapter] = None,
        **kwargs: t.Any,
    ) -> exp.Subqueryable:
        """Renders the model's query, expanding macros and applying the incremental filter."""
        return self._query_renderer.render(
            start=start,
            end=end,
            latest=latest,
            add_incremental_filter=True,
            snapshots=snapshots,
            expand=expand,
            is_dev=is_dev,
            engine_adapter=engine_adapter,
            **kwargs,
        )

    def render_definition(self, include_python: bool = True) -> t.List[exp.Expression]:
        """Returns the model definition expressions with the query appended last."""
        rendered = super().render_definition(include_python=include_python)
        rendered.append(self.query)
        return rendered

    @property
    def is_sql(self) -> bool:
        return True

    @property
    def contains_star_query(self) -> bool:
        return self._query_renderer.contains_star_query

    def update_schema(self, schema: MappingSchema) -> None:
        """Updates the schema used when resolving this model's query."""
        self._query_renderer.update_schema(schema)

    @property
    def columns_to_types(self) -> t.Dict[str, exp.DataType]:
        """The mapping of column names to types, inferred from the query when not explicit."""
        if self.columns_to_types_ is not None:
            return self.columns_to_types_

        if self._columns_to_types is None:
            # Rendering is relatively expensive, so the inferred mapping is cached.
            rendered = self._query_renderer.render()
            self._columns_to_types = {
                projection.alias_or_name: projection.type for projection in rendered.expressions
            }

        return self._columns_to_types

    @property
    def column_descriptions(self) -> t.Dict[str, str]:
        """Column descriptions extracted from comments on the query's projections."""
        if self.column_descriptions_ is not None:
            return self.column_descriptions_

        if self._column_descriptions is None:
            descriptions = {}
            for projection in self.render_query().expressions:
                if projection.comments:
                    descriptions[projection.alias] = "\n".join(
                        comment.strip() for comment in projection.comments
                    )
            self._column_descriptions = descriptions
        return self._column_descriptions

    def validate_definition(self) -> None:
        """Validates the model's definition.

        Models are not allowed to have duplicate outer select names or
        projections without inferrable names or explicit aliases.

        Raises:
            ConfigError: If the definition is invalid.
        """
        rendered = self._query_renderer.render()

        if not isinstance(rendered, exp.Subqueryable):
            raise_config_error("Missing SELECT query in the model definition", self._path)

        # For a UNION the outer projections come from its first (left) side.
        projections = (
            rendered.this.expressions if isinstance(rendered, exp.Union) else rendered.expressions
        )
        if not projections:
            raise_config_error("Query missing select statements", self._path)

        alias_counts: t.Dict[str, int] = {}
        for projection in projections:
            alias = projection.alias_or_name
            if alias == "*":
                continue
            if not alias:
                raise_config_error(
                    f"Outer projection '{projection}' must have inferrable names or explicit aliases.",
                    self._path,
                )
            alias_counts[alias] = alias_counts.get(alias, 0) + 1

        for alias, occurrences in alias_counts.items():
            if occurrences > 1:
                raise_config_error(f"Found duplicate outer select name '{alias}'", self._path)

        super().validate_definition()

    def is_breaking_change(self, previous: Model) -> t.Optional[bool]:
        """Determines whether this model is a breaking change relative to the previous model.

        Returns:
            False when the query only gained new projections (non-breaking),
            None when the nature of the change can't be determined.
        """
        if not isinstance(previous, SqlModel):
            return None

        edits = ChangeDistiller(t=0.5).diff(previous.render_query(), self.render_query())
        inserted_expressions = {e.expression for e in edits if isinstance(e, Insert)}

        for edit in edits:
            if isinstance(edit, Keep):
                continue
            if not isinstance(edit, Insert):
                # Any removal or update is potentially breaking.
                return None
            inserted = edit.expression
            if _is_udtf(inserted):
                # A new table-generating function can change the row count.
                return None
            if not _is_projection(inserted) and inserted.parent not in inserted_expressions:
                return None

        return False

    @property
    def _query_renderer(self) -> QueryRenderer:
        if self.__query_renderer is None:
            self.__query_renderer = QueryRenderer(
                self.query,
                self.dialect,
                self.macro_definitions,
                path=self._path,
                jinja_macro_registry=self.jinja_macros,
                python_env=self.python_env,
                time_column=self.time_column,
                time_converter=self.convert_to_time_column,
                only_latest=self.kind.only_latest,
            )
        return self.__query_renderer

    def __repr__(self) -> str:
        return f"Model<name: {self.name}, query: {str(self.query)[0:30]}>"
The model definition which relies on a SQL query to fetch the data.
Arguments:
- query: The main query representing the model.
576 def render_query( 577 self, 578 *, 579 start: t.Optional[TimeLike] = None, 580 end: t.Optional[TimeLike] = None, 581 latest: t.Optional[TimeLike] = None, 582 snapshots: t.Optional[t.Dict[str, Snapshot]] = None, 583 expand: t.Iterable[str] = tuple(), 584 is_dev: bool = False, 585 engine_adapter: t.Optional[EngineAdapter] = None, 586 **kwargs: t.Any, 587 ) -> exp.Subqueryable: 588 return self._query_renderer.render( 589 start=start, 590 end=end, 591 latest=latest, 592 add_incremental_filter=True, 593 snapshots=snapshots, 594 expand=expand, 595 is_dev=is_dev, 596 engine_adapter=engine_adapter, 597 **kwargs, 598 )
Renders a model's query, expanding macros with provided kwargs, and optionally expanding referenced models.
Arguments:
- start: The start datetime to render. Defaults to epoch start.
- end: The end datetime to render. Defaults to epoch start.
- latest: The latest datetime to use for non-incremental queries. Defaults to epoch start.
- snapshots: All upstream snapshots (by model name) to use for expansion and mapping of physical locations.
- expand: Expand referenced models as subqueries. This is used to bypass backfills when running queries that depend on materialized tables. Model definitions are inlined and can thus be run end to end on the fly.
- audit_name: The name of audit if the query to render is for an audit.
- is_dev: Indicates whether the rendering happens in the development mode and temporary tables / table clones should be used where applicable.
- kwargs: Additional kwargs to pass to the renderer.
Returns:
The rendered expression.
600 def render_definition(self, include_python: bool = True) -> t.List[exp.Expression]: 601 result = super().render_definition(include_python=include_python) 602 result.append(self.query) 603 return result
Returns the original list of sql expressions comprising the model definition.
Arguments:
- include_python: Whether or not to include Python code in the rendered definition.
613 def update_schema(self, schema: MappingSchema) -> None: 614 self._query_renderer.update_schema(schema)
Updates the schema associated with this model.
Arguments:
- schema: The new schema.
Returns the mapping of column names to types of this model.
642 def validate_definition(self) -> None: 643 query = self._query_renderer.render() 644 645 if not isinstance(query, exp.Subqueryable): 646 raise_config_error("Missing SELECT query in the model definition", self._path) 647 648 projection_list = ( 649 query.expressions if not isinstance(query, exp.Union) else query.this.expressions 650 ) 651 if not projection_list: 652 raise_config_error("Query missing select statements", self._path) 653 654 name_counts: t.Dict[str, int] = {} 655 for expression in projection_list: 656 alias = expression.alias_or_name 657 if alias == "*": 658 continue 659 if not alias: 660 raise_config_error( 661 f"Outer projection '{expression}' must have inferrable names or explicit aliases.", 662 self._path, 663 ) 664 name_counts[alias] = name_counts.get(alias, 0) + 1 665 666 for name, count in name_counts.items(): 667 if count > 1: 668 raise_config_error(f"Found duplicate outer select name '{name}'", self._path) 669 670 super().validate_definition()
Validates the model's definition.
Models are not allowed to have duplicate column names, non-explicitly cast columns, or non-inferrable column names.
Raises:
- ConfigError
672 def is_breaking_change(self, previous: Model) -> t.Optional[bool]: 673 if not isinstance(previous, SqlModel): 674 return None 675 676 edits = ChangeDistiller(t=0.5).diff(previous.render_query(), self.render_query()) 677 inserted_expressions = {e.expression for e in edits if isinstance(e, Insert)} 678 679 for edit in edits: 680 if isinstance(edit, Insert): 681 expr = edit.expression 682 if _is_udtf(expr) or ( 683 not _is_projection(expr) and expr.parent not in inserted_expressions 684 ): 685 return None 686 elif not isinstance(edit, Keep): 687 return None 688 689 return False
Determines whether this model is a breaking change in relation to the previous
model.
Arguments:
- previous: The previous model to compare against.
Returns:
True if this model instance represents a breaking change, False if it's a non-breaking change and None if the nature of the change can't be determined.
Inherited Members
- pydantic.main.BaseModel
- BaseModel
- parse_obj
- parse_raw
- parse_file
- from_orm
- construct
- copy
- schema
- schema_json
- validate
- update_forward_refs
- _Model
- render
- ctas_query
- run_pre_hooks
- run_post_hooks
- referenced_audits
- text_diff
- set_time_format
- convert_to_time_column
- depends_on
- annotated
- sorted_python_env
- macro_definitions
- sql_statements
class SeedModel(_Model):
    """The model definition which uses a pre-built static dataset to source the data from.

    Args:
        seed: The content of a pre-built static dataset.
    """

    kind: SeedKind
    seed: Seed
    source_type: Literal["seed"] = "seed"

    def render(
        self,
        context: ExecutionContext,
        *,
        start: t.Optional[TimeLike] = None,
        end: t.Optional[TimeLike] = None,
        latest: t.Optional[TimeLike] = None,
        engine_adapter: t.Optional[EngineAdapter] = None,
        **kwargs: t.Any,
    ) -> t.Generator[QueryOrDF, None, None]:
        """Yields the seed's content as dataframes, one per batch."""
        yield from self.seed.read(batch_size=self.kind.batch_size)

    def text_diff(self, other: Model) -> str:
        """Produces a text diff against another model.

        For two seed models the diff covers both the MODEL statement and the
        seed's content; otherwise the generic model diff is used.
        """
        if not isinstance(other, SeedModel):
            return super().text_diff(other)

        meta_diff = d.text_diff(
            self.render_definition()[0], other.render_definition()[0], self.dialect
        )
        content_diff = unified_diff(
            self.seed.content.split("\n"),
            other.seed.content.split("\n"),
        )
        return "\n".join((meta_diff, *content_diff)).strip()

    @property
    def columns_to_types(self) -> t.Dict[str, exp.DataType]:
        """The mapping of column names to types, inferred from the seed when not explicit."""
        if self.columns_to_types_ is not None:
            return self.columns_to_types_
        return self.seed.columns_to_types

    @property
    def is_seed(self) -> bool:
        return True

    @property
    def seed_path(self) -> Path:
        """The path of the seed file, resolved relative to the model definition when not absolute."""
        path = Path(self.kind.path)
        return path if path.is_absolute() else self._path.parent / path

    def is_breaking_change(self, previous: Model) -> t.Optional[bool]:
        """Determines whether this model is a breaking change relative to the previous model.

        Returns:
            False when the new seed only adds columns while keeping existing
            values intact (non-breaking); None when the change can't be classified.
        """
        if not isinstance(previous, SeedModel):
            return None

        new_df = pd.concat(list(self.seed.read()))
        old_df = pd.concat(list(previous.seed.read()))

        # Dropping a column is potentially breaking.
        if not set(new_df.columns).issuperset(old_df.columns):
            return None

        for column in old_df.columns:
            new_col = new_df[column]
            old_col = old_df[column]
            if new_col.dtype != old_col.dtype or new_col.shape != old_col.shape:
                return None
            if is_numeric_dtype(new_col):
                # Compare numeric columns with a tolerance to absorb float noise.
                if not np.isclose(new_col, old_col).all():
                    return None
            elif not new_col.equals(old_col):
                return None

        return False

    def __repr__(self) -> str:
        return f"Model<name: {self.name}, seed: {self.kind.path}>"
The model definition which uses a pre-built static dataset to source the data from.
Arguments:
- seed: The content of a pre-built static dataset.
722 def render( 723 self, 724 context: ExecutionContext, 725 *, 726 start: t.Optional[TimeLike] = None, 727 end: t.Optional[TimeLike] = None, 728 latest: t.Optional[TimeLike] = None, 729 engine_adapter: t.Optional[EngineAdapter] = None, 730 **kwargs: t.Any, 731 ) -> t.Generator[QueryOrDF, None, None]: 732 yield from self.seed.read(batch_size=self.kind.batch_size)
Renders the content of this model in a form of either a SELECT query, executing which the data for this model can be fetched, or a dataframe object which contains the data itself.
The type of the returned object (query or dataframe) depends on whether the model was sourced from a SQL query, a Python script or a pre-built dataset (seed).
Arguments:
- context: The execution context used for fetching data.
- start: The start date/time of the run.
- end: The end date/time of the run.
- latest: The latest date/time to use for the run.
Returns:
A generator which yields either a query object or one of the supported dataframe objects.
734 def text_diff(self, other: Model) -> str: 735 if not isinstance(other, SeedModel): 736 return super().text_diff(other) 737 738 meta_a = self.render_definition()[0] 739 meta_b = other.render_definition()[0] 740 return "\n".join( 741 ( 742 d.text_diff(meta_a, meta_b, self.dialect), 743 *unified_diff( 744 self.seed.content.split("\n"), 745 other.seed.content.split("\n"), 746 ), 747 ) 748 ).strip()
Produce a text diff against another model.
Arguments:
- other: The model to diff against.
Returns:
A unified text diff showing additions and deletions.
Returns the mapping of column names to types of this model.
767 def is_breaking_change(self, previous: Model) -> t.Optional[bool]: 768 if not isinstance(previous, SeedModel): 769 return None 770 771 new_df = pd.concat([df for df in self.seed.read()]) 772 old_df = pd.concat([df for df in previous.seed.read()]) 773 774 new_columns = set(new_df.columns) 775 old_columns = set(old_df.columns) 776 777 if not new_columns.issuperset(old_columns): 778 return None 779 780 for col in old_columns: 781 if new_df[col].dtype != old_df[col].dtype or new_df[col].shape != old_df[col].shape: 782 return None 783 elif is_numeric_dtype(new_df[col]): 784 if not all(np.isclose(new_df[col], old_df[col])): 785 return None 786 else: 787 if not new_df[col].equals(old_df[col]): 788 return None 789 790 return False
Determines whether this model is a breaking change in relation to the previous
model.
Arguments:
- previous: The previous model to compare against.
Returns:
True if this model instance represents a breaking change, False if it's a non-breaking change and None if the nature of the change can't be determined.
Inherited Members
- pydantic.main.BaseModel
- BaseModel
- parse_obj
- parse_raw
- parse_file
- from_orm
- construct
- copy
- schema
- schema_json
- validate
- update_forward_refs
- _Model
- render_definition
- render_query
- ctas_query
- run_pre_hooks
- run_post_hooks
- referenced_audits
- update_schema
- set_time_format
- convert_to_time_column
- depends_on
- annotated
- sorted_python_env
- macro_definitions
- sql_statements
- contains_star_query
- validate_definition
class PythonModel(_Model):
    """The model definition which relies on a Python script to fetch the data.

    Args:
        entrypoint: The name of a Python function which contains the data fetching / transformation logic.
    """

    entrypoint: str
    source_type: Literal["python"] = "python"

    def render(
        self,
        context: ExecutionContext,
        *,
        start: t.Optional[TimeLike] = None,
        end: t.Optional[TimeLike] = None,
        latest: t.Optional[TimeLike] = None,
        engine_adapter: t.Optional[EngineAdapter] = None,
        **kwargs: t.Any,
    ) -> t.Generator[DF, None, None]:
        """Executes the model's entrypoint and yields the resulting dataframes.

        For incremental-by-time-range models, each yielded dataframe is filtered
        down to the inclusive [start, end] interval using the model's time column.

        Args:
            context: The execution context used for fetching data.
            start: The start date/time of the run.
            end: The end date/time of the run.
            latest: The latest date/time to use for the run.

        Raises:
            SQLMeshError: If the evaluation of the entrypoint fails.
        """
        env = prepare_env(self.python_env)
        start, end = make_inclusive(start or c.EPOCH, end or c.EPOCH)
        latest = to_datetime(latest or c.EPOCH)
        try:
            df_or_iter = env[self.entrypoint](
                context=context, start=start, end=end, latest=latest, **kwargs
            )

            # The entrypoint may return a single dataframe or a generator of them.
            if not isinstance(df_or_iter, types.GeneratorType):
                df_or_iter = [df_or_iter]

            for df in df_or_iter:
                if self.kind.is_incremental_by_time_range:
                    assert self.time_column

                    if PySparkDataFrame is not None and isinstance(df, PySparkDataFrame):
                        import pyspark

                        df = df.where(
                            pyspark.sql.functions.col(self.time_column.column).between(
                                pyspark.sql.functions.lit(
                                    self.convert_to_time_column(start).sql("spark")
                                ),
                                pyspark.sql.functions.lit(
                                    self.convert_to_time_column(end).sql("spark")
                                ),
                            )
                        )
                    else:
                        assert self.time_column.format, "Time column format is required."
                        df = filter_df_by_timelike(
                            df, self.time_column.column, self.time_column.format, start, end
                        )
                yield df
        except Exception as e:
            print_exception(e, self.python_env)
            # Chain the original exception so the root cause is preserved.
            raise SQLMeshError(f"Error executing Python model '{self.name}'") from e

    def render_definition(self, include_python: bool = True) -> t.List[exp.Expression]:
        """Returns the original list of sql expressions comprising the model definition.

        The provided value of the include_python flag is ignored, since the Python
        model's definition without Python code is meaningless.
        """
        return super().render_definition(include_python=True)

    @property
    def is_python(self) -> bool:
        return True

    def __repr__(self) -> str:
        return f"Model<name: {self.name}, entrypoint: {self.entrypoint}>"
The model definition which relies on a Python script to fetch the data.
Arguments:
- entrypoint: The name of a Python function which contains the data fetching / transformation logic.
806 def render( 807 self, 808 context: ExecutionContext, 809 *, 810 start: t.Optional[TimeLike] = None, 811 end: t.Optional[TimeLike] = None, 812 latest: t.Optional[TimeLike] = None, 813 engine_adapter: t.Optional[EngineAdapter] = None, 814 **kwargs: t.Any, 815 ) -> t.Generator[DF, None, None]: 816 env = prepare_env(self.python_env) 817 start, end = make_inclusive(start or c.EPOCH, end or c.EPOCH) 818 latest = to_datetime(latest or c.EPOCH) 819 try: 820 df_or_iter = env[self.entrypoint]( 821 context=context, start=start, end=end, latest=latest, **kwargs 822 ) 823 824 if not isinstance(df_or_iter, types.GeneratorType): 825 df_or_iter = [df_or_iter] 826 827 for df in df_or_iter: 828 if self.kind.is_incremental_by_time_range: 829 assert self.time_column 830 831 if PySparkDataFrame is not None and isinstance(df, PySparkDataFrame): 832 import pyspark 833 834 df = df.where( 835 pyspark.sql.functions.col(self.time_column.column).between( 836 pyspark.sql.functions.lit( 837 self.convert_to_time_column(start).sql("spark") 838 ), 839 pyspark.sql.functions.lit( 840 self.convert_to_time_column(end).sql("spark") 841 ), 842 ) 843 ) 844 else: 845 assert self.time_column.format, "Time column format is required." 846 df = filter_df_by_timelike( 847 df, self.time_column.column, self.time_column.format, start, end 848 ) 849 yield df 850 except Exception as e: 851 print_exception(e, self.python_env) 852 raise SQLMeshError(f"Error executing Python model '{self.name}'")
Renders the content of this model in a form of either a SELECT query, executing which the data for this model can be fetched, or a dataframe object which contains the data itself.
The type of the returned object (query or dataframe) depends on whether the model was sourced from a SQL query, a Python script or a pre-built dataset (seed).
Arguments:
- context: The execution context used for fetching data.
- start: The start date/time of the run.
- end: The end date/time of the run.
- latest: The latest date/time to use for the run.
Returns:
A generator which yields either a query object or one of the supported dataframe objects.
854 def render_definition(self, include_python: bool = True) -> t.List[exp.Expression]: 855 # Ignore the provided value for the include_python flag, since the Python model's 856 # definition without Python code is meaningless. 857 return super().render_definition(include_python=True)
Returns the original list of sql expressions comprising the model definition.
Arguments:
- include_python: Whether or not to include Python code in the rendered definition.
Inherited Members
- pydantic.main.BaseModel
- BaseModel
- parse_obj
- parse_raw
- parse_file
- from_orm
- construct
- copy
- schema
- schema_json
- validate
- update_forward_refs
- _Model
- render_query
- ctas_query
- run_pre_hooks
- run_post_hooks
- referenced_audits
- update_schema
- text_diff
- set_time_format
- convert_to_time_column
- depends_on
- columns_to_types
- annotated
- sorted_python_env
- macro_definitions
- sql_statements
- contains_star_query
- validate_definition
- is_breaking_change
def load_model(
    expressions: t.List[exp.Expression],
    *,
    defaults: t.Optional[t.Dict[str, t.Any]] = None,
    path: Path = Path(),
    module_path: Path = Path(),
    time_column_format: str = c.DEFAULT_TIME_COLUMN_FORMAT,
    macros: t.Optional[MacroRegistry] = None,
    hooks: t.Optional[HookRegistry] = None,
    python_env: t.Optional[t.Dict[str, Executable]] = None,
    dialect: t.Optional[str] = None,
    **kwargs: t.Any,
) -> Model:
    """Load a model from a parsed SQLMesh model file.

    Args:
        expressions: Model, *Statements, Query.
        defaults: Definition default values.
        path: An optional path to the model definition file.
        module_path: The python module path to serialize macros for.
        time_column_format: The default time column format to use if no model time column is configured.
            The format must adhere to Python's strftime codes.
        macros: The custom registry of macros. If not provided the default registry will be used.
        hooks: The custom registry of hooks. If not provided the default registry will be used.
        python_env: The custom Python environment for hooks/macros. If not provided the environment will be constructed
            from the macro registry.
        dialect: The default dialect if no model dialect is configured.
        kwargs: Additional kwargs to pass to the loader.
    """
    if not expressions:
        raise_config_error("Incomplete model definition, missing MODEL statement", path)

    dialect = dialect or ""
    meta = expressions[0]
    # The query is always the last expression; everything between the MODEL
    # statement and the query is treated as auxiliary SQL statements.
    query = expressions[-1] if len(expressions) > 1 else None
    statements = expressions[1:-1]

    if not isinstance(meta, d.Model):
        raise_config_error(
            "MODEL statement is required as the first statement in the definition",
            path,
        )

    meta_fields: t.Dict[str, t.Any] = {
        "dialect": dialect,
        "description": "\n".join(comment.strip() for comment in meta.comments)
        if meta.comments
        else None,
        **{prop.name.lower(): prop.args.get("value") for prop in meta.expressions},
        **kwargs,
    }

    name = meta_fields.pop("name", "")
    if not name:
        raise_config_error("Model must have a name", path)

    if isinstance(query, d.MacroVar):
        # A macro variable in place of a query references a Python model's entrypoint.
        if python_env is None:
            raise_config_error("The python environment must be provided for Python models", path)
            raise  # raise_config_error always raises; this satisfies type checkers.

        return create_python_model(
            name,
            query.name,
            python_env,
            defaults=defaults,
            path=path,
            time_column_format=time_column_format,
            **meta_fields,
        )
    elif query is not None:
        return create_sql_model(
            name,
            query,
            statements=statements,
            defaults=defaults,
            path=path,
            module_path=module_path,
            time_column_format=time_column_format,
            macros=macros,
            hooks=hooks,
            python_env=python_env,
            **meta_fields,
        )
    else:
        try:
            seed_properties = {
                p.name.lower(): p.args.get("value") for p in meta_fields.pop("kind").expressions
            }
            return create_seed_model(
                name,
                SeedKind(**seed_properties),
                defaults=defaults,
                path=path,
                **meta_fields,
            )
        except Exception:
            raise_config_error(
                "The model definition must either have a SELECT query or a valid Seed kind",
                path,
            )
            raise  # raise_config_error always raises; this satisfies type checkers.
Load a model from a parsed SQLMesh model file.
Arguments:
- expressions: Model, *Statements, Query.
- defaults: Definition default values.
- path: An optional path to the model definition file.
- module_path: The python module path to serialize macros for.
- time_column_format: The default time column format to use if no model time column is configured. The format must adhere to Python's strftime codes.
- macros: The custom registry of macros. If not provided the default registry will be used.
- hooks: The custom registry of hooks. If not provided the default registry will be used.
- python_env: The custom Python environment for hooks/macros. If not provided the environment will be constructed from the macro registry.
- dialect: The default dialect if no model dialect is configured.
- kwargs: Additional kwargs to pass to the loader.
def create_sql_model(
    name: str,
    query: exp.Expression,
    *,
    statements: t.Optional[t.List[exp.Expression]] = None,
    defaults: t.Optional[t.Dict[str, t.Any]] = None,
    path: Path = Path(),
    module_path: Path = Path(),
    time_column_format: str = c.DEFAULT_TIME_COLUMN_FORMAT,
    macros: t.Optional[MacroRegistry] = None,
    hooks: t.Optional[HookRegistry] = None,
    python_env: t.Optional[t.Dict[str, Executable]] = None,
    dialect: t.Optional[str] = None,
    **kwargs: t.Any,
) -> Model:
    """Creates a SQL model.

    Args:
        name: The name of the model, which is of the form [catalog].[db].table.
            The catalog and db are optional.
        query: The model's logic in a form of a SELECT query.
        statements: The list of all SQL statements that are not a query or a model definition.
        defaults: Definition default values.
        path: An optional path to the model definition file.
        module_path: The python module path to serialize macros for.
        time_column_format: The default time column format to use if no model time column is configured.
            The format must adhere to Python's strftime codes.
        macros: The custom registry of macros. If not provided the default registry will be used.
        hooks: The custom registry of hooks. If not provided the default registry will be used.
        python_env: The custom Python environment for hooks/macros. If not provided the environment will be constructed
            from the macro registry.
        dialect: The default dialect if no model dialect is configured.

    Raises:
        ConfigError: If the provided query is not a SELECT / UNION statement or a Jinja block.
    """
    if not isinstance(query, (exp.Subqueryable, d.Jinja)):
        raise_config_error(
            "A query is required and must be a SELECT or UNION statement.",
            path,
        )

    # Build the serialized Python environment from the macro/hook registries
    # only when the caller didn't supply one explicitly.
    if not python_env:
        python_env = _python_env(
            query,
            _extract_hooks(kwargs),
            module_path,
            macros or macro.get_registry(),
            hooks or hook.get_registry(),
        )

    return _create_model(
        SqlModel,
        name,
        defaults=defaults,
        path=path,
        time_column_format=time_column_format,
        python_env=python_env,
        dialect=dialect,
        expressions=statements or [],
        query=query,
        **kwargs,
    )
Creates a SQL model.
Arguments:
- name: The name of the model, which is of the form [catalog].[db].table. The catalog and db are optional.
- query: The model's logic in a form of a SELECT query.
- statements: The list of all SQL statements that are not a query or a model definition.
- defaults: Definition default values.
- path: An optional path to the model definition file.
- module_path: The python module path to serialize macros for.
- time_column_format: The default time column format to use if no model time column is configured. The format must adhere to Python's strftime codes.
- macros: The custom registry of macros. If not provided the default registry will be used.
- hooks: The custom registry of hooks. If not provided the default registry will be used.
- python_env: The custom Python environment for hooks/macros. If not provided the environment will be constructed from the macro registry.
- dialect: The default dialect if no model dialect is configured.
def create_seed_model(
    name: str,
    seed_kind: SeedKind,
    *,
    defaults: t.Optional[t.Dict[str, t.Any]] = None,
    path: Path = Path(),
    **kwargs: t.Any,
) -> Model:
    """Creates a Seed model.

    Args:
        name: The name of the model, which is of the form [catalog].[db].table.
            The catalog and db are optional.
        seed_kind: The information about the location of a seed and other related configuration.
        defaults: Definition default values.
        path: An optional path to the model definition file.
    """
    seed_path = Path(seed_kind.path)
    if not seed_path.is_absolute():
        # Resolve relative seed paths against the model's directory; when
        # `path` points at the definition file itself, use its parent.
        seed_path = (path if path.is_dir() else path.parent) / seed_path
    seed = create_seed(seed_path)
    return _create_model(
        SeedModel,
        name,
        defaults=defaults,
        path=path,
        seed=seed,
        kind=seed_kind,
        **kwargs,
    )
Creates a Seed model.
Arguments:
- name: The name of the model, which is of the form [catalog].[db].table. The catalog and db are optional.
- seed_kind: The information about the location of a seed and other related configuration.
- defaults: Definition default values.
- path: An optional path to the model definition file.
def create_python_model(
    name: str,
    entrypoint: str,
    python_env: t.Dict[str, Executable],
    *,
    defaults: t.Optional[t.Dict[str, t.Any]] = None,
    path: Path = Path(),
    time_column_format: str = c.DEFAULT_TIME_COLUMN_FORMAT,
    depends_on: t.Optional[t.Set[str]] = None,
    **kwargs: t.Any,
) -> Model:
    """Creates a Python model.

    Args:
        name: The name of the model, which is of the form [catalog].[db].table.
            The catalog and db are optional.
        entrypoint: The name of a Python function which contains the data fetching / transformation logic.
        python_env: The Python environment of all objects referenced by the model implementation.
        defaults: Definition default values.
        path: An optional path to the model definition file.
        time_column_format: The default time column format to use if no model time column is configured.
            The format must adhere to Python's strftime codes.
        depends_on: The custom set of model's upstream dependencies.
    """
    # Infer dependencies by parsing the entrypoint's code only when they are
    # not explicitly defined. BUG FIX: the previous implementation used
    # `... else None`, which silently discarded an explicitly provided
    # `depends_on`; an explicit set is now preserved.
    if depends_on is None and python_env is not None:
        depends_on = _parse_depends_on(entrypoint, python_env)
    return _create_model(
        PythonModel,
        name,
        defaults=defaults,
        path=path,
        time_column_format=time_column_format,
        depends_on=depends_on,
        entrypoint=entrypoint,
        python_env=python_env,
        **kwargs,
    )
Creates a Python model.
Arguments:
- name: The name of the model, which is of the form [catalog].[db].table. The catalog and db are optional.
- entrypoint: The name of a Python function which contains the data fetching / transformation logic.
- python_env: The Python environment of all objects referenced by the model implementation.
- defaults: Definition default values.
- path: An optional path to the model definition file.
- time_column_format: The default time column format to use if no model time column is configured.
- depends_on: The custom set of model's upstream dependencies.