Edit on GitHub

sqlmesh.core.snapshot.definition

  1from __future__ import annotations
  2
  3import typing as t
  4import zlib
  5from collections import defaultdict
  6from enum import IntEnum
  7
  8from croniter import croniter_range
  9from pydantic import validator
 10from sqlglot import exp
 11
 12from sqlmesh.core import constants as c
 13from sqlmesh.core.audit import Audit
 14from sqlmesh.core.model import (
 15    Model,
 16    PythonModel,
 17    SeedModel,
 18    SqlModel,
 19    kind,
 20    parse_model_name,
 21)
 22from sqlmesh.core.model.meta import HookCall
 23from sqlmesh.utils.date import (
 24    TimeLike,
 25    make_inclusive,
 26    now,
 27    now_timestamp,
 28    to_datetime,
 29    to_timestamp,
 30)
 31from sqlmesh.utils.errors import SQLMeshError
 32from sqlmesh.utils.pydantic import PydanticModel
 33
# A [start, end) interval of epoch timestamps: start inclusive, end exclusive
# (see Snapshot.add_interval for the convention).
Interval = t.Tuple[int, int]
Intervals = t.List[Interval]
 36
 37
class SnapshotChangeCategory(IntEnum):
    """
    Values are ordered by decreasing severity and that ordering is required.

    BREAKING: The change requires that snapshot modified and downstream dependencies be rebuilt
    NON_BREAKING: The change requires that only the snapshot modified be rebuilt
    FORWARD_ONLY: The change requires no rebuilding
    """

    BREAKING = 1
    NON_BREAKING = 2
    FORWARD_ONLY = 3
 51
 52class SnapshotFingerprint(PydanticModel, frozen=True):
 53    data_hash: str
 54    metadata_hash: str
 55    parent_data_hash: str = "0"
 56    parent_metadata_hash: str = "0"
 57
 58    def to_version(self) -> str:
 59        return _hash([self.data_hash, self.parent_data_hash])
 60
 61    def to_identifier(self) -> str:
 62        return _hash(
 63            [
 64                self.data_hash,
 65                self.metadata_hash,
 66                self.parent_data_hash,
 67                self.parent_metadata_hash,
 68            ]
 69        )
 70
 71
 72class SnapshotId(PydanticModel, frozen=True):
 73    name: str
 74    identifier: str
 75
 76    @property
 77    def snapshot_id(self) -> SnapshotId:
 78        """Helper method to return self."""
 79        return self
 80
 81
class SnapshotNameVersion(PydanticModel, frozen=True):
    """A (name, version) pair identifying snapshots that share physical storage."""

    name: str
    version: str
 85
 86
class SnapshotDataVersion(PydanticModel, frozen=True):
    """The fingerprint, physical version, and change category of a snapshot's data."""

    fingerprint: SnapshotFingerprint
    version: str
    change_category: t.Optional[SnapshotChangeCategory]

    @property
    def data_version(self) -> SnapshotDataVersion:
        """Helper method to return self."""
        return self

    @property
    def is_new_version(self) -> bool:
        """Returns whether or not this version is new and requires a backfill."""
        return self.fingerprint.to_version() == self.version
100
101
class QualifiedViewName(PydanticModel, frozen=True):
    """A view name split into its catalog / schema / table components."""

    catalog: t.Optional[str]
    schema_name: t.Optional[str]
    table: str

    def for_environment(self, environment: str) -> str:
        """Render the dot-joined view name for the given environment, omitting missing parts."""
        parts = (
            self.catalog,
            self.schema_for_environment(environment),
            self.table,
        )
        return ".".join(part for part in parts if part is not None)

    def schema_for_environment(self, environment: str) -> str:
        """Return the schema name, suffixed with the environment name unless it is prod."""
        base_schema = self.schema_name or "default"
        if environment.lower() == c.PROD:
            return base_schema
        return f"{base_schema}__{environment}"
123
124
class SnapshotInfoMixin:
    """Shared identity/naming helpers for Snapshot and SnapshotTableInfo.

    Subclasses must provide concrete ``data_version`` and ``is_new_version``
    implementations.
    """

    name: str
    fingerprint: SnapshotFingerprint
    physical_schema: str
    previous_versions: t.Tuple[SnapshotDataVersion, ...] = ()

    def is_temporary_table(self, is_dev: bool) -> bool:
        """Provided whether the snapshot is used in a development mode or not, returns True
        if the snapshot targets a temporary table or a clone and False otherwise.
        """
        return is_dev and not self.is_new_version

    @property
    def identifier(self) -> str:
        """The identifier derived from all four fingerprint components."""
        return self.fingerprint.to_identifier()

    @property
    def snapshot_id(self) -> SnapshotId:
        """The (name, identifier) key for this snapshot."""
        return SnapshotId(name=self.name, identifier=self.identifier)

    @property
    def qualified_view_name(self) -> QualifiedViewName:
        """The snapshot name parsed into catalog / schema / table components."""
        (catalog, schema, table) = parse_model_name(self.name)
        return QualifiedViewName(catalog=catalog, schema_name=schema, table=table)

    @property
    def previous_version(self) -> t.Optional[SnapshotDataVersion]:
        """Helper method to get the previous data version."""
        if self.previous_versions:
            return self.previous_versions[-1]
        return None

    @property
    def data_version(self) -> SnapshotDataVersion:
        raise NotImplementedError

    @property
    def is_new_version(self) -> bool:
        raise NotImplementedError

    @property
    def is_forward_only(self) -> bool:
        # Forward-only: the data hash changed relative to the previous version,
        # yet the snapshot reuses an existing physical version (no backfill).
        return not self.data_hash_matches(self.previous_version) and not self.is_new_version

    @property
    def all_versions(self) -> t.Tuple[SnapshotDataVersion, ...]:
        """Returns previous versions with the current version trimmed to DATA_VERSION_LIMIT."""
        return (*self.previous_versions, self.data_version)[-c.DATA_VERSION_LIMIT :]

    def data_hash_matches(self, other: t.Optional[SnapshotInfoMixin | SnapshotDataVersion]) -> bool:
        """Return True if ``other`` is present and has the same data hash as this snapshot."""
        return other is not None and self.fingerprint.data_hash == other.fingerprint.data_hash

    def _table_name(self, version: str, is_dev: bool, for_read: bool) -> str:
        """Full table name pointing to the materialized location of the snapshot.

        Args:
            version: The snapshot version.
            is_dev: Whether the table name will be used in development mode.
            for_read: Whether the table name will be used for reading by a different snapshot.
        """
        if is_dev and for_read:
            # If this snapshot is used for reading, return a temporary table
            # only if this snapshot captures direct changes applied to its model.
            version = self.fingerprint.to_version() if self.is_forward_only else version
            is_temp = self.is_temporary_table(True) and self.is_forward_only
        elif is_dev:
            # In dev, always target the fingerprint-derived version.
            version = self.fingerprint.to_version()
            is_temp = self.is_temporary_table(True)
        else:
            is_temp = False

        return table_name(
            self.physical_schema,
            self.name,
            version,
            is_temp=is_temp,
        )
202
203
class SnapshotTableInfo(PydanticModel, SnapshotInfoMixin, frozen=True):
    """A serializable summary of a Snapshot without the model payload.

    Built by ``Snapshot.table_info``; carries everything needed to resolve
    physical table names and versions.
    """

    name: str
    fingerprint: SnapshotFingerprint
    version: str
    physical_schema: str
    parents: t.Tuple[SnapshotId, ...]
    previous_versions: t.Tuple[SnapshotDataVersion, ...] = ()
    change_category: t.Optional[SnapshotChangeCategory]
    is_materialized: bool
    is_embedded_kind: bool

    def table_name(self, is_dev: bool = False, for_read: bool = False) -> str:
        """Full table name pointing to the materialized location of the snapshot.

        Args:
            is_dev: Whether the table name will be used in development mode.
            for_read: Whether the table name will be used for reading by a different snapshot.
        """
        return self._table_name(self.version, is_dev, for_read)

    @property
    def table_info(self) -> SnapshotTableInfo:
        """Helper method to return self."""
        return self

    @property
    def data_version(self) -> SnapshotDataVersion:
        """The data version built from this table info's fields."""
        return SnapshotDataVersion(
            fingerprint=self.fingerprint,
            version=self.version,
            change_category=self.change_category,
        )

    @property
    def is_new_version(self) -> bool:
        """Returns whether or not this version is new and requires a backfill."""
        return self.fingerprint.to_version() == self.version
241
242
class Snapshot(PydanticModel, SnapshotInfoMixin):
    """A snapshot represents a model at a certain point in time.

    Snapshots are used to encapsulate everything needed to evaluate a model.
    They are standalone objects that hold all state and dynamic content necessary
    to render a model's query including things like macros. Snapshots also store intervals
    (timestamp ranges for what data we've processed).

    Models can be dynamically rendered due to macros. Rendering a model to its full extent
    requires storing variables and macro definitions. We store all of the macro definitions and
    global variable references in `python_env` in raw text to avoid pickling. The helper methods
    to achieve this are defined in utils.metaprogramming.

    Args:
        name: The snapshot name which is the same as the model name and should be unique per model.

        fingerprint: A unique hash of the model definition so that models can be reused across environments.
        physical_schema: The physical schema that the snapshot is stored in.
        model: Model object that the snapshot encapsulates.
        parents: The list of parent snapshots (upstream dependencies).
        audits: The list of audits used by the model.
        intervals: List of [start, end) intervals showing which time ranges a snapshot has data for.
        created_ts: Epoch millis timestamp when a snapshot was first created.
        updated_ts: Epoch millis timestamp when a snapshot was last updated.
        ttl: The time-to-live of a snapshot determines when it should be deleted after it's no longer referenced
            in any environment.
        previous: The snapshot data version that this snapshot was based on. If this snapshot is new, then previous will be None.
        version: User specified version for a snapshot that is used for physical storage.
            By default, the version is the fingerprint, but not all changes to models require a backfill.
            If a user passes a previous version, that will be used instead and no backfill will be required.
        change_category: User specified change category indicating which models require backfill from model changes made in this snapshot.
        unpaused_ts: The timestamp which indicates when this snapshot was unpaused. Unpaused means that
            this snapshot is evaluated on a recurring basis. None indicates that this snapshot is paused.
    """

    name: str
    fingerprint: SnapshotFingerprint
    physical_schema: str
    model: Model
    parents: t.Tuple[SnapshotId, ...]
    audits: t.Tuple[Audit, ...]
    intervals: Intervals
    dev_intervals: Intervals
    created_ts: int
    updated_ts: int
    ttl: str
    previous_versions: t.Tuple[SnapshotDataVersion, ...] = ()
    indirect_versions: t.Dict[str, t.Tuple[SnapshotDataVersion, ...]] = {}
    version: t.Optional[str] = None
    change_category: t.Optional[SnapshotChangeCategory] = None
    unpaused_ts: t.Optional[int] = None

    @validator("ttl")
    @classmethod
    def _time_delta_must_be_positive(cls, v: str) -> str:
        # The TTL expression must resolve to a point in time after "now",
        # i.e. denote a positive interval such as "in 7 days".
        current_time = now()
        if to_datetime(v, current_time) < current_time:
            raise ValueError(
                "Must be positive. Use the 'in' keyword to denote a positive time interval. For example, 'in 7 days'."
            )
        return v

    @staticmethod
    def merge_snapshots(
        targets: t.Iterable[SnapshotIdLike],
        snapshots: t.Dict[SnapshotId, Snapshot],
    ) -> t.List[Snapshot]:
        """Merge target snapshots with others so that each target snapshot has intervals from all other snapshots with the same version.

        Args:
            targets: Iterable of snapshot-like objects
            snapshots: Dictionary of snapshot ids to snapshot.

        Returns:
            List of target snapshots with merged intervals.

        Raises:
            SQLMeshError: If a target's id is not present in `snapshots`.
        """
        merged = []
        snapshots_by_name_version = defaultdict(list)

        for s in snapshots.values():
            snapshots_by_name_version[(s.name, s.version)].append(s)

        for snapshot_like in targets:
            snapshot_id = snapshot_like.snapshot_id
            snapshot = snapshots.get(snapshot_id)
            if not snapshot:
                raise SQLMeshError(f"The snapshot {snapshot_id} was not found")

            # Work on a copy so the input snapshot's intervals are untouched.
            snapshot = snapshot.copy()
            snapshot.intervals = []

            for other in snapshots_by_name_version[(snapshot.name, snapshot.version)]:
                snapshot.merge_intervals(other)

            merged.append(snapshot)

        return merged

    @classmethod
    def from_model(
        cls,
        model: Model,
        *,
        physical_schema: str,
        models: t.Dict[str, Model],
        ttl: str = c.DEFAULT_SNAPSHOT_TTL,
        version: t.Optional[str] = None,
        audits: t.Optional[t.Dict[str, Audit]] = None,
        cache: t.Optional[t.Dict[str, SnapshotFingerprint]] = None,
    ) -> Snapshot:
        """Creates a new snapshot for a model.

        Args:
            model: Model to snapshot.
            physical_schema: The schema of the snapshot which represents where it is stored.
            models: Dictionary of all models in the graph to make the fingerprint dependent on parent changes.
                If no dictionary is passed in the fingerprint will not be dependent on a model's parents.
            ttl: A TTL to determine how long orphaned (snapshots that are not promoted anywhere) should live.
            version: The version that a snapshot is associated with. Usually set during the planning phase.
            audits: Available audits by name.
            cache: Cache of model name to fingerprints.

        Returns:
            The newly created snapshot.
        """
        created_ts = now_timestamp()

        audits = audits or {}

        return cls(
            name=model.name,
            fingerprint=fingerprint_from_model(
                model,
                physical_schema=physical_schema,
                models=models,
                audits=audits,
                cache=cache,
            ),
            physical_schema=physical_schema,
            model=model,
            parents=tuple(
                SnapshotId(
                    name=name,
                    identifier=fingerprint_from_model(
                        models[name],
                        physical_schema=physical_schema,
                        models=models,
                        audits=audits,
                        cache=cache,
                    ).to_identifier(),
                )
                for name in _parents_from_model(model, models)
            ),
            audits=tuple(model.referenced_audits(audits)),
            intervals=[],
            dev_intervals=[],
            created_ts=created_ts,
            updated_ts=created_ts,
            ttl=ttl,
            version=version,
        )

    def __eq__(self, other: t.Any) -> bool:
        # Snapshot equality is purely fingerprint-based; intervals and other
        # mutable state are intentionally excluded.
        return isinstance(other, Snapshot) and self.fingerprint == other.fingerprint

    def __hash__(self) -> int:
        return hash((self.__class__, self.fingerprint))

    def add_interval(self, start: TimeLike, end: TimeLike, is_dev: bool = False) -> None:
        """Add a newly processed time interval to the snapshot.

        The actual stored intervals are [start_ts, end_ts) or start epoch timestamp inclusive and end epoch
        timestamp exclusive. This allows merging of ranges to be easier.

        Args:
            start: The start date/time of the interval (inclusive)
            end: The end date/time of the interval. If end is a date, then it is considered inclusive.
                If it is a datetime object, then it is exclusive.
            is_dev: Indicates whether the given interval is being added while in development mode.
        """
        # Intervals produced against a temporary (dev) table are tracked separately.
        is_temp_table = self.is_temporary_table(is_dev)
        intervals = self.dev_intervals if is_temp_table else self.intervals

        intervals.append(self._inclusive_exclusive(start, end))

        if len(intervals) < 2:
            return

        merged_intervals = merge_intervals(intervals)
        if is_temp_table:
            self.dev_intervals = merged_intervals
        else:
            self.intervals = merged_intervals

    def remove_interval(self, start: TimeLike, end: TimeLike) -> None:
        """Remove an interval from the snapshot.

        Args:
            start: Start interval to remove.
            end: End interval to remove.
        """
        interval = self._inclusive_exclusive(start, end)
        self.intervals = remove_interval(self.intervals, *interval)
        self.dev_intervals = remove_interval(self.dev_intervals, *interval)

    def _inclusive_exclusive(self, start: TimeLike, end: TimeLike) -> t.Tuple[int, int]:
        """Convert an inclusive (start, end) pair into a cron-aligned [start_ts, end_ts) pair.

        Raises:
            ValueError: If the aligned interval is empty or inverted.
        """
        start_dt, end_dt = make_inclusive(start, end)
        start_ts = to_timestamp(self.model.cron_floor(start_dt))
        end_ts = to_timestamp(self.model.cron_next(end_dt))

        # NOTE(review): the check also rejects equality after cron alignment,
        # so the effective requirement is end > start.
        if start_ts >= end_ts:
            raise ValueError("`end` must be >= `start`")
        return (start_ts, end_ts)

    def merge_intervals(self, other: Snapshot) -> None:
        """Inherits intervals from the target snapshot.

        Args:
            other: The target snapshot to inherit intervals from.
        """
        for start, end in other.intervals:
            self.add_interval(start, end)

    def missing_intervals(
        self, start: TimeLike, end: TimeLike, latest: t.Optional[TimeLike] = None
    ) -> Intervals:
        """Find all missing intervals between [start, end].

        Although the inputs are inclusive, the returned stored intervals are
        [start_ts, end_ts) or start epoch timestamp inclusive and end epoch
        timestamp exclusive.

        Args:
            start: The start date/time of the interval (inclusive)
            end: The end date/time of the interval (inclusive)
            latest: The latest known date/time, defaults to now.

        Returns:
            A list of all the missing intervals as epoch timestamps.
        """
        # Embedded models have no physical data of their own.
        if self.is_embedded_kind:
            return []

        start_dt, end_dt = make_inclusive(start, self.model.cron_floor(end))

        if self.is_full_kind or self.is_view_kind or self.is_seed_kind:
            latest_dt = to_datetime(self.model.cron_floor(latest or now()))
            latest_ts = to_timestamp(latest_dt)
            # if the latest ts is stored in the last interval, nothing is missing
            # else returns the latest ts with the exclusive end ts.
            if self.intervals and self.intervals[-1][1] >= latest_ts:
                return []
            return [(to_timestamp(self.model.cron_prev(latest_dt)), latest_ts)]

        missing = []
        dates = list(croniter_range(start_dt, end_dt, self.model.normalized_cron()))
        size = len(dates)

        for i in range(size):
            current_ts = to_timestamp(dates[i])
            end_ts = (
                to_timestamp(dates[i + 1])
                if i + 1 < size
                else to_timestamp(self.model.cron_next(current_ts))
            )

            # Stored intervals are sorted and merged, so a single pass suffices:
            # the candidate is missing if it falls before a stored interval or
            # past the last one (the for/else branch).
            for low, high in self.intervals:
                if current_ts < low:
                    missing.append((current_ts, end_ts))
                    break
                elif current_ts < high:
                    break
            else:
                missing.append((current_ts, end_ts))

        return missing

    def set_version(
        self,
        version: t.Optional[str | SnapshotDataVersion | SnapshotTableInfo | Snapshot] = None,
    ) -> None:
        """Set the version of this snapshot.

        If no version is passed, the fingerprint of the snapshot will be used.

        Args:
            version: Either a string or a TableInfo to use.
        """
        if isinstance(version, (SnapshotDataVersion, SnapshotTableInfo, Snapshot)):
            self.version = version.data_version.version
        else:
            self.version = version or self.fingerprint.to_version()

    def set_unpaused_ts(self, unpaused_dt: t.Optional[TimeLike]) -> None:
        """Sets the timestamp for when this snapshot was unpaused.

        Args:
            unpaused_dt: The datetime object of when this snapshot was unpaused.
        """
        self.unpaused_ts = to_timestamp(self.model.cron_floor(unpaused_dt)) if unpaused_dt else None

    def table_name(self, is_dev: bool = False, for_read: bool = False) -> str:
        """Full table name pointing to the materialized location of the snapshot.

        Args:
            is_dev: Whether the table name will be used in development mode.
            for_read: Whether the table name will be used for reading by a different snapshot.

        Raises:
            SQLMeshError: If the snapshot has not been versioned yet.
        """
        self._ensure_version()
        assert self.version
        return self._table_name(self.version, is_dev, for_read)

    def table_name_for_mapping(self, is_dev: bool = False) -> str:
        """Full table name used by a child snapshot for table mapping during evaluation.

        Args:
            is_dev: Whether the table name will be used in development mode.

        Raises:
            SQLMeshError: If the snapshot has not been versioned yet.
        """
        self._ensure_version()
        assert self.version

        if is_dev and self.is_forward_only:
            # If this snapshot is unpaused we shouldn't be using a temporary
            # table for mapping purposes.
            is_dev = self.is_paused

        return self._table_name(self.version, is_dev, True)

    def version_get_or_generate(self) -> str:
        """Helper method to get the version or generate it from the fingerprint."""
        return self.version or self.fingerprint.to_version()

    @property
    def table_info(self) -> SnapshotTableInfo:
        """Helper method to get the SnapshotTableInfo from the Snapshot."""
        self._ensure_version()
        return SnapshotTableInfo(
            physical_schema=self.physical_schema,
            name=self.name,
            fingerprint=self.fingerprint,
            version=self.version,
            parents=self.parents,
            previous_versions=self.previous_versions,
            change_category=self.change_category,
            is_materialized=self.is_materialized,
            is_embedded_kind=self.is_embedded_kind,
        )

    @property
    def data_version(self) -> SnapshotDataVersion:
        """The data version of this snapshot; requires the snapshot to be versioned."""
        self._ensure_version()
        return SnapshotDataVersion(
            fingerprint=self.fingerprint,
            version=self.version,
            change_category=self.change_category,
        )

    @property
    def is_new_version(self) -> bool:
        """Returns whether or not this version is new and requires a backfill."""
        self._ensure_version()
        return self.fingerprint.to_version() == self.version

    @property
    def is_full_kind(self) -> bool:
        return self.model.kind.is_full

    @property
    def is_view_kind(self) -> bool:
        return self.model.kind.is_view

    @property
    def is_incremental_by_time_range_kind(self) -> bool:
        return self.model.kind.is_incremental_by_time_range

    @property
    def is_incremental_by_unique_key_kind(self) -> bool:
        return self.model.kind.is_incremental_by_unique_key

    @property
    def is_embedded_kind(self) -> bool:
        return self.model.kind.is_embedded

    @property
    def is_seed_kind(self) -> bool:
        return self.model.kind.is_seed

    @property
    def is_materialized(self) -> bool:
        return self.model.kind.is_materialized

    @property
    def is_paused(self) -> bool:
        # A snapshot with no unpaused timestamp has never been unpaused.
        return self.unpaused_ts is None

    def _ensure_version(self) -> None:
        # Guard for operations that require a physical version to exist.
        if not self.version:
            raise SQLMeshError(f"Snapshot {self.snapshot_id} has not been versioned yet.")
644
645
# Unions accepted by APIs that only need part of a snapshot's identity.
SnapshotIdLike = t.Union[SnapshotId, SnapshotTableInfo, Snapshot]
SnapshotInfoLike = t.Union[SnapshotTableInfo, Snapshot]
SnapshotNameVersionLike = t.Union[SnapshotNameVersion, SnapshotTableInfo, Snapshot]
649
650
def table_name(physical_schema: str, name: str, version: str, is_temp: bool = False) -> str:
    """Format the physical table name for a snapshot version.

    The model name's dots are flattened to double underscores and the version
    (plus an optional ``__temp`` suffix) is appended.
    """
    flat_name = name.replace(".", "__")
    suffix = "__temp" if is_temp else ""
    return f"{physical_schema}.{flat_name}__{version}{suffix}"
654
655
def fingerprint_from_model(
    model: Model,
    *,
    models: t.Dict[str, Model],
    physical_schema: str = "",
    audits: t.Optional[t.Dict[str, Audit]] = None,
    cache: t.Optional[t.Dict[str, SnapshotFingerprint]] = None,
) -> SnapshotFingerprint:
    """Helper function to generate a fingerprint based on a model's query and environment.

    This method tries to remove non meaningful differences to avoid ever changing fingerprints.
    The fingerprint is made up of two parts split by an underscore -- query_metadata. The query hash is
    determined purely by the rendered query and the metadata by everything else.

    Args:
        model: Model to fingerprint.
        physical_schema: The physical_schema of the snapshot which represents where it is stored.
        models: Dictionary of all models in the graph to make the fingerprint dependent on parent changes.
            If no dictionary is passed in the fingerprint will not be dependent on a model's parents.
        audits: Available audits by name.
        cache: Cache of model name to fingerprints.

    Returns:
        The fingerprint.
    """
    cache = {} if cache is None else cache

    if model.name not in cache:
        # Recursively fingerprint all visible parents; the shared cache keeps
        # this linear in the number of models even for deep DAGs.
        parents = [
            fingerprint_from_model(
                models[table],
                models=models,
                physical_schema=physical_schema,
                audits=audits,
                cache=cache,
            )
            for table in model.depends_on
            if table in models
        ]

        # Sorting makes the combined hashes independent of parent iteration order.
        parent_data_hash = _hash(sorted(p.to_version() for p in parents))

        parent_metadata_hash = _hash(
            sorted(h for p in parents for h in (p.metadata_hash, p.parent_metadata_hash))
        )

        cache[model.name] = SnapshotFingerprint(
            data_hash=_model_data_hash(model, physical_schema),
            metadata_hash=_model_metadata_hash(model, audits or {}),
            parent_data_hash=parent_data_hash,
            parent_metadata_hash=parent_metadata_hash,
        )

    return cache[model.name]
710
711
def _model_data_hash(model: Model, physical_schema: str) -> str:
    """Hash the data-affecting attributes of a model.

    Anything that changes what data the model produces (query, kind, cron,
    storage, hooks, macros, etc.) contributes to this hash; purely descriptive
    attributes are covered by `_model_metadata_hash` instead.
    """

    def serialize_hooks(hooks: t.List[HookCall]) -> t.Iterable[str]:
        # A hook is either a raw SQL expression or a (name, args) call tuple;
        # args are sorted so serialization is order-independent.
        serialized = []
        for hook in hooks:
            if isinstance(hook, exp.Expression):
                serialized.append(hook.sql())
            else:
                name, args = hook
                serialized.append(
                    f"{name}:"
                    + ",".join(
                        f"{k}={v.sql(identify=True, comments=False)}"
                        for k, v in sorted(args.items())
                    )
                )
        return serialized

    data = [
        str(model.sorted_python_env),
        model.kind.name,
        model.cron,
        model.storage_format,
        physical_schema,
        *(model.partitioned_by or []),
        *(expression.sql(identify=True, comments=False) for expression in model.expressions or []),
        *serialize_hooks(model.pre),
        *serialize_hooks(model.post),
        model.stamp,
    ]

    # Model-type-specific content: rendered query and jinja macros for SQL
    # models, the entrypoint and column schema for Python models, and the seed
    # content/schema for seed models.
    if isinstance(model, SqlModel):
        data.append(model.query.sql(identify=True, comments=False))

        for macro_name, macro in sorted(model.jinja_macros.root_macros.items(), key=lambda x: x[0]):
            data.append(macro_name)
            data.append(macro.definition)

        for package in model.jinja_macros.packages.values():
            for macro_name, macro in sorted(package.items(), key=lambda x: x[0]):
                data.append(macro_name)
                data.append(macro.definition)
    elif isinstance(model, PythonModel):
        data.append(model.entrypoint)
        for column_name, column_type in model.columns_to_types.items():
            data.append(column_name)
            data.append(str(column_type))
    elif isinstance(model, SeedModel):
        data.append(str(model.kind.batch_size))
        data.append(model.seed.content)
        for column_name, column_type in (model.columns_to_types_ or {}).items():
            data.append(column_name)
            data.append(column_type.sql())

    # Kind-specific attributes that determine how data is written.
    if isinstance(model.kind, kind.IncrementalByTimeRangeKind):
        data.append(model.kind.time_column.column)
        data.append(model.kind.time_column.format)
    elif isinstance(model.kind, kind.IncrementalByUniqueKeyKind):
        data.extend(model.kind.unique_key)

    return _hash(data)
772
773
def _model_metadata_hash(model: Model, audits: t.Dict[str, Audit]) -> str:
    """Hash the descriptive (non-data-affecting) attributes of a model.

    Covers dialect, owner, description, start, batch size, rendered audits,
    and query comments.
    """
    metadata = [
        model.dialect,
        model.owner,
        model.description,
        str(to_timestamp(model.start)) if model.start else None,
        str(model.batch_size) if model.batch_size is not None else None,
    ]

    # Audits referenced by the model but not present in `audits` are skipped.
    for audit_name, audit_args in sorted(model.audits, key=lambda a: a[0]):
        if audit_name not in audits:
            continue

        audit = audits[audit_name]
        # NOTE(review): audits render with comments=True here, unlike the
        # comments=False used in _model_data_hash — confirm this is intentional.
        metadata.extend(
            [
                audit.name,
                audit.render_query(model, **t.cast(t.Dict[str, t.Any], audit_args)).sql(
                    identify=True, comments=True
                ),
                audit.dialect,
                str(audit.skip),
                str(audit.blocking),
            ]
        )

    # Add comments from the model query.
    for e, _, _ in model.render_query().walk():
        if e.comments:
            metadata.extend(e.comments)

    return _hash(metadata)
806
807
def _hash(data: t.Iterable[t.Optional[str]]) -> str:
    """Return the CRC32 checksum, as a decimal string, of the inputs joined by ';'.

    ``None`` entries are treated as empty strings.
    """
    joined = ";".join(d if d is not None else "" for d in data)
    return str(zlib.crc32(joined.encode("utf-8")))
810
811
def _parents_from_model(
    model: Model,
    models: t.Dict[str, Model],
) -> t.Set[str]:
    """Collect the names of a model's known upstream dependencies.

    Embedded parents are transparent: their own parents are included too,
    since embedded models have no physical tables of their own.
    """
    parents: t.Set[str] = set()
    for table in model.depends_on:
        if table not in models:
            continue
        parents.add(table)
        parent = models[table]
        if parent.kind.is_embedded:
            parents |= _parents_from_model(parent, models)
    return parents
824
825
def merge_intervals(intervals: Intervals) -> Intervals:
    """Merge a list of intervals.

    Args:
        intervals: A list of intervals to merge together.

    Returns:
        A new list of sorted and merged intervals. Empty input yields an
        empty list.
    """
    # Guard the empty case: the original seeding of `merged` with the first
    # element would raise IndexError on an empty list.
    if not intervals:
        return []

    intervals = sorted(intervals)
    merged = [intervals[0]]

    for start, end in intervals[1:]:
        last_start, last_end = merged[-1]
        # Touching intervals (start == last_end) are merged as well, matching
        # the [start, end) exclusive-end convention.
        if start <= last_end:
            merged[-1] = (last_start, max(last_end, end))
        else:
            merged.append((start, end))

    return merged
848
849
def remove_interval(intervals: Intervals, remove_start: int, remove_end: int) -> Intervals:
    """Remove an interval from a list of intervals.

    Args:
        intervals: A list of exclusive intervals.
        remove_start: The inclusive start to remove.
        remove_end: The exclusive end to remove.

    Returns:
        A new list of intervals.
    """
    result: Intervals = []

    for interval_start, interval_end in intervals:
        keeps_left = remove_start > interval_start
        keeps_right = remove_end < interval_end
        if keeps_left and keeps_right:
            # Removal splits the interval in two.
            result.append((interval_start, remove_start))
            result.append((remove_end, interval_end))
        elif keeps_left:
            # Only the left portion survives.
            result.append((interval_start, min(remove_start, interval_end)))
        elif keeps_right:
            # Only the right portion survives.
            result.append((max(remove_end, interval_start), interval_end))
        # Otherwise the interval is fully covered and dropped.

    return result
877
878
def to_table_mapping(snapshots: t.Iterable[Snapshot], is_dev: bool) -> t.Dict[str, str]:
    """Build a mapping from model name to its physical table name.

    Unversioned and embedded snapshots are excluded since they have no
    physical table to map to.
    """
    mapping: t.Dict[str, str] = {}
    for snapshot in snapshots:
        if snapshot.version and not snapshot.is_embedded_kind:
            mapping[snapshot.name] = snapshot.table_name_for_mapping(is_dev=is_dev)
    return mapping
class SnapshotChangeCategory(enum.IntEnum):
39class SnapshotChangeCategory(IntEnum):
40    """
41    Values are ordered by decreasing severity and that ordering is required.
42
43    BREAKING: The change requires that snapshot modified and downstream dependencies be rebuilt
44    NON_BREAKING: The change requires that only the snapshot modified be rebuilt
45    NO_CHANGE: The change requires no rebuilding
46    """
47
48    BREAKING = 1
49    NON_BREAKING = 2
50    FORWARD_ONLY = 3

Values are ordered by decreasing severity (lower enum value = more severe), and that ordering is required.

  • BREAKING: The change requires the modified snapshot and its downstream dependencies to be rebuilt.
  • NON_BREAKING: The change requires only the modified snapshot to be rebuilt.
  • FORWARD_ONLY: The change requires no rebuilding. (NOTE: the source docstring still describes this member under its old name, NO_CHANGE — the docstring is stale relative to the listed members.)

Inherited Members
enum.Enum
name
value
builtins.int
conjugate
bit_length
bit_count
to_bytes
from_bytes
as_integer_ratio
real
imag
numerator
denominator
class SnapshotFingerprint(sqlmesh.utils.pydantic.PydanticModel):
53class SnapshotFingerprint(PydanticModel, frozen=True):
54    data_hash: str
55    metadata_hash: str
56    parent_data_hash: str = "0"
57    parent_metadata_hash: str = "0"
58
59    def to_version(self) -> str:
60        return _hash([self.data_hash, self.parent_data_hash])
61
62    def to_identifier(self) -> str:
63        return _hash(
64            [
65                self.data_hash,
66                self.metadata_hash,
67                self.parent_data_hash,
68                self.parent_metadata_hash,
69            ]
70        )
def to_version(self) -> str:
59    def to_version(self) -> str:
60        return _hash([self.data_hash, self.parent_data_hash])
def to_identifier(self) -> str:
62    def to_identifier(self) -> str:
63        return _hash(
64            [
65                self.data_hash,
66                self.metadata_hash,
67                self.parent_data_hash,
68                self.parent_metadata_hash,
69            ]
70        )
Inherited Members
pydantic.main.BaseModel
BaseModel
parse_obj
parse_raw
parse_file
from_orm
construct
copy
schema
schema_json
validate
update_forward_refs
sqlmesh.utils.pydantic.PydanticModel
Config
dict
json
missing_required_fields
extra_fields
all_fields
required_fields
class SnapshotId(sqlmesh.utils.pydantic.PydanticModel):
73class SnapshotId(PydanticModel, frozen=True):
74    name: str
75    identifier: str
76
77    @property
78    def snapshot_id(self) -> SnapshotId:
79        """Helper method to return self."""
80        return self

Helper method to return self.

Inherited Members
pydantic.main.BaseModel
BaseModel
parse_obj
parse_raw
parse_file
from_orm
construct
copy
schema
schema_json
validate
update_forward_refs
sqlmesh.utils.pydantic.PydanticModel
Config
dict
json
missing_required_fields
extra_fields
all_fields
required_fields
class SnapshotNameVersion(sqlmesh.utils.pydantic.PydanticModel):
83class SnapshotNameVersion(PydanticModel, frozen=True):
84    name: str
85    version: str
Inherited Members
pydantic.main.BaseModel
BaseModel
parse_obj
parse_raw
parse_file
from_orm
construct
copy
schema
schema_json
validate
update_forward_refs
sqlmesh.utils.pydantic.PydanticModel
Config
dict
json
missing_required_fields
extra_fields
all_fields
required_fields
class SnapshotDataVersion(sqlmesh.utils.pydantic.PydanticModel):
 88class SnapshotDataVersion(PydanticModel, frozen=True):
 89    fingerprint: SnapshotFingerprint
 90    version: str
 91    change_category: t.Optional[SnapshotChangeCategory]
 92
 93    @property
 94    def data_version(self) -> SnapshotDataVersion:
 95        return self
 96
 97    @property
 98    def is_new_version(self) -> bool:
 99        """Returns whether or not this version is new and requires a backfill."""
100        return self.fingerprint.to_version() == self.version
is_new_version: bool

Returns whether or not this version is new and requires a backfill.

Inherited Members
pydantic.main.BaseModel
BaseModel
parse_obj
parse_raw
parse_file
from_orm
construct
copy
schema
schema_json
validate
update_forward_refs
sqlmesh.utils.pydantic.PydanticModel
Config
dict
json
missing_required_fields
extra_fields
all_fields
required_fields
class QualifiedViewName(sqlmesh.utils.pydantic.PydanticModel):
103class QualifiedViewName(PydanticModel, frozen=True):
104    catalog: t.Optional[str]
105    schema_name: t.Optional[str]
106    table: str
107
108    def for_environment(self, environment: str) -> str:
109        return ".".join(
110            p
111            for p in (
112                self.catalog,
113                self.schema_for_environment(environment),
114                self.table,
115            )
116            if p is not None
117        )
118
119    def schema_for_environment(self, environment: str) -> str:
120        schema = self.schema_name or "default"
121        if environment.lower() != c.PROD:
122            schema = f"{schema}__{environment}"
123        return schema
def for_environment(self, environment: str) -> str:
108    def for_environment(self, environment: str) -> str:
109        return ".".join(
110            p
111            for p in (
112                self.catalog,
113                self.schema_for_environment(environment),
114                self.table,
115            )
116            if p is not None
117        )
def schema_for_environment(self, environment: str) -> str:
119    def schema_for_environment(self, environment: str) -> str:
120        schema = self.schema_name or "default"
121        if environment.lower() != c.PROD:
122            schema = f"{schema}__{environment}"
123        return schema
Inherited Members
pydantic.main.BaseModel
BaseModel
parse_obj
parse_raw
parse_file
from_orm
construct
copy
schema
schema_json
validate
update_forward_refs
sqlmesh.utils.pydantic.PydanticModel
Config
dict
json
missing_required_fields
extra_fields
all_fields
required_fields
class SnapshotInfoMixin:
126class SnapshotInfoMixin:
127    name: str
128    fingerprint: SnapshotFingerprint
129    physical_schema: str
130    previous_versions: t.Tuple[SnapshotDataVersion, ...] = ()
131
132    def is_temporary_table(self, is_dev: bool) -> bool:
133        """Provided whether the snapshot is used in a development mode or not, returns True
134        if the snapshot targets a temporary table or a clone and False otherwise.
135        """
136        return is_dev and not self.is_new_version
137
138    @property
139    def identifier(self) -> str:
140        return self.fingerprint.to_identifier()
141
142    @property
143    def snapshot_id(self) -> SnapshotId:
144        return SnapshotId(name=self.name, identifier=self.identifier)
145
146    @property
147    def qualified_view_name(self) -> QualifiedViewName:
148        (catalog, schema, table) = parse_model_name(self.name)
149        return QualifiedViewName(catalog=catalog, schema_name=schema, table=table)
150
151    @property
152    def previous_version(self) -> t.Optional[SnapshotDataVersion]:
153        """Helper method to get the previous data version."""
154        if self.previous_versions:
155            return self.previous_versions[-1]
156        return None
157
158    @property
159    def data_version(self) -> SnapshotDataVersion:
160        raise NotImplementedError
161
162    @property
163    def is_new_version(self) -> bool:
164        raise NotImplementedError
165
166    @property
167    def is_forward_only(self) -> bool:
168        return not self.data_hash_matches(self.previous_version) and not self.is_new_version
169
170    @property
171    def all_versions(self) -> t.Tuple[SnapshotDataVersion, ...]:
172        """Returns previous versions with the current version trimmed to DATA_VERSION_LIMIT."""
173        return (*self.previous_versions, self.data_version)[-c.DATA_VERSION_LIMIT :]
174
175    def data_hash_matches(self, other: t.Optional[SnapshotInfoMixin | SnapshotDataVersion]) -> bool:
176        return other is not None and self.fingerprint.data_hash == other.fingerprint.data_hash
177
178    def _table_name(self, version: str, is_dev: bool, for_read: bool) -> str:
179        """Full table name pointing to the materialized location of the snapshot.
180
181        Args:
182            version: The snapshot version.
183            is_dev: Whether the table name will be used in development mode.
184            for_read: Whether the table name will be used for reading by a different snapshot.
185        """
186        if is_dev and for_read:
187            # If this snapshot is used for reading, return a temporary table
188            # only if this snapshot captures direct changes applied to its model.
189            version = self.fingerprint.to_version() if self.is_forward_only else version
190            is_temp = self.is_temporary_table(True) and self.is_forward_only
191        elif is_dev:
192            version = self.fingerprint.to_version()
193            is_temp = self.is_temporary_table(True)
194        else:
195            is_temp = False
196
197        return table_name(
198            self.physical_schema,
199            self.name,
200            version,
201            is_temp=is_temp,
202        )
SnapshotInfoMixin()
def is_temporary_table(self, is_dev: bool) -> bool:
132    def is_temporary_table(self, is_dev: bool) -> bool:
133        """Provided whether the snapshot is used in a development mode or not, returns True
134        if the snapshot targets a temporary table or a clone and False otherwise.
135        """
136        return is_dev and not self.is_new_version

Provided whether the snapshot is used in a development mode or not, returns True if the snapshot targets a temporary table or a clone and False otherwise.

previous_version: Helper method to get the previous data version (the last entry of previous_versions, or None).

all_versions: Returns the previous versions with the current version appended, trimmed to DATA_VERSION_LIMIT.

def data_hash_matches( self, other: Union[sqlmesh.core.snapshot.definition.SnapshotInfoMixin, sqlmesh.core.snapshot.definition.SnapshotDataVersion, NoneType]) -> bool:
175    def data_hash_matches(self, other: t.Optional[SnapshotInfoMixin | SnapshotDataVersion]) -> bool:
176        return other is not None and self.fingerprint.data_hash == other.fingerprint.data_hash
class SnapshotTableInfo(sqlmesh.utils.pydantic.PydanticModel, SnapshotInfoMixin):
205class SnapshotTableInfo(PydanticModel, SnapshotInfoMixin, frozen=True):
206    name: str
207    fingerprint: SnapshotFingerprint
208    version: str
209    physical_schema: str
210    parents: t.Tuple[SnapshotId, ...]
211    previous_versions: t.Tuple[SnapshotDataVersion, ...] = ()
212    change_category: t.Optional[SnapshotChangeCategory]
213    is_materialized: bool
214    is_embedded_kind: bool
215
216    def table_name(self, is_dev: bool = False, for_read: bool = False) -> str:
217        """Full table name pointing to the materialized location of the snapshot.
218
219        Args:
220            is_dev: Whether the table name will be used in development mode.
221            for_read: Whether the table name will be used for reading by a different snapshot.
222        """
223        return self._table_name(self.version, is_dev, for_read)
224
225    @property
226    def table_info(self) -> SnapshotTableInfo:
227        """Helper method to return self."""
228        return self
229
230    @property
231    def data_version(self) -> SnapshotDataVersion:
232        return SnapshotDataVersion(
233            fingerprint=self.fingerprint,
234            version=self.version,
235            change_category=self.change_category,
236        )
237
238    @property
239    def is_new_version(self) -> bool:
240        """Returns whether or not this version is new and requires a backfill."""
241        return self.fingerprint.to_version() == self.version
def table_name(self, is_dev: bool = False, for_read: bool = False) -> str:
216    def table_name(self, is_dev: bool = False, for_read: bool = False) -> str:
217        """Full table name pointing to the materialized location of the snapshot.
218
219        Args:
220            is_dev: Whether the table name will be used in development mode.
221            for_read: Whether the table name will be used for reading by a different snapshot.
222        """
223        return self._table_name(self.version, is_dev, for_read)

Full table name pointing to the materialized location of the snapshot.

Arguments:
  • is_dev: Whether the table name will be used in development mode.
  • for_read: Whether the table name will be used for reading by a different snapshot.

table_info: Helper method to return self (a SnapshotTableInfo is already its own table info).

is_new_version: bool

Returns whether or not this version is new and requires a backfill.

Inherited Members
pydantic.main.BaseModel
BaseModel
parse_obj
parse_raw
parse_file
from_orm
construct
copy
schema
schema_json
validate
update_forward_refs
sqlmesh.utils.pydantic.PydanticModel
Config
dict
json
missing_required_fields
extra_fields
all_fields
required_fields
SnapshotInfoMixin
is_temporary_table
previous_version
all_versions
data_hash_matches
244class Snapshot(PydanticModel, SnapshotInfoMixin):
245    """A snapshot represents a model at a certain point in time.
246
247    Snapshots are used to encapsulate everything needed to evaluate a model.
248    They are standalone objects that hold all state and dynamic content necessary
249    to render a model's query including things like macros. Snapshots also store intervals
250    (timestamp ranges for what data we've processed).
251
252    Models can be dynamically rendered due to macros. Rendering a model to its full extent
253    requires storing variables and macro definitions. We store all of the macro definitions and
254    global variable references in `python_env` in raw text to avoid pickling. The helper methods
255    to achieve this are defined in utils.metaprogramming.
256
257    Args:
258        name: The snapshot name which is the same as the model name and should be unique per model.
259
260        fingerprint: A unique hash of the model definition so that models can be reused across environments.
261        physical_schema: The physical schema that the snapshot is stored in.
262        model: Model object that the snapshot encapsulates.
263        parents: The list of parent snapshots (upstream dependencies).
264        audits: The list of audits used by the model.
265        intervals: List of [start, end) intervals showing which time ranges a snapshot has data for.
266        created_ts: Epoch millis timestamp when a snapshot was first created.
267        updated_ts: Epoch millis timestamp when a snapshot was last updated.
268        ttl: The time-to-live of a snapshot determines when it should be deleted after it's no longer referenced
269            in any environment.
270        previous: The snapshot data version that this snapshot was based on. If this snapshot is new, then previous will be None.
271        version: User specified version for a snapshot that is used for physical storage.
272            By default, the version is the fingerprint, but not all changes to models require a backfill.
273            If a user passes a previous version, that will be used instead and no backfill will be required.
274        change_category: User specified change category indicating which models require backfill from model changes made in this snapshot.
275        unpaused_ts: The timestamp which indicates when this snapshot was unpaused. Unpaused means that
276            this snapshot is evaluated on a recurring basis. None indicates that this snapshot is paused.
277    """
278
279    name: str
280    fingerprint: SnapshotFingerprint
281    physical_schema: str
282    model: Model
283    parents: t.Tuple[SnapshotId, ...]
284    audits: t.Tuple[Audit, ...]
285    intervals: Intervals
286    dev_intervals: Intervals
287    created_ts: int
288    updated_ts: int
289    ttl: str
290    previous_versions: t.Tuple[SnapshotDataVersion, ...] = ()
291    indirect_versions: t.Dict[str, t.Tuple[SnapshotDataVersion, ...]] = {}
292    version: t.Optional[str] = None
293    change_category: t.Optional[SnapshotChangeCategory] = None
294    unpaused_ts: t.Optional[int] = None
295
296    @validator("ttl")
297    @classmethod
298    def _time_delta_must_be_positive(cls, v: str) -> str:
299        current_time = now()
300        if to_datetime(v, current_time) < current_time:
301            raise ValueError(
302                "Must be positive. Use the 'in' keyword to denote a positive time interval. For example, 'in 7 days'."
303            )
304        return v
305
306    @staticmethod
307    def merge_snapshots(
308        targets: t.Iterable[SnapshotIdLike],
309        snapshots: t.Dict[SnapshotId, Snapshot],
310    ) -> t.List[Snapshot]:
311        """Merge target snapshots with others so that each target snapshot has intervals from all other snapshots with the same version.
312
313        Args:
314            targets: Iterable of snapshot-like objects
315            snapshots: Dictionary of snapshot ids to snapshot.
316
317        Returns:
318            List of target snapshots with merged intervals.
319        """
320        merged = []
321        snapshots_by_name_version = defaultdict(list)
322
323        for s in snapshots.values():
324            snapshots_by_name_version[(s.name, s.version)].append(s)
325
326        for snapshot_like in targets:
327            snapshot_id = snapshot_like.snapshot_id
328            snapshot = snapshots.get(snapshot_id)
329            if not snapshot:
330                raise SQLMeshError(f"The snapshot {snapshot_id} was not found")
331
332            snapshot = snapshot.copy()
333            snapshot.intervals = []
334
335            for other in snapshots_by_name_version[(snapshot.name, snapshot.version)]:
336                snapshot.merge_intervals(other)
337
338            merged.append(snapshot)
339
340        return merged
341
342    @classmethod
343    def from_model(
344        cls,
345        model: Model,
346        *,
347        physical_schema: str,
348        models: t.Dict[str, Model],
349        ttl: str = c.DEFAULT_SNAPSHOT_TTL,
350        version: t.Optional[str] = None,
351        audits: t.Optional[t.Dict[str, Audit]] = None,
352        cache: t.Optional[t.Dict[str, SnapshotFingerprint]] = None,
353    ) -> Snapshot:
354        """Creates a new snapshot for a model.
355
356        Args:
357            model: Model to snapshot.
358            physical_schema: The schema of the snapshot which represents where it is stored.
359            models: Dictionary of all models in the graph to make the fingerprint dependent on parent changes.
360                If no dictionary is passed in the fingerprint will not be dependent on a model's parents.
361            ttl: A TTL to determine how long orphaned (snapshots that are not promoted anywhere) should live.
362            version: The version that a snapshot is associated with. Usually set during the planning phase.
363            audits: Available audits by name.
364            cache: Cache of model name to fingerprints.
365
366        Returns:
367            The newly created snapshot.
368        """
369        created_ts = now_timestamp()
370
371        audits = audits or {}
372
373        return cls(
374            name=model.name,
375            fingerprint=fingerprint_from_model(
376                model,
377                physical_schema=physical_schema,
378                models=models,
379                audits=audits,
380                cache=cache,
381            ),
382            physical_schema=physical_schema,
383            model=model,
384            parents=tuple(
385                SnapshotId(
386                    name=name,
387                    identifier=fingerprint_from_model(
388                        models[name],
389                        physical_schema=physical_schema,
390                        models=models,
391                        audits=audits,
392                        cache=cache,
393                    ).to_identifier(),
394                )
395                for name in _parents_from_model(model, models)
396            ),
397            audits=tuple(model.referenced_audits(audits)),
398            intervals=[],
399            dev_intervals=[],
400            created_ts=created_ts,
401            updated_ts=created_ts,
402            ttl=ttl,
403            version=version,
404        )
405
406    def __eq__(self, other: t.Any) -> bool:
407        return isinstance(other, Snapshot) and self.fingerprint == other.fingerprint
408
409    def __hash__(self) -> int:
410        return hash((self.__class__, self.fingerprint))
411
412    def add_interval(self, start: TimeLike, end: TimeLike, is_dev: bool = False) -> None:
413        """Add a newly processed time interval to the snapshot.
414
415        The actual stored intervals are [start_ts, end_ts) or start epoch timestamp inclusive and end epoch
416        timestamp exclusive. This allows merging of ranges to be easier.
417
418        Args:
419            start: The start date/time of the interval (inclusive)
420            end: The end date/time of the interval. If end is a date, then it is considered inclusive.
421                If it is a datetime object, then it is exclusive.
422            is_dev: Indicates whether the given interval is being added while in development mode.
423        """
424        is_temp_table = self.is_temporary_table(is_dev)
425        intervals = self.dev_intervals if is_temp_table else self.intervals
426
427        intervals.append(self._inclusive_exclusive(start, end))
428
429        if len(intervals) < 2:
430            return
431
432        merged_intervals = merge_intervals(intervals)
433        if is_temp_table:
434            self.dev_intervals = merged_intervals
435        else:
436            self.intervals = merged_intervals
437
438    def remove_interval(self, start: TimeLike, end: TimeLike) -> None:
439        """Remove an interval from the snapshot.
440
441        Args:
442            start: Start interval to remove.
443            end: End interval to remove.
444        """
445        interval = self._inclusive_exclusive(start, end)
446        self.intervals = remove_interval(self.intervals, *interval)
447        self.dev_intervals = remove_interval(self.dev_intervals, *interval)
448
449    def _inclusive_exclusive(self, start: TimeLike, end: TimeLike) -> t.Tuple[int, int]:
450        start_dt, end_dt = make_inclusive(start, end)
451        start_ts = to_timestamp(self.model.cron_floor(start_dt))
452        end_ts = to_timestamp(self.model.cron_next(end_dt))
453
454        if start_ts >= end_ts:
455            raise ValueError("`end` must be >= `start`")
456        return (start_ts, end_ts)
457
458    def merge_intervals(self, other: Snapshot) -> None:
459        """Inherits intervals from the target snapshot.
460
461        Args:
462            other: The target snapshot to inherit intervals from.
463        """
464        for start, end in other.intervals:
465            self.add_interval(start, end)
466
467    def missing_intervals(
468        self, start: TimeLike, end: TimeLike, latest: t.Optional[TimeLike] = None
469    ) -> Intervals:
470        """Find all missing intervals between [start, end].
471
472        Although the inputs are inclusive, the returned stored intervals are
473        [start_ts, end_ts) or start epoch timestamp inclusive and end epoch
474        timestamp exclusive.
475
476        Args:
477            start: The start date/time of the interval (inclusive)
478            end: The end date/time of the interval (inclusive)
479
480        Returns:
481            A list of all the missing intervals as epoch timestamps.
482        """
483        if self.is_embedded_kind:
484            return []
485
486        start_dt, end_dt = make_inclusive(start, self.model.cron_floor(end))
487
488        if self.is_full_kind or self.is_view_kind or self.is_seed_kind:
489            latest_dt = to_datetime(self.model.cron_floor(latest or now()))
490            latest_ts = to_timestamp(latest_dt)
491            # if the latest ts is stored in the last interval, nothing is missing
492            # else returns the latest ts with the exclusive end ts.
493            if self.intervals and self.intervals[-1][1] >= latest_ts:
494                return []
495            return [(to_timestamp(self.model.cron_prev(latest_dt)), latest_ts)]
496
497        missing = []
498        dates = list(croniter_range(start_dt, end_dt, self.model.normalized_cron()))
499        size = len(dates)
500
501        for i in range(size):
502            current_ts = to_timestamp(dates[i])
503            end_ts = (
504                to_timestamp(dates[i + 1])
505                if i + 1 < size
506                else to_timestamp(self.model.cron_next(current_ts))
507            )
508
509            for low, high in self.intervals:
510                if current_ts < low:
511                    missing.append((current_ts, end_ts))
512                    break
513                elif current_ts < high:
514                    break
515            else:
516                missing.append((current_ts, end_ts))
517
518        return missing
519
520    def set_version(
521        self,
522        version: t.Optional[str | SnapshotDataVersion | SnapshotTableInfo | Snapshot] = None,
523    ) -> None:
524        """Set the version of this snapshot.
525
526        If no version is passed, the fingerprint of the snapshot will be used.
527
528        Args:
529            version: Either a string or a TableInfo to use.
530        """
531        if isinstance(version, (SnapshotDataVersion, SnapshotTableInfo, Snapshot)):
532            self.version = version.data_version.version
533        else:
534            self.version = version or self.fingerprint.to_version()
535
536    def set_unpaused_ts(self, unpaused_dt: t.Optional[TimeLike]) -> None:
537        """Sets the timestamp for when this snapshot was unpaused.
538
539        Args:
540            unpaused_dt: The datetime object of when this snapshot was unpaused.
541        """
542        self.unpaused_ts = to_timestamp(self.model.cron_floor(unpaused_dt)) if unpaused_dt else None
543
544    def table_name(self, is_dev: bool = False, for_read: bool = False) -> str:
545        """Full table name pointing to the materialized location of the snapshot.
546
547        Args:
548            is_dev: Whether the table name will be used in development mode.
549            for_read: Whether the table name will be used for reading by a different snapshot.
550        """
551        self._ensure_version()
552        assert self.version
553        return self._table_name(self.version, is_dev, for_read)
554
555    def table_name_for_mapping(self, is_dev: bool = False) -> str:
556        """Full table name used by a child snapshot for table mapping during evaluation.
557
558        Args:
559            is_dev: Whether the table name will be used in development mode.
560        """
561        self._ensure_version()
562        assert self.version
563
564        if is_dev and self.is_forward_only:
565            # If this snapshot is unpaused we shouldn't be using a temporary
566            # table for mapping purposes.
567            is_dev = self.is_paused
568
569        return self._table_name(self.version, is_dev, True)
570
571    def version_get_or_generate(self) -> str:
572        """Helper method to get the version or generate it from the fingerprint."""
573        return self.version or self.fingerprint.to_version()
574
575    @property
576    def table_info(self) -> SnapshotTableInfo:
577        """Helper method to get the SnapshotTableInfo from the Snapshot."""
578        self._ensure_version()
579        return SnapshotTableInfo(
580            physical_schema=self.physical_schema,
581            name=self.name,
582            fingerprint=self.fingerprint,
583            version=self.version,
584            parents=self.parents,
585            previous_versions=self.previous_versions,
586            change_category=self.change_category,
587            is_materialized=self.is_materialized,
588            is_embedded_kind=self.is_embedded_kind,
589        )
590
591    @property
592    def data_version(self) -> SnapshotDataVersion:
593        self._ensure_version()
594        return SnapshotDataVersion(
595            fingerprint=self.fingerprint,
596            version=self.version,
597            change_category=self.change_category,
598        )
599
600    @property
601    def is_new_version(self) -> bool:
602        """Returns whether or not this version is new and requires a backfill."""
603        self._ensure_version()
604        return self.fingerprint.to_version() == self.version
605
606    @property
607    def is_full_kind(self) -> bool:
608        return self.model.kind.is_full
609
610    @property
611    def is_view_kind(self) -> bool:
612        return self.model.kind.is_view
613
614    @property
615    def is_incremental_by_time_range_kind(self) -> bool:
616        return self.model.kind.is_incremental_by_time_range
617
618    @property
619    def is_incremental_by_unique_key_kind(self) -> bool:
620        return self.model.kind.is_incremental_by_unique_key
621
622    # @property
623    # def is_snapshot_kind(self) -> bool:
624    #     return self.model.kind.is_snapshot
625
626    @property
627    def is_embedded_kind(self) -> bool:
628        return self.model.kind.is_embedded
629
630    @property
631    def is_seed_kind(self) -> bool:
632        return self.model.kind.is_seed
633
634    @property
635    def is_materialized(self) -> bool:
636        return self.model.kind.is_materialized
637
638    @property
639    def is_paused(self) -> bool:
640        return self.unpaused_ts is None
641
642    def _ensure_version(self) -> None:
643        if not self.version:
644            raise SQLMeshError(f"Snapshot {self.snapshot_id} has not been versioned yet.")

A snapshot represents a model at a certain point in time.

Snapshots are used to encapsulate everything needed to evaluate a model. They are standalone objects that hold all state and dynamic content necessary to render a model's query including things like macros. Snapshots also store intervals (timestamp ranges for what data we've processed).

Models can be dynamically rendered due to macros. Rendering a model to its full extent requires storing variables and macro definitions. We store all of the macro definitions and global variable references in python_env in raw text to avoid pickling. The helper methods to achieve this are defined in utils.metaprogramming.

Arguments:
  • name: The snapshot name which is the same as the model name and should be unique per model.
  • fingerprint: A unique hash of the model definition so that models can be reused across environments.
  • physical_schema: The physical schema that the snapshot is stored in.
  • model: Model object that the snapshot encapsulates.
  • parents: The list of parent snapshots (upstream dependencies).
  • audits: The list of audits used by the model.
  • intervals: List of [start, end) intervals showing which time ranges a snapshot has data for.
  • created_ts: Epoch millis timestamp when a snapshot was first created.
  • updated_ts: Epoch millis timestamp when a snapshot was last updated.
  • ttl: The time-to-live of a snapshot determines when it should be deleted after it's no longer referenced in any environment.
  • previous: The snapshot data version that this snapshot was based on. If this snapshot is new, then previous will be None.
  • version: User specified version for a snapshot that is used for physical storage. By default, the version is the fingerprint, but not all changes to models require a backfill. If a user passes a previous version, that will be used instead and no backfill will be required.
  • change_category: User specified change category indicating which models require backfill from model changes made in this snapshot.
  • unpaused_ts: The timestamp which indicates when this snapshot was unpaused. Unpaused means that this snapshot is evaluated on a recurring basis. None indicates that this snapshot is paused.
306    @staticmethod
307    def merge_snapshots(
308        targets: t.Iterable[SnapshotIdLike],
309        snapshots: t.Dict[SnapshotId, Snapshot],
310    ) -> t.List[Snapshot]:
311        """Merge target snapshots with others so that each target snapshot has intervals from all other snapshots with the same version.
312
313        Args:
314            targets: Iterable of snapshot-like objects
315            snapshots: Dictionary of snapshot ids to snapshot.
316
317        Returns:
318            List of target snapshots with merged intervals.
319        """
320        merged = []
321        snapshots_by_name_version = defaultdict(list)
322
323        for s in snapshots.values():
324            snapshots_by_name_version[(s.name, s.version)].append(s)
325
326        for snapshot_like in targets:
327            snapshot_id = snapshot_like.snapshot_id
328            snapshot = snapshots.get(snapshot_id)
329            if not snapshot:
330                raise SQLMeshError(f"The snapshot {snapshot_id} was not found")
331
332            snapshot = snapshot.copy()
333            snapshot.intervals = []
334
335            for other in snapshots_by_name_version[(snapshot.name, snapshot.version)]:
336                snapshot.merge_intervals(other)
337
338            merged.append(snapshot)
339
340        return merged

Merge target snapshots with others so that each target snapshot has intervals from all other snapshots with the same version.

Arguments:
  • targets: Iterable of snapshot-like objects.
  • snapshots: Dictionary of snapshot ids to snapshots.

Returns:
  List of target snapshots with merged intervals.

@classmethod
def from_model( cls, model: Annotated[Union[sqlmesh.core.model.definition.SqlModel, sqlmesh.core.model.definition.SeedModel, sqlmesh.core.model.definition.PythonModel], FieldInfo(default=PydanticUndefined, discriminator='source_type', extra={})], *, physical_schema: str, models: Dict[str, Annotated[Union[sqlmesh.core.model.definition.SqlModel, sqlmesh.core.model.definition.SeedModel, sqlmesh.core.model.definition.PythonModel], FieldInfo(default=PydanticUndefined, discriminator='source_type', extra={})]], ttl: str = 'in 1 week', version: Optional[str] = None, audits: Optional[Dict[str, sqlmesh.core.audit.definition.Audit]] = None, cache: Optional[Dict[str, sqlmesh.core.snapshot.definition.SnapshotFingerprint]] = None) -> sqlmesh.core.snapshot.definition.Snapshot:
@classmethod
def from_model(
    cls,
    model: Model,
    *,
    physical_schema: str,
    models: t.Dict[str, Model],
    ttl: str = c.DEFAULT_SNAPSHOT_TTL,
    version: t.Optional[str] = None,
    audits: t.Optional[t.Dict[str, Audit]] = None,
    cache: t.Optional[t.Dict[str, SnapshotFingerprint]] = None,
) -> Snapshot:
    """Creates a new snapshot for a model.

    Args:
        model: Model to snapshot.
        physical_schema: The schema of the snapshot which represents where it is stored.
        models: Dictionary of all models in the graph to make the fingerprint dependent
            on parent changes. If no dictionary is passed in, the fingerprint will not be
            dependent on a model's parents.
        ttl: A TTL to determine how long orphaned snapshots (snapshots that are not
            promoted anywhere) should live.
        version: The version that a snapshot is associated with. Usually set during the
            planning phase.
        audits: Available audits by name.
        cache: Cache of model name to fingerprints.

    Returns:
        The newly created snapshot.
    """
    audits = audits or {}
    timestamp = now_timestamp()

    def _fingerprint(target: Model) -> SnapshotFingerprint:
        # All calls share the same arguments (and cache, when provided) so
        # fingerprints stay consistent between the model and its parents.
        return fingerprint_from_model(
            target,
            physical_schema=physical_schema,
            models=models,
            audits=audits,
            cache=cache,
        )

    parent_ids = tuple(
        SnapshotId(
            name=parent_name,
            identifier=_fingerprint(models[parent_name]).to_identifier(),
        )
        for parent_name in _parents_from_model(model, models)
    )

    return cls(
        name=model.name,
        fingerprint=_fingerprint(model),
        physical_schema=physical_schema,
        model=model,
        parents=parent_ids,
        audits=tuple(model.referenced_audits(audits)),
        intervals=[],
        dev_intervals=[],
        created_ts=timestamp,
        updated_ts=timestamp,
        ttl=ttl,
        version=version,
    )

Creates a new snapshot for a model.

Arguments:
  • model: Model to snapshot.
  • physical_schema: The schema of the snapshot which represents where it is stored.
  • models: Dictionary of all models in the graph to make the fingerprint dependent on parent changes. If no dictionary is passed in the fingerprint will not be dependent on a model's parents.
  • ttl: A TTL to determine how long orphaned (snapshots that are not promoted anywhere) should live.
  • version: The version that a snapshot is associated with. Usually set during the planning phase.
  • audits: Available audits by name.
  • cache: Cache of model name to fingerprints.
Returns:

The newly created snapshot.

def add_interval( self, start: Union[datetime.date, datetime.datetime, str, int, float], end: Union[datetime.date, datetime.datetime, str, int, float], is_dev: bool = False) -> None:
def add_interval(self, start: TimeLike, end: TimeLike, is_dev: bool = False) -> None:
    """Add a newly processed time interval to the snapshot.

    The actual stored intervals are [start_ts, end_ts): the start epoch timestamp is
    inclusive and the end epoch timestamp is exclusive, which makes merging of
    ranges easier.

    Args:
        start: The start date/time of the interval (inclusive).
        end: The end date/time of the interval. A date is considered inclusive,
            while a datetime object is considered exclusive.
        is_dev: Whether the given interval is being added while in development mode.
    """
    use_dev = self.is_temporary_table(is_dev)
    target = self.dev_intervals if use_dev else self.intervals
    target.append(self._inclusive_exclusive(start, end))

    # A single interval can never overlap anything, so skip the merge.
    if len(target) >= 2:
        if use_dev:
            self.dev_intervals = merge_intervals(target)
        else:
            self.intervals = merge_intervals(target)

Add a newly processed time interval to the snapshot.

The actual stored intervals are [start_ts, end_ts) or start epoch timestamp inclusive and end epoch timestamp exclusive. This allows merging of ranges to be easier.

Arguments:
  • start: The start date/time of the interval (inclusive)
  • end: The end date/time of the interval. If end is a date, then it is considered inclusive. If it is a datetime object, then it is exclusive.
  • is_dev: Indicates whether the given interval is being added while in development mode.
def remove_interval( self, start: Union[datetime.date, datetime.datetime, str, int, float], end: Union[datetime.date, datetime.datetime, str, int, float]) -> None:
def remove_interval(self, start: TimeLike, end: TimeLike) -> None:
    """Remove an interval from the snapshot.

    The interval is removed from both the regular and the dev intervals.

    Args:
        start: Start of the interval to remove.
        end: End of the interval to remove.
    """
    start_ts, end_ts = self._inclusive_exclusive(start, end)
    self.intervals = remove_interval(self.intervals, start_ts, end_ts)
    self.dev_intervals = remove_interval(self.dev_intervals, start_ts, end_ts)

Remove an interval from the snapshot.

Arguments:
  • start: Start interval to remove.
  • end: End interval to remove.
def merge_intervals(self, other: sqlmesh.core.snapshot.definition.Snapshot) -> None:
def merge_intervals(self, other: Snapshot) -> None:
    """Inherits intervals from the target snapshot.

    Args:
        other: The target snapshot to copy intervals from.
    """
    for interval in other.intervals:
        self.add_interval(*interval)

Inherits intervals from the target snapshot.

Arguments:
  • other: The target snapshot to inherit intervals from.
def missing_intervals( self, start: Union[datetime.date, datetime.datetime, str, int, float], end: Union[datetime.date, datetime.datetime, str, int, float], latest: Union[datetime.date, datetime.datetime, str, int, float, NoneType] = None) -> List[Tuple[int, int]]:
def missing_intervals(
    self, start: TimeLike, end: TimeLike, latest: t.Optional[TimeLike] = None
) -> Intervals:
    """Find all missing intervals between [start, end].

    Although the inputs are inclusive, the returned stored intervals are
    [start_ts, end_ts) or start epoch timestamp inclusive and end epoch
    timestamp exclusive.

    Args:
        start: The start date/time of the interval (inclusive)
        end: The end date/time of the interval (inclusive)
        latest: The latest time to consider for non-incremental (full/view/seed)
            kinds; defaults to `now()`.

    Returns:
        A list of all the missing intervals as epoch timestamps.
    """
    # Embedded models have no materialized data of their own, so nothing can be missing.
    if self.is_embedded_kind:
        return []

    start_dt, end_dt = make_inclusive(start, self.model.cron_floor(end))

    # Non-incremental kinds are refreshed wholesale: the only candidate interval is
    # the most recent cron period ending at `latest`.
    if self.is_full_kind or self.is_view_kind or self.is_seed_kind:
        latest_dt = to_datetime(self.model.cron_floor(latest or now()))
        latest_ts = to_timestamp(latest_dt)
        # if the latest ts is stored in the last interval, nothing is missing
        # else returns the latest ts with the exclusive end ts.
        if self.intervals and self.intervals[-1][1] >= latest_ts:
            return []
        return [(to_timestamp(self.model.cron_prev(latest_dt)), latest_ts)]

    missing = []
    # One candidate interval per cron tick between start and end.
    dates = list(croniter_range(start_dt, end_dt, self.model.normalized_cron()))
    size = len(dates)

    for i in range(size):
        current_ts = to_timestamp(dates[i])
        # Each candidate runs until the next cron tick; the last one runs until
        # the cron tick that follows it.
        end_ts = (
            to_timestamp(dates[i + 1])
            if i + 1 < size
            else to_timestamp(self.model.cron_next(current_ts))
        )

        # Stored intervals are kept sorted and merged (see add_interval), so the
        # first interval starting past current_ts proves the candidate is missing;
        # landing inside [low, high) proves it is covered.
        for low, high in self.intervals:
            if current_ts < low:
                missing.append((current_ts, end_ts))
                break
            elif current_ts < high:
                break
        else:
            # No stored interval covered or passed current_ts: it is missing.
            missing.append((current_ts, end_ts))

    return missing

Find all missing intervals between [start, end].

Although the inputs are inclusive, the returned stored intervals are [start_ts, end_ts) or start epoch timestamp inclusive and end epoch timestamp exclusive.

Arguments:
  • start: The start date/time of the interval (inclusive)
  • end: The end date/time of the interval (inclusive)
  • latest: The latest time to consider for non-incremental (full/view/seed) kinds; defaults to now
Returns:

A list of all the missing intervals as epoch timestamps.

def set_version(
    self,
    version: t.Optional[str | SnapshotDataVersion | SnapshotTableInfo | Snapshot] = None,
) -> None:
    """Set the version of this snapshot.

    Args:
        version: A version string, or an object that carries a data version
            (SnapshotDataVersion, SnapshotTableInfo or Snapshot). If omitted,
            the version is derived from the snapshot's fingerprint.
    """
    carries_data_version = isinstance(
        version, (SnapshotDataVersion, SnapshotTableInfo, Snapshot)
    )
    self.version = (
        version.data_version.version
        if carries_data_version
        else version or self.fingerprint.to_version()
    )

Set the version of this snapshot.

If no version is passed, the fingerprint of the snapshot will be used.

Arguments:
  • version: Either a string or a TableInfo to use.
def set_unpaused_ts( self, unpaused_dt: Union[datetime.date, datetime.datetime, str, int, float, NoneType]) -> None:
def set_unpaused_ts(self, unpaused_dt: t.Optional[TimeLike]) -> None:
    """Sets the timestamp for when this snapshot was unpaused.

    Args:
        unpaused_dt: The time at which this snapshot was unpaused, or None to
            mark the snapshot as paused.
    """
    if unpaused_dt:
        # Align the timestamp to the model's cron schedule.
        self.unpaused_ts = to_timestamp(self.model.cron_floor(unpaused_dt))
    else:
        self.unpaused_ts = None

Sets the timestamp for when this snapshot was unpaused.

Arguments:
  • unpaused_dt: The datetime object of when this snapshot was unpaused.
def table_name(self, is_dev: bool = False, for_read: bool = False) -> str:
def table_name(self, is_dev: bool = False, for_read: bool = False) -> str:
    """Full table name pointing to the materialized location of the snapshot.

    Args:
        is_dev: Whether the table name will be used in development mode.
        for_read: Whether the table name will be used for reading by a different snapshot.
    """
    self._ensure_version()
    version = self.version
    assert version
    return self._table_name(version, is_dev, for_read)

Full table name pointing to the materialized location of the snapshot.

Arguments:
  • is_dev: Whether the table name will be used in development mode.
  • for_read: Whether the table name will be used for reading by a different snapshot.
def table_name_for_mapping(self, is_dev: bool = False) -> str:
def table_name_for_mapping(self, is_dev: bool = False) -> str:
    """Full table name used by a child snapshot for table mapping during evaluation.

    Args:
        is_dev: Whether the table name will be used in development mode.
    """
    self._ensure_version()
    assert self.version

    # An unpaused forward-only snapshot should not be mapped to its temporary
    # table, so development mode only sticks while the snapshot is paused.
    effective_dev = self.is_paused if (is_dev and self.is_forward_only) else is_dev
    return self._table_name(self.version, effective_dev, True)

Full table name used by a child snapshot for table mapping during evaluation.

Arguments:
  • is_dev: Whether the table name will be used in development mode.
def version_get_or_generate(self) -> str:
def version_get_or_generate(self) -> str:
    """Return the snapshot's version, deriving one from the fingerprint if unset."""
    if self.version:
        return self.version
    return self.fingerprint.to_version()

Helper method to get the version or generate it from the fingerprint.

Helper method to get the SnapshotTableInfo from the Snapshot.

is_new_version: bool

Returns whether or not this version is new and requires a backfill.

Inherited Members
pydantic.main.BaseModel
BaseModel
parse_obj
parse_raw
parse_file
from_orm
construct
copy
schema
schema_json
validate
update_forward_refs
sqlmesh.utils.pydantic.PydanticModel
Config
dict
json
missing_required_fields
extra_fields
all_fields
required_fields
SnapshotInfoMixin
is_temporary_table
previous_version
all_versions
data_hash_matches
def table_name( physical_schema: str, name: str, version: str, is_temp: bool = False) -> str:
def table_name(physical_schema: str, name: str, version: str, is_temp: bool = False) -> str:
    """Builds the physical table name for a snapshot version.

    Args:
        physical_schema: The schema in which snapshot tables are materialized.
        name: The model name; dots are replaced with double underscores so the
            fully qualified model name fits into a single table identifier.
        version: The snapshot version embedded into the table name.
        is_temp: Whether to produce the name of the temporary (dev) table.

    Returns:
        The fully qualified physical table name.
    """
    # Fixed misspelled local (was `temp_suffx`).
    temp_suffix = "__temp" if is_temp else ""
    return f"{physical_schema}.{name.replace('.', '__')}__{version}{temp_suffix}"
def fingerprint_from_model( model: Annotated[Union[sqlmesh.core.model.definition.SqlModel, sqlmesh.core.model.definition.SeedModel, sqlmesh.core.model.definition.PythonModel], FieldInfo(default=PydanticUndefined, discriminator='source_type', extra={})], *, models: Dict[str, Annotated[Union[sqlmesh.core.model.definition.SqlModel, sqlmesh.core.model.definition.SeedModel, sqlmesh.core.model.definition.PythonModel], FieldInfo(default=PydanticUndefined, discriminator='source_type', extra={})]], physical_schema: str = '', audits: Optional[Dict[str, sqlmesh.core.audit.definition.Audit]] = None, cache: Optional[Dict[str, sqlmesh.core.snapshot.definition.SnapshotFingerprint]] = None) -> sqlmesh.core.snapshot.definition.SnapshotFingerprint:
def fingerprint_from_model(
    model: Model,
    *,
    models: t.Dict[str, Model],
    physical_schema: str = "",
    audits: t.Optional[t.Dict[str, Audit]] = None,
    cache: t.Optional[t.Dict[str, SnapshotFingerprint]] = None,
) -> SnapshotFingerprint:
    """Helper function to generate a fingerprint based on a model's query and environment.

    This method tries to remove non-meaningful differences to avoid ever-changing
    fingerprints. The fingerprint is made up of a data part and a metadata part:
    the data hash is determined purely by the rendered query and the metadata hash
    by everything else.

    Args:
        model: Model to fingerprint.
        models: Dictionary of all models in the graph to make the fingerprint dependent
            on parent changes. If no dictionary is passed in, the fingerprint will not
            be dependent on a model's parents.
        physical_schema: The physical_schema of the snapshot which represents where it is stored.
        audits: Available audits by name.
        cache: Cache of model name to fingerprints.

    Returns:
        The fingerprint.
    """
    if cache is None:
        cache = {}

    # Memoized: parents recurse through the same cache, so each model is hashed once.
    if model.name in cache:
        return cache[model.name]

    parent_fingerprints = [
        fingerprint_from_model(
            models[parent],
            models=models,
            physical_schema=physical_schema,
            audits=audits,
            cache=cache,
        )
        for parent in model.depends_on
        if parent in models
    ]

    # Sorting makes the combined hashes independent of dependency ordering.
    parent_metadata_hashes = sorted(
        h
        for fp in parent_fingerprints
        for h in (fp.metadata_hash, fp.parent_metadata_hash)
    )

    fingerprint = SnapshotFingerprint(
        data_hash=_model_data_hash(model, physical_schema),
        metadata_hash=_model_metadata_hash(model, audits or {}),
        parent_data_hash=_hash(sorted(fp.to_version() for fp in parent_fingerprints)),
        parent_metadata_hash=_hash(parent_metadata_hashes),
    )
    cache[model.name] = fingerprint
    return fingerprint

Helper function to generate a fingerprint based on a model's query and environment.

This method tries to remove non meaningful differences to avoid ever changing fingerprints. The fingerprint is made up of two parts split by an underscore -- query_metadata. The query hash is determined purely by the rendered query and the metadata by everything else.

Arguments:
  • model: Model to fingerprint.
  • physical_schema: The physical_schema of the snapshot which represents where it is stored.
  • models: Dictionary of all models in the graph to make the fingerprint dependent on parent changes. If no dictionary is passed in the fingerprint will not be dependent on a model's parents.
  • audits: Available audits by name.
  • cache: Cache of model name to fingerprints.
Returns:

The fingerprint.

def merge_intervals(intervals: List[Tuple[int, int]]) -> List[Tuple[int, int]]:
def merge_intervals(intervals: Intervals) -> Intervals:
    """Merge a list of intervals.

    Overlapping or adjacent intervals (ones that share an endpoint) are
    collapsed into a single interval.

    Args:
        intervals: A list of intervals to merge together.

    Returns:
        A new list of sorted and merged intervals. An empty input yields an
        empty list.
    """
    # Guard: the slice-based seeding below would raise IndexError on [].
    if not intervals:
        return []

    intervals = sorted(intervals)
    merged = [intervals[0]]

    for interval in intervals[1:]:
        current = merged[-1]
        if interval[0] <= current[1]:
            # Overlapping or touching: extend the last merged interval.
            merged[-1] = (current[0], max(current[1], interval[1]))
        else:
            merged.append(interval)

    return merged

Merge a list of intervals.

Arguments:
  • intervals: A list of intervals to merge together.
Returns:

A new list of sorted and merged intervals.

def remove_interval( intervals: List[Tuple[int, int]], remove_start: int, remove_end: int) -> List[Tuple[int, int]]:
def remove_interval(intervals: Intervals, remove_start: int, remove_end: int) -> Intervals:
    """Remove an interval from a list of intervals.

    Args:
        intervals: A list of exclusive intervals.
        remove_start: The inclusive start to remove.
        remove_end: The exclusive end to remove.

    Returns:
        A new list of intervals with [remove_start, remove_end) carved out.
    """
    result: Intervals = []

    for start, end in intervals:
        keeps_left = remove_start > start
        keeps_right = remove_end < end

        if keeps_left and keeps_right:
            # The removal range is strictly inside this interval: split it in two.
            result.append((start, remove_start))
            result.append((remove_end, end))
        elif keeps_left:
            # Only the left portion survives, clipped at the interval's own end.
            result.append((start, min(remove_start, end)))
        elif keeps_right:
            # Only the right portion survives, clipped at the interval's own start.
            result.append((max(remove_end, start), end))
        # Otherwise the interval is fully covered by the removal range and dropped.

    return result

Remove an interval from a list of intervals.

Arguments:
  • intervals: A list of exclusive intervals.
  • remove_start: The inclusive start to remove.
  • remove_end: The exclusive end to remove.
Returns:

A new list of intervals.

def to_table_mapping( snapshots: Iterable[sqlmesh.core.snapshot.definition.Snapshot], is_dev: bool) -> Dict[str, str]:
def to_table_mapping(snapshots: t.Iterable[Snapshot], is_dev: bool) -> t.Dict[str, str]:
    """Maps snapshot names to the physical tables they should be read from.

    Snapshots without a version and embedded snapshots are excluded.
    """
    mapping: t.Dict[str, str] = {}
    for snapshot in snapshots:
        if snapshot.version and not snapshot.is_embedded_kind:
            mapping[snapshot.name] = snapshot.table_name_for_mapping(is_dev=is_dev)
    return mapping