Edit on GitHub

ContextDiff

ContextDiff encapsulates the differences between two environments. The two environments can be the local environment and a remote environment, or two remote environments. ContextDiff is an important part of SQLMesh. SQLMesh plans use ContextDiff to determine what models were changed between two environments. The SQLMesh CLI diff command uses ContextDiff to determine what to visualize.

When a ContextDiff object is created, SQLMesh compares the snapshots of one environment with those of a remote environment and determines whether models have been added, removed, or modified.

  1"""
  2# ContextDiff
  3
  4ContextDiff encapsulates the differences between two environments. The two environments can be the local
  5environment and a remote environment, or two remote environments. ContextDiff is an important part of
  6SQLMesh. SQLMesh plans use ContextDiff to determine what models were changed between two environments.
  7The SQLMesh CLI diff command uses ContextDiff to determine what to visualize.
  8
  9When creating a ContextDiff object, SQLMesh will compare the snapshots from one environment with those of
 10another remote environment and determine if models have been added, removed, or modified.
 11"""
 12from __future__ import annotations
 13
 14import typing as t
 15
 16from sqlmesh.core.environment import Environment
 17from sqlmesh.core.snapshot import Snapshot, SnapshotDataVersion, SnapshotId
 18from sqlmesh.utils.errors import SQLMeshError
 19from sqlmesh.utils.pydantic import PydanticModel
 20
 21if t.TYPE_CHECKING:
 22    from sqlmesh.core.state_sync import StateReader
 23
 24
class ContextDiff(PydanticModel):
    """ContextDiff is an object representing the difference between two environments.

    The two environments can be the local environment and a remote environment, or two remote
    environments.

    `ContextDiff.create` builds an instance by comparing a set of snapshots against an
    environment looked up through a state reader.
    """

    environment: str
    """The (lowercased) name of the environment to diff."""
    is_new_environment: bool
    """Whether the target environment is new."""
    create_from: str
    """The name of the environment the target environment will be created from if new."""
    added: t.Set[str]
    """Names of new models."""
    removed: t.Set[str]
    """Names of deleted models."""
    modified_snapshots: t.Dict[str, t.Tuple[Snapshot, Snapshot]]
    """Modified snapshots, keyed by model name, as (current, previous) pairs."""
    snapshots: t.Dict[str, Snapshot]
    """Merged snapshots, keyed by model name."""
    new_snapshots: t.Dict[SnapshotId, Snapshot]
    """New snapshots (those not found in the state), keyed by snapshot id."""
    previous_plan_id: t.Optional[str]
    """Previous plan id, or None when the target environment is new."""

    @classmethod
    def create(
        cls,
        environment: str | Environment,
        snapshots: t.Dict[str, Snapshot],
        create_from: str,
        state_reader: StateReader,
    ) -> ContextDiff:
        """Create a ContextDiff object.

        Args:
            environment: The remote environment to diff. Either an environment name, which
                is lowercased and looked up through `state_reader`, or an Environment object.
            snapshots: The snapshots of the current environment, keyed by model name.
            create_from: The environment to create the target environment from if it
                doesn't exist.
            state_reader: StateReader to access the remote environment to diff.

        Returns:
            The ContextDiff object.
        """
        if isinstance(environment, str):
            environment = environment.lower()
            env = state_reader.get_environment(environment)
        else:
            env = environment
            environment = env.name.lower()

        # When the target environment was not found, diff against the `create_from`
        # environment instead and flag the target as new.
        if env is None:
            env = state_reader.get_environment(create_from.lower())
            is_new_environment = True
        else:
            is_new_environment = False

        # `env` can still be None here (the `create_from` lookup may also return nothing),
        # in which case every current model is treated as added.
        existing_info = {info.name: info for info in (env.snapshots if env else [])}
        existing_models = set(existing_info)
        current_models = set(snapshots)
        removed = existing_models - current_models
        added = current_models - existing_models
        # Models present on both sides whose fingerprints differ, mapped to the
        # environment's (previous) snapshot info.
        modified_info = {
            name: existing_info[name]
            for name, snapshot in snapshots.items()
            if name not in added and snapshot.fingerprint != existing_info[name].fingerprint
        }

        # Single lookup covering both the previous versions of modified models and the
        # current snapshots.
        stored = state_reader.get_snapshots(
            list(modified_info.values()) + [snapshot.snapshot_id for snapshot in snapshots.values()]
        )

        merged_snapshots = {}
        modified_snapshots = {}
        new_snapshots = {}
        # Maps a child name to (versions, created_ts of the stored snapshot they came from).
        snapshot_remote_versions: t.Dict[str, t.Tuple[t.Tuple[SnapshotDataVersion, ...], int]] = {}

        for name, snapshot in snapshots.items():
            modified = modified_info.get(name)
            existing = stored.get(snapshot.snapshot_id)

            if existing:
                # The current snapshot is already stored: use (a copy of) the stored one.
                merged_snapshots[name] = existing.copy()
                if modified:
                    modified_snapshots[name] = (existing, stored[modified.snapshot_id])
                    # Collect the indirect versions recorded on the stored snapshot,
                    # keeping for each child the entry from the most recently created
                    # snapshot.
                    for child, versions in existing.indirect_versions.items():
                        existing_versions = snapshot_remote_versions.get(child)
                        if not existing_versions or existing_versions[1] < existing.created_ts:
                            snapshot_remote_versions[child] = (
                                versions,
                                existing.created_ts,
                            )
            else:
                # The current snapshot is not stored yet: register a copy as new. The same
                # copy is shared by `merged_snapshots` and `new_snapshots`, so the
                # mutations below are visible through both.
                snapshot = snapshot.copy()
                merged_snapshots[name] = snapshot
                new_snapshots[snapshot.snapshot_id] = snapshot
                if modified:
                    snapshot.previous_versions = modified.all_versions
                    modified_snapshots[name] = (snapshot, stored[modified.snapshot_id])

        # Reconcile versions of new snapshots for which stored parents recorded indirect
        # versions, but only when the snapshot's data hash matches its previous version.
        for snapshot in new_snapshots.values():
            if (
                snapshot.name in snapshot_remote_versions
                and snapshot.previous_version
                and snapshot.data_hash_matches(snapshot.previous_version)
            ):
                remote_versions = snapshot_remote_versions[snapshot.name][0]
                remote_head = remote_versions[-1].version
                local_head = snapshot.previous_version.version

                if remote_head in (local.version for local in snapshot.previous_versions):
                    # The remote head appears in the local history: keep the local head.
                    snapshot.set_version(local_head)
                elif local_head in (remote.version for remote in remote_versions):
                    # The local head appears in the remote history: adopt the remote head.
                    snapshot.set_version(remote_head)
                else:
                    # Neither history contains the other's head.
                    # NOTE(review): presumably set_version() without an argument assigns a
                    # fresh version - confirm against Snapshot.set_version.
                    snapshot.set_version()

        return ContextDiff(
            environment=environment,
            is_new_environment=is_new_environment,
            create_from=create_from,
            added=added,
            removed=removed,
            modified_snapshots=modified_snapshots,
            snapshots=merged_snapshots,
            new_snapshots=new_snapshots,
            previous_plan_id=env.plan_id if env and not is_new_environment else None,
        )

    @property
    def has_changes(self) -> bool:
        """Whether there are snapshot changes or the target environment is new."""
        return self.has_snapshot_changes or self.is_new_environment

    @property
    def has_snapshot_changes(self) -> bool:
        """Whether any models were added, removed, or modified."""
        return bool(self.added or self.removed or self.modified_snapshots)

    def directly_modified(self, model_name: str) -> bool:
        """Returns whether or not a model was directly modified in this context.

        A model is directly modified when the data hashes of its current and previous
        fingerprints differ.

        Args:
            model_name: The model name to check.

        Returns:
            Whether or not the model was directly modified.
        """

        if model_name not in self.modified_snapshots:
            return False

        current, previous = self.modified_snapshots[model_name]
        return current.fingerprint.data_hash != previous.fingerprint.data_hash

    def indirectly_modified(self, model_name: str) -> bool:
        """Returns whether or not a model was indirectly modified in this context.

        A model is indirectly modified when its own data hash is unchanged but its
        parent data hash differs.

        Args:
            model_name: The model name to check.

        Returns:
            Whether or not the model was indirectly modified.
        """

        if model_name not in self.modified_snapshots:
            return False

        current, previous = self.modified_snapshots[model_name]
        return (
            current.fingerprint.data_hash == previous.fingerprint.data_hash
            and current.fingerprint.parent_data_hash != previous.fingerprint.parent_data_hash
        )

    def metadata_updated(self, model_name: str) -> bool:
        """Returns whether or not the given model's metadata has been updated.

        Metadata is considered updated when the metadata hashes of the current and
        previous fingerprints differ.

        Args:
            model_name: The model name to check.

        Returns:
            Whether or not the model's metadata has been updated.
        """

        if model_name not in self.modified_snapshots:
            return False

        current, previous = self.modified_snapshots[model_name]
        return current.fingerprint.metadata_hash != previous.fingerprint.metadata_hash

    def text_diff(self, model: str) -> str:
        """Finds the difference of a model between the current and remote environment.

        Args:
            model: The model name.

        Returns:
            A unified text diff of the model, or an empty string if the model was not
            modified.

        Raises:
            SQLMeshError: If the model is not among the merged snapshots.
        """
        if model not in self.snapshots:
            raise SQLMeshError(f"`{model}` does not exist.")
        if model not in self.modified_snapshots:
            return ""

        # The diff is taken from the previous model to the current one.
        new, old = self.modified_snapshots[model]
        return old.model.text_diff(new.model)
class ContextDiff(sqlmesh.utils.pydantic.PydanticModel):
class ContextDiff(PydanticModel):
    """ContextDiff is an object representing the difference between two environments.

    The two environments can be the local environment and a remote environment, or two remote
    environments.

    `ContextDiff.create` builds an instance by comparing a set of snapshots against an
    environment looked up through a state reader.
    """

    environment: str
    """The (lowercased) name of the environment to diff."""
    is_new_environment: bool
    """Whether the target environment is new."""
    create_from: str
    """The name of the environment the target environment will be created from if new."""
    added: t.Set[str]
    """Names of new models."""
    removed: t.Set[str]
    """Names of deleted models."""
    modified_snapshots: t.Dict[str, t.Tuple[Snapshot, Snapshot]]
    """Modified snapshots, keyed by model name, as (current, previous) pairs."""
    snapshots: t.Dict[str, Snapshot]
    """Merged snapshots, keyed by model name."""
    new_snapshots: t.Dict[SnapshotId, Snapshot]
    """New snapshots (those not found in the state), keyed by snapshot id."""
    previous_plan_id: t.Optional[str]
    """Previous plan id, or None when the target environment is new."""

    @classmethod
    def create(
        cls,
        environment: str | Environment,
        snapshots: t.Dict[str, Snapshot],
        create_from: str,
        state_reader: StateReader,
    ) -> ContextDiff:
        """Create a ContextDiff object.

        Args:
            environment: The remote environment to diff. Either an environment name, which
                is lowercased and looked up through `state_reader`, or an Environment object.
            snapshots: The snapshots of the current environment, keyed by model name.
            create_from: The environment to create the target environment from if it
                doesn't exist.
            state_reader: StateReader to access the remote environment to diff.

        Returns:
            The ContextDiff object.
        """
        if isinstance(environment, str):
            environment = environment.lower()
            env = state_reader.get_environment(environment)
        else:
            env = environment
            environment = env.name.lower()

        # When the target environment was not found, diff against the `create_from`
        # environment instead and flag the target as new.
        if env is None:
            env = state_reader.get_environment(create_from.lower())
            is_new_environment = True
        else:
            is_new_environment = False

        # `env` can still be None here (the `create_from` lookup may also return nothing),
        # in which case every current model is treated as added.
        existing_info = {info.name: info for info in (env.snapshots if env else [])}
        existing_models = set(existing_info)
        current_models = set(snapshots)
        removed = existing_models - current_models
        added = current_models - existing_models
        # Models present on both sides whose fingerprints differ, mapped to the
        # environment's (previous) snapshot info.
        modified_info = {
            name: existing_info[name]
            for name, snapshot in snapshots.items()
            if name not in added and snapshot.fingerprint != existing_info[name].fingerprint
        }

        # Single lookup covering both the previous versions of modified models and the
        # current snapshots.
        stored = state_reader.get_snapshots(
            list(modified_info.values()) + [snapshot.snapshot_id for snapshot in snapshots.values()]
        )

        merged_snapshots = {}
        modified_snapshots = {}
        new_snapshots = {}
        # Maps a child name to (versions, created_ts of the stored snapshot they came from).
        snapshot_remote_versions: t.Dict[str, t.Tuple[t.Tuple[SnapshotDataVersion, ...], int]] = {}

        for name, snapshot in snapshots.items():
            modified = modified_info.get(name)
            existing = stored.get(snapshot.snapshot_id)

            if existing:
                # The current snapshot is already stored: use (a copy of) the stored one.
                merged_snapshots[name] = existing.copy()
                if modified:
                    modified_snapshots[name] = (existing, stored[modified.snapshot_id])
                    # Collect the indirect versions recorded on the stored snapshot,
                    # keeping for each child the entry from the most recently created
                    # snapshot.
                    for child, versions in existing.indirect_versions.items():
                        existing_versions = snapshot_remote_versions.get(child)
                        if not existing_versions or existing_versions[1] < existing.created_ts:
                            snapshot_remote_versions[child] = (
                                versions,
                                existing.created_ts,
                            )
            else:
                # The current snapshot is not stored yet: register a copy as new. The same
                # copy is shared by `merged_snapshots` and `new_snapshots`, so the
                # mutations below are visible through both.
                snapshot = snapshot.copy()
                merged_snapshots[name] = snapshot
                new_snapshots[snapshot.snapshot_id] = snapshot
                if modified:
                    snapshot.previous_versions = modified.all_versions
                    modified_snapshots[name] = (snapshot, stored[modified.snapshot_id])

        # Reconcile versions of new snapshots for which stored parents recorded indirect
        # versions, but only when the snapshot's data hash matches its previous version.
        for snapshot in new_snapshots.values():
            if (
                snapshot.name in snapshot_remote_versions
                and snapshot.previous_version
                and snapshot.data_hash_matches(snapshot.previous_version)
            ):
                remote_versions = snapshot_remote_versions[snapshot.name][0]
                remote_head = remote_versions[-1].version
                local_head = snapshot.previous_version.version

                if remote_head in (local.version for local in snapshot.previous_versions):
                    # The remote head appears in the local history: keep the local head.
                    snapshot.set_version(local_head)
                elif local_head in (remote.version for remote in remote_versions):
                    # The local head appears in the remote history: adopt the remote head.
                    snapshot.set_version(remote_head)
                else:
                    # Neither history contains the other's head.
                    # NOTE(review): presumably set_version() without an argument assigns a
                    # fresh version - confirm against Snapshot.set_version.
                    snapshot.set_version()

        return ContextDiff(
            environment=environment,
            is_new_environment=is_new_environment,
            create_from=create_from,
            added=added,
            removed=removed,
            modified_snapshots=modified_snapshots,
            snapshots=merged_snapshots,
            new_snapshots=new_snapshots,
            previous_plan_id=env.plan_id if env and not is_new_environment else None,
        )

    @property
    def has_changes(self) -> bool:
        """Whether there are snapshot changes or the target environment is new."""
        return self.has_snapshot_changes or self.is_new_environment

    @property
    def has_snapshot_changes(self) -> bool:
        """Whether any models were added, removed, or modified."""
        return bool(self.added or self.removed or self.modified_snapshots)

    def directly_modified(self, model_name: str) -> bool:
        """Returns whether or not a model was directly modified in this context.

        A model is directly modified when the data hashes of its current and previous
        fingerprints differ.

        Args:
            model_name: The model name to check.

        Returns:
            Whether or not the model was directly modified.
        """

        if model_name not in self.modified_snapshots:
            return False

        current, previous = self.modified_snapshots[model_name]
        return current.fingerprint.data_hash != previous.fingerprint.data_hash

    def indirectly_modified(self, model_name: str) -> bool:
        """Returns whether or not a model was indirectly modified in this context.

        A model is indirectly modified when its own data hash is unchanged but its
        parent data hash differs.

        Args:
            model_name: The model name to check.

        Returns:
            Whether or not the model was indirectly modified.
        """

        if model_name not in self.modified_snapshots:
            return False

        current, previous = self.modified_snapshots[model_name]
        return (
            current.fingerprint.data_hash == previous.fingerprint.data_hash
            and current.fingerprint.parent_data_hash != previous.fingerprint.parent_data_hash
        )

    def metadata_updated(self, model_name: str) -> bool:
        """Returns whether or not the given model's metadata has been updated.

        Metadata is considered updated when the metadata hashes of the current and
        previous fingerprints differ.

        Args:
            model_name: The model name to check.

        Returns:
            Whether or not the model's metadata has been updated.
        """

        if model_name not in self.modified_snapshots:
            return False

        current, previous = self.modified_snapshots[model_name]
        return current.fingerprint.metadata_hash != previous.fingerprint.metadata_hash

    def text_diff(self, model: str) -> str:
        """Finds the difference of a model between the current and remote environment.

        Args:
            model: The model name.

        Returns:
            A unified text diff of the model, or an empty string if the model was not
            modified.

        Raises:
            SQLMeshError: If the model is not among the merged snapshots.
        """
        if model not in self.snapshots:
            raise SQLMeshError(f"`{model}` does not exist.")
        if model not in self.modified_snapshots:
            return ""

        # The diff is taken from the previous model to the current one.
        new, old = self.modified_snapshots[model]
        return old.model.text_diff(new.model)

ContextDiff is an object representing the difference between two environments.

The two environments can be the local environment and a remote environment, or two remote environments.

environment: str

The environment to diff.

is_new_environment: bool

Whether the target environment is new.

create_from: str

The name of the environment the target environment will be created from if new.

added: Set[str]

New models.

removed: Set[str]

Deleted models.

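modified_snapshots: Dict[str, Tuple[Snapshot, Snapshot]]

Modified snapshots.

snapshots: Dict[str, Snapshot]

Merged snapshots.

new_snapshots: Dict[SnapshotId, Snapshot]

New snapshots.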

previous_plan_id: Optional[str]

Previous plan id.

@classmethod
def create( cls, environment: str | sqlmesh.core.environment.Environment, snapshots: Dict[str, sqlmesh.core.snapshot.definition.Snapshot], create_from: str, state_reader: sqlmesh.core.state_sync.base.StateReader) -> sqlmesh.core.context_diff.ContextDiff:
    @classmethod
    def create(
        cls,
        environment: str | Environment,
        snapshots: t.Dict[str, Snapshot],
        create_from: str,
        state_reader: StateReader,
    ) -> ContextDiff:
        """Create a ContextDiff object.

        Args:
            environment: The remote environment to diff. Either an environment name, which
                is lowercased and looked up through `state_reader`, or an Environment object.
            snapshots: The snapshots of the current environment, keyed by model name.
            create_from: The environment to create the target environment from if it
                doesn't exist.
            state_reader: StateReader to access the remote environment to diff.

        Returns:
            The ContextDiff object.
        """
        if isinstance(environment, str):
            environment = environment.lower()
            env = state_reader.get_environment(environment)
        else:
            env = environment
            environment = env.name.lower()

        # When the target environment was not found, diff against the `create_from`
        # environment instead and flag the target as new.
        if env is None:
            env = state_reader.get_environment(create_from.lower())
            is_new_environment = True
        else:
            is_new_environment = False

        # `env` can still be None here (the `create_from` lookup may also return nothing),
        # in which case every current model is treated as added.
        existing_info = {info.name: info for info in (env.snapshots if env else [])}
        existing_models = set(existing_info)
        current_models = set(snapshots)
        removed = existing_models - current_models
        added = current_models - existing_models
        # Models present on both sides whose fingerprints differ, mapped to the
        # environment's (previous) snapshot info.
        modified_info = {
            name: existing_info[name]
            for name, snapshot in snapshots.items()
            if name not in added and snapshot.fingerprint != existing_info[name].fingerprint
        }

        # Single lookup covering both the previous versions of modified models and the
        # current snapshots.
        stored = state_reader.get_snapshots(
            list(modified_info.values()) + [snapshot.snapshot_id for snapshot in snapshots.values()]
        )

        merged_snapshots = {}
        modified_snapshots = {}
        new_snapshots = {}
        # Maps a child name to (versions, created_ts of the stored snapshot they came from).
        snapshot_remote_versions: t.Dict[str, t.Tuple[t.Tuple[SnapshotDataVersion, ...], int]] = {}

        for name, snapshot in snapshots.items():
            modified = modified_info.get(name)
            existing = stored.get(snapshot.snapshot_id)

            if existing:
                # The current snapshot is already stored: use (a copy of) the stored one.
                merged_snapshots[name] = existing.copy()
                if modified:
                    modified_snapshots[name] = (existing, stored[modified.snapshot_id])
                    # Collect the indirect versions recorded on the stored snapshot,
                    # keeping for each child the entry from the most recently created
                    # snapshot.
                    for child, versions in existing.indirect_versions.items():
                        existing_versions = snapshot_remote_versions.get(child)
                        if not existing_versions or existing_versions[1] < existing.created_ts:
                            snapshot_remote_versions[child] = (
                                versions,
                                existing.created_ts,
                            )
            else:
                # The current snapshot is not stored yet: register a copy as new. The same
                # copy is shared by `merged_snapshots` and `new_snapshots`, so the
                # mutations below are visible through both.
                snapshot = snapshot.copy()
                merged_snapshots[name] = snapshot
                new_snapshots[snapshot.snapshot_id] = snapshot
                if modified:
                    snapshot.previous_versions = modified.all_versions
                    modified_snapshots[name] = (snapshot, stored[modified.snapshot_id])

        # Reconcile versions of new snapshots for which stored parents recorded indirect
        # versions, but only when the snapshot's data hash matches its previous version.
        for snapshot in new_snapshots.values():
            if (
                snapshot.name in snapshot_remote_versions
                and snapshot.previous_version
                and snapshot.data_hash_matches(snapshot.previous_version)
            ):
                remote_versions = snapshot_remote_versions[snapshot.name][0]
                remote_head = remote_versions[-1].version
                local_head = snapshot.previous_version.version

                if remote_head in (local.version for local in snapshot.previous_versions):
                    # The remote head appears in the local history: keep the local head.
                    snapshot.set_version(local_head)
                elif local_head in (remote.version for remote in remote_versions):
                    # The local head appears in the remote history: adopt the remote head.
                    snapshot.set_version(remote_head)
                else:
                    # Neither history contains the other's head.
                    # NOTE(review): presumably set_version() without an argument assigns a
                    # fresh version - confirm against Snapshot.set_version.
                    snapshot.set_version()

        return ContextDiff(
            environment=environment,
            is_new_environment=is_new_environment,
            create_from=create_from,
            added=added,
            removed=removed,
            modified_snapshots=modified_snapshots,
            snapshots=merged_snapshots,
            new_snapshots=new_snapshots,
            previous_plan_id=env.plan_id if env and not is_new_environment else None,
        )

Create a ContextDiff object.

Arguments:
  • environment: The remote environment to diff.
  • snapshots: The snapshots of the current environment.
  • create_from: The environment to create the target environment from if it doesn't exist.
  • state_reader: StateReader to access the remote environment to diff.
Returns:

The ContextDiff object.

def directly_modified(self, model_name: str) -> bool:
165    def directly_modified(self, model_name: str) -> bool:
166        """Returns whether or not a model was directly modified in this context.
167
168        Args:
169            model_name: The model name to check.
170
171        Returns:
172            Whether or not the model was directly modified.
173        """
174
175        if model_name not in self.modified_snapshots:
176            return False
177
178        current, previous = self.modified_snapshots[model_name]
179        return current.fingerprint.data_hash != previous.fingerprint.data_hash

Returns whether or not a model was directly modified in this context.

Arguments:
  • model_name: The model name to check.
Returns:

Whether or not the model was directly modified.

def indirectly_modified(self, model_name: str) -> bool:
181    def indirectly_modified(self, model_name: str) -> bool:
182        """Returns whether or not a model was indirectly modified in this context.
183
184        Args:
185            model_name: The model name to check.
186
187        Returns:
188            Whether or not the model was indirectly modified.
189        """
190
191        if model_name not in self.modified_snapshots:
192            return False
193
194        current, previous = self.modified_snapshots[model_name]
195        return (
196            current.fingerprint.data_hash == previous.fingerprint.data_hash
197            and current.fingerprint.parent_data_hash != previous.fingerprint.parent_data_hash
198        )

Returns whether or not a model was indirectly modified in this context.

Arguments:
  • model_name: The model name to check.
Returns:

Whether or not the model was indirectly modified.

def metadata_updated(self, model_name: str) -> bool:
200    def metadata_updated(self, model_name: str) -> bool:
201        """Returns whether or not the given model's metadata has been updated.
202
203        Args:
204            model_name: The model name to check.
205
206        Returns:
207            Whether or not the model's metadata has been updated.
208        """
209
210        if model_name not in self.modified_snapshots:
211            return False
212
213        current, previous = self.modified_snapshots[model_name]
214        return current.fingerprint.metadata_hash != previous.fingerprint.metadata_hash

Returns whether or not the given model's metadata has been updated.

Arguments:
  • model_name: The model name to check.
Returns:

Whether or not the model's metadata has been updated.

def text_diff(self, model: str) -> str:
216    def text_diff(self, model: str) -> str:
217        """Finds the difference of a model between the current and remote environment.
218
219        Args:
220            model: The model name.
221
222        Returns:
223            A unified text diff of the model.
224        """
225        if model not in self.snapshots:
226            raise SQLMeshError(f"`{model}` does not exist.")
227        if model not in self.modified_snapshots:
228            return ""
229
230        new, old = self.modified_snapshots[model]
231        return old.model.text_diff(new.model)

Finds the difference of a model between the current and remote environment.

Arguments:
  • model: The model name.
Returns:

A unified text diff of the model.

Inherited Members
pydantic.main.BaseModel
BaseModel
parse_obj
parse_raw
parse_file
from_orm
construct
copy
schema
schema_json
validate
update_forward_refs
sqlmesh.utils.pydantic.PydanticModel
Config
dict
json
missing_required_fields
extra_fields
all_fields
required_fields