ContextDiff
ContextDiff encapsulates the differences between two environments. The two environments can be the local environment and a remote environment, or two remote environments. ContextDiff is an important part of SQLMesh. SQLMesh plans use ContextDiff to determine what models were changed between two environments. The SQLMesh CLI diff command uses ContextDiff to determine what to visualize.
When creating a ContextDiff object, SQLMesh will compare the snapshots from one environment with those of another remote environment and determine if models have been added, removed, or modified.
1""" 2# ContextDiff 3 4ContextDiff encapsulates the differences between two environments. The two environments can be the local 5environment and a remote environment, or two remote environments. ContextDiff is an important part of 6SQLMesh. SQLMesh plans use ContextDiff to determine what models were changed between two environments. 7The SQLMesh CLI diff command uses ContextDiff to determine what to visualize. 8 9When creating a ContextDiff object, SQLMesh will compare the snapshots from one environment with those of 10another remote environment and determine if models have been added, removed, or modified. 11""" 12from __future__ import annotations 13 14import typing as t 15 16from sqlmesh.core.environment import Environment 17from sqlmesh.core.snapshot import Snapshot, SnapshotDataVersion, SnapshotId 18from sqlmesh.utils.errors import SQLMeshError 19from sqlmesh.utils.pydantic import PydanticModel 20 21if t.TYPE_CHECKING: 22 from sqlmesh.core.state_sync import StateReader 23 24 25class ContextDiff(PydanticModel): 26 """ContextDiff is an object representing the difference between two environments. 27 28 The two environments can be the local environment and a remote environment, or two remote 29 environments. 
30 """ 31 32 environment: str 33 """The environment to diff.""" 34 is_new_environment: bool 35 """Whether the target environment is new.""" 36 create_from: str 37 """The name of the environment the target environment will be created from if new.""" 38 added: t.Set[str] 39 """New models.""" 40 removed: t.Set[str] 41 """Deleted models.""" 42 modified_snapshots: t.Dict[str, t.Tuple[Snapshot, Snapshot]] 43 """Modified snapshots.""" 44 snapshots: t.Dict[str, Snapshot] 45 """Merged snapshots.""" 46 new_snapshots: t.Dict[SnapshotId, Snapshot] 47 """New snapshots.""" 48 previous_plan_id: t.Optional[str] 49 """Previous plan id.""" 50 51 @classmethod 52 def create( 53 cls, 54 environment: str | Environment, 55 snapshots: t.Dict[str, Snapshot], 56 create_from: str, 57 state_reader: StateReader, 58 ) -> ContextDiff: 59 """Create a ContextDiff object. 60 61 Args: 62 environment: The remote environment to diff. 63 snapshots: The snapshots of the current environment. 64 create_from: The environment to create the target environment from if it 65 doesn't exist. 66 state_reader: StateReader to access the remote environment to diff. 67 68 Returns: 69 The ContextDiff object. 
70 """ 71 if isinstance(environment, str): 72 environment = environment.lower() 73 env = state_reader.get_environment(environment) 74 else: 75 env = environment 76 environment = env.name.lower() 77 78 if env is None: 79 env = state_reader.get_environment(create_from.lower()) 80 is_new_environment = True 81 else: 82 is_new_environment = False 83 84 existing_info = {info.name: info for info in (env.snapshots if env else [])} 85 existing_models = set(existing_info) 86 current_models = set(snapshots) 87 removed = existing_models - current_models 88 added = current_models - existing_models 89 modified_info = { 90 name: existing_info[name] 91 for name, snapshot in snapshots.items() 92 if name not in added and snapshot.fingerprint != existing_info[name].fingerprint 93 } 94 95 stored = state_reader.get_snapshots( 96 list(modified_info.values()) + [snapshot.snapshot_id for snapshot in snapshots.values()] 97 ) 98 99 merged_snapshots = {} 100 modified_snapshots = {} 101 new_snapshots = {} 102 snapshot_remote_versions: t.Dict[str, t.Tuple[t.Tuple[SnapshotDataVersion, ...], int]] = {} 103 104 for name, snapshot in snapshots.items(): 105 modified = modified_info.get(name) 106 existing = stored.get(snapshot.snapshot_id) 107 108 if existing: 109 merged_snapshots[name] = existing.copy() 110 if modified: 111 modified_snapshots[name] = (existing, stored[modified.snapshot_id]) 112 for child, versions in existing.indirect_versions.items(): 113 existing_versions = snapshot_remote_versions.get(child) 114 if not existing_versions or existing_versions[1] < existing.created_ts: 115 snapshot_remote_versions[child] = ( 116 versions, 117 existing.created_ts, 118 ) 119 else: 120 snapshot = snapshot.copy() 121 merged_snapshots[name] = snapshot 122 new_snapshots[snapshot.snapshot_id] = snapshot 123 if modified: 124 snapshot.previous_versions = modified.all_versions 125 modified_snapshots[name] = (snapshot, stored[modified.snapshot_id]) 126 127 for snapshot in new_snapshots.values(): 128 if ( 129 
snapshot.name in snapshot_remote_versions 130 and snapshot.previous_version 131 and snapshot.data_hash_matches(snapshot.previous_version) 132 ): 133 remote_versions = snapshot_remote_versions[snapshot.name][0] 134 remote_head = remote_versions[-1].version 135 local_head = snapshot.previous_version.version 136 137 if remote_head in (local.version for local in snapshot.previous_versions): 138 snapshot.set_version(local_head) 139 elif local_head in (remote.version for remote in remote_versions): 140 snapshot.set_version(remote_head) 141 else: 142 snapshot.set_version() 143 144 return ContextDiff( 145 environment=environment, 146 is_new_environment=is_new_environment, 147 create_from=create_from, 148 added=added, 149 removed=removed, 150 modified_snapshots=modified_snapshots, 151 snapshots=merged_snapshots, 152 new_snapshots=new_snapshots, 153 previous_plan_id=env.plan_id if env and not is_new_environment else None, 154 ) 155 156 @property 157 def has_changes(self) -> bool: 158 return self.has_snapshot_changes or self.is_new_environment 159 160 @property 161 def has_snapshot_changes(self) -> bool: 162 return bool(self.added or self.removed or self.modified_snapshots) 163 164 def directly_modified(self, model_name: str) -> bool: 165 """Returns whether or not a model was directly modified in this context. 166 167 Args: 168 model_name: The model name to check. 169 170 Returns: 171 Whether or not the model was directly modified. 172 """ 173 174 if model_name not in self.modified_snapshots: 175 return False 176 177 current, previous = self.modified_snapshots[model_name] 178 return current.fingerprint.data_hash != previous.fingerprint.data_hash 179 180 def indirectly_modified(self, model_name: str) -> bool: 181 """Returns whether or not a model was indirectly modified in this context. 182 183 Args: 184 model_name: The model name to check. 185 186 Returns: 187 Whether or not the model was indirectly modified. 
188 """ 189 190 if model_name not in self.modified_snapshots: 191 return False 192 193 current, previous = self.modified_snapshots[model_name] 194 return ( 195 current.fingerprint.data_hash == previous.fingerprint.data_hash 196 and current.fingerprint.parent_data_hash != previous.fingerprint.parent_data_hash 197 ) 198 199 def metadata_updated(self, model_name: str) -> bool: 200 """Returns whether or not the given model's metadata has been updated. 201 202 Args: 203 model_name: The model name to check. 204 205 Returns: 206 Whether or not the model's metadata has been updated. 207 """ 208 209 if model_name not in self.modified_snapshots: 210 return False 211 212 current, previous = self.modified_snapshots[model_name] 213 return current.fingerprint.metadata_hash != previous.fingerprint.metadata_hash 214 215 def text_diff(self, model: str) -> str: 216 """Finds the difference of a model between the current and remote environment. 217 218 Args: 219 model: The model name. 220 221 Returns: 222 A unified text diff of the model. 223 """ 224 if model not in self.snapshots: 225 raise SQLMeshError(f"`{model}` does not exist.") 226 if model not in self.modified_snapshots: 227 return "" 228 229 new, old = self.modified_snapshots[model] 230 return old.model.text_diff(new.model)
class ContextDiff(PydanticModel):
    """ContextDiff is an object representing the difference between two environments.

    The two environments can be the local environment and a remote environment, or two remote
    environments.
    """

    environment: str
    """The environment to diff."""
    is_new_environment: bool
    """Whether the target environment is new."""
    create_from: str
    """The name of the environment the target environment will be created from if new."""
    added: t.Set[str]
    """New models."""
    removed: t.Set[str]
    """Deleted models."""
    modified_snapshots: t.Dict[str, t.Tuple[Snapshot, Snapshot]]
    """Modified snapshots, keyed by model name, as (current, previous) pairs."""
    snapshots: t.Dict[str, Snapshot]
    """Merged snapshots."""
    new_snapshots: t.Dict[SnapshotId, Snapshot]
    """New snapshots."""
    previous_plan_id: t.Optional[str]
    """Previous plan id."""

    @classmethod
    def create(
        cls,
        environment: str | Environment,
        snapshots: t.Dict[str, Snapshot],
        create_from: str,
        state_reader: StateReader,
    ) -> ContextDiff:
        """Create a ContextDiff object.

        Args:
            environment: The remote environment to diff.
            snapshots: The snapshots of the current environment.
            create_from: The environment to create the target environment from if it
                doesn't exist.
            state_reader: StateReader to access the remote environment to diff.

        Returns:
            The ContextDiff object.
        """
        if isinstance(environment, str):
            environment = environment.lower()
            env = state_reader.get_environment(environment)
        else:
            env = environment
            environment = env.name.lower()

        # When no target environment was returned, diff against `create_from` instead
        # and remember that the target is new.
        is_new_environment = env is None
        if is_new_environment:
            env = state_reader.get_environment(create_from.lower())

        # `env` may still be None at this point; then nothing counts as existing.
        existing_info = {info.name: info for info in (env.snapshots if env else [])}
        existing_models = set(existing_info)
        current_models = set(snapshots)
        removed = existing_models - current_models
        added = current_models - existing_models
        # Models present on both sides whose fingerprint differs from the baseline's.
        modified_info = {
            name: existing_info[name]
            for name, snapshot in snapshots.items()
            if name not in added and snapshot.fingerprint != existing_info[name].fingerprint
        }

        # A single state lookup covers the baseline side of every modified model and
        # all current snapshots.
        stored = state_reader.get_snapshots(
            list(modified_info.values()) + [snapshot.snapshot_id for snapshot in snapshots.values()]
        )

        merged_snapshots = {}
        modified_snapshots = {}
        new_snapshots = {}
        # child model name -> (versions, created_ts of the stored snapshot that supplied them)
        snapshot_remote_versions: t.Dict[str, t.Tuple[t.Tuple[SnapshotDataVersion, ...], int]] = {}

        for name, snapshot in snapshots.items():
            modified = modified_info.get(name)
            existing = stored.get(snapshot.snapshot_id)

            if existing:
                # The snapshot is already stored: reuse the stored one.
                merged_snapshots[name] = existing.copy()
                if modified:
                    modified_snapshots[name] = (existing, stored[modified.snapshot_id])
                # Per child, keep the versions from the stored snapshot with the
                # greatest created_ts.
                for child, versions in existing.indirect_versions.items():
                    existing_versions = snapshot_remote_versions.get(child)
                    if not existing_versions or existing_versions[1] < existing.created_ts:
                        snapshot_remote_versions[child] = (
                            versions,
                            existing.created_ts,
                        )
            else:
                # The snapshot is not stored yet: track a copy as new, and assign
                # previous_versions on that copy.
                snapshot = snapshot.copy()
                merged_snapshots[name] = snapshot
                new_snapshots[snapshot.snapshot_id] = snapshot
                if modified:
                    snapshot.previous_versions = modified.all_versions
                    modified_snapshots[name] = (snapshot, stored[modified.snapshot_id])

        # Runs only after the loop above so that snapshot_remote_versions is complete.
        for snapshot in new_snapshots.values():
            if (
                snapshot.name in snapshot_remote_versions
                and snapshot.previous_version
                and snapshot.data_hash_matches(snapshot.previous_version)
            ):
                remote_versions = snapshot_remote_versions[snapshot.name][0]
                remote_head = remote_versions[-1].version
                local_head = snapshot.previous_version.version

                if remote_head in (local.version for local in snapshot.previous_versions):
                    # The local history contains the remote head: keep the local head.
                    snapshot.set_version(local_head)
                elif local_head in (remote.version for remote in remote_versions):
                    # The remote history contains the local head: adopt the remote head.
                    snapshot.set_version(remote_head)
                else:
                    # Neither history contains the other's head.
                    # NOTE(review): presumably assigns a fresh version - confirm
                    # against Snapshot.set_version.
                    snapshot.set_version()

        return ContextDiff(
            environment=environment,
            is_new_environment=is_new_environment,
            create_from=create_from,
            added=added,
            removed=removed,
            modified_snapshots=modified_snapshots,
            snapshots=merged_snapshots,
            new_snapshots=new_snapshots,
            previous_plan_id=env.plan_id if env and not is_new_environment else None,
        )

    @property
    def has_changes(self) -> bool:
        """Whether there are snapshot changes or the target environment is new."""
        return self.has_snapshot_changes or self.is_new_environment

    @property
    def has_snapshot_changes(self) -> bool:
        """Whether any model was added, removed, or modified."""
        return bool(self.added or self.removed or self.modified_snapshots)

    def directly_modified(self, model_name: str) -> bool:
        """Returns whether or not a model was directly modified in this context.

        Args:
            model_name: The model name to check.

        Returns:
            Whether or not the model was directly modified.
        """
        pair = self.modified_snapshots.get(model_name)
        if pair is None:
            return False

        current, previous = pair
        return current.fingerprint.data_hash != previous.fingerprint.data_hash

    def indirectly_modified(self, model_name: str) -> bool:
        """Returns whether or not a model was indirectly modified in this context.

        Args:
            model_name: The model name to check.

        Returns:
            Whether or not the model was indirectly modified.
        """
        pair = self.modified_snapshots.get(model_name)
        if pair is None:
            return False

        current, previous = pair
        # Indirect: the model's own data hash is unchanged, the parents' hash is not.
        return (
            current.fingerprint.data_hash == previous.fingerprint.data_hash
            and current.fingerprint.parent_data_hash != previous.fingerprint.parent_data_hash
        )

    def metadata_updated(self, model_name: str) -> bool:
        """Returns whether or not the given model's metadata has been updated.

        Args:
            model_name: The model name to check.

        Returns:
            Whether or not the model's metadata has been updated.
        """
        pair = self.modified_snapshots.get(model_name)
        if pair is None:
            return False

        current, previous = pair
        return current.fingerprint.metadata_hash != previous.fingerprint.metadata_hash

    def text_diff(self, model: str) -> str:
        """Finds the difference of a model between the current and remote environment.

        Args:
            model: The model name.

        Returns:
            A unified text diff of the model, or an empty string if the model is not modified.

        Raises:
            SQLMeshError: If the model is not among the merged snapshots.
        """
        if model not in self.snapshots:
            raise SQLMeshError(f"`{model}` does not exist.")

        pair = self.modified_snapshots.get(model)
        if pair is None:
            return ""

        new, old = pair
        return old.model.text_diff(new.model)
ContextDiff is an object representing the difference between two environments.
The two environments can be the local environment and a remote environment, or two remote environments.
Modified snapshots.
New snapshots.
@classmethod
def create(
    cls,
    environment: str | Environment,
    snapshots: t.Dict[str, Snapshot],
    create_from: str,
    state_reader: StateReader,
) -> ContextDiff:
    """Create a ContextDiff object.

    Args:
        environment: The remote environment to diff.
        snapshots: The snapshots of the current environment.
        create_from: The environment to create the target environment from if it
            doesn't exist.
        state_reader: StateReader to access the remote environment to diff.

    Returns:
        The ContextDiff object.
    """
    if isinstance(environment, str):
        environment = environment.lower()
        env = state_reader.get_environment(environment)
    else:
        env = environment
        environment = env.name.lower()

    # When no target environment was returned, diff against `create_from` instead
    # and remember that the target is new.
    is_new_environment = env is None
    if is_new_environment:
        env = state_reader.get_environment(create_from.lower())

    # `env` may still be None at this point; then nothing counts as existing.
    existing_info = {info.name: info for info in (env.snapshots if env else [])}
    existing_models = set(existing_info)
    current_models = set(snapshots)
    removed = existing_models - current_models
    added = current_models - existing_models
    # Models present on both sides whose fingerprint differs from the baseline's.
    modified_info = {
        name: existing_info[name]
        for name, snapshot in snapshots.items()
        if name not in added and snapshot.fingerprint != existing_info[name].fingerprint
    }

    # A single state lookup covers the baseline side of every modified model and
    # all current snapshots.
    stored = state_reader.get_snapshots(
        list(modified_info.values()) + [snapshot.snapshot_id for snapshot in snapshots.values()]
    )

    merged_snapshots = {}
    modified_snapshots = {}
    new_snapshots = {}
    # child model name -> (versions, created_ts of the stored snapshot that supplied them)
    snapshot_remote_versions: t.Dict[str, t.Tuple[t.Tuple[SnapshotDataVersion, ...], int]] = {}

    for name, snapshot in snapshots.items():
        modified = modified_info.get(name)
        existing = stored.get(snapshot.snapshot_id)

        if existing:
            # The snapshot is already stored: reuse the stored one.
            merged_snapshots[name] = existing.copy()
            if modified:
                modified_snapshots[name] = (existing, stored[modified.snapshot_id])
            # Per child, keep the versions from the stored snapshot with the
            # greatest created_ts.
            for child, versions in existing.indirect_versions.items():
                existing_versions = snapshot_remote_versions.get(child)
                if not existing_versions or existing_versions[1] < existing.created_ts:
                    snapshot_remote_versions[child] = (
                        versions,
                        existing.created_ts,
                    )
        else:
            # The snapshot is not stored yet: track a copy as new, and assign
            # previous_versions on that copy.
            snapshot = snapshot.copy()
            merged_snapshots[name] = snapshot
            new_snapshots[snapshot.snapshot_id] = snapshot
            if modified:
                snapshot.previous_versions = modified.all_versions
                modified_snapshots[name] = (snapshot, stored[modified.snapshot_id])

    # Runs only after the loop above so that snapshot_remote_versions is complete.
    for snapshot in new_snapshots.values():
        if (
            snapshot.name in snapshot_remote_versions
            and snapshot.previous_version
            and snapshot.data_hash_matches(snapshot.previous_version)
        ):
            remote_versions = snapshot_remote_versions[snapshot.name][0]
            remote_head = remote_versions[-1].version
            local_head = snapshot.previous_version.version

            if remote_head in (local.version for local in snapshot.previous_versions):
                # The local history contains the remote head: keep the local head.
                snapshot.set_version(local_head)
            elif local_head in (remote.version for remote in remote_versions):
                # The remote history contains the local head: adopt the remote head.
                snapshot.set_version(remote_head)
            else:
                # Neither history contains the other's head.
                # NOTE(review): presumably assigns a fresh version - confirm
                # against Snapshot.set_version.
                snapshot.set_version()

    return ContextDiff(
        environment=environment,
        is_new_environment=is_new_environment,
        create_from=create_from,
        added=added,
        removed=removed,
        modified_snapshots=modified_snapshots,
        snapshots=merged_snapshots,
        new_snapshots=new_snapshots,
        previous_plan_id=env.plan_id if env and not is_new_environment else None,
    )
Create a ContextDiff object.
Arguments:
- environment: The remote environment to diff.
- snapshots: The snapshots of the current environment.
- create_from: The environment to create the target environment from if it doesn't exist.
- state_reader: StateReader to access the remote environment to diff.
Returns:
The ContextDiff object.
def directly_modified(self, model_name: str) -> bool:
    """Returns whether or not a model was directly modified in this context.

    Args:
        model_name: The model name to check.

    Returns:
        Whether or not the model was directly modified. False when the model
        has no entry in `modified_snapshots`.
    """
    # One lookup instead of a membership test followed by an index.
    pair = self.modified_snapshots.get(model_name)
    if pair is None:
        return False

    current, previous = pair
    return current.fingerprint.data_hash != previous.fingerprint.data_hash
Returns whether or not a model was directly modified in this context.
Arguments:
- model_name: The model name to check.
Returns:
Whether or not the model was directly modified.
def indirectly_modified(self, model_name: str) -> bool:
    """Returns whether or not a model was indirectly modified in this context.

    Args:
        model_name: The model name to check.

    Returns:
        Whether or not the model was indirectly modified. False when the model
        has no entry in `modified_snapshots`.
    """
    # One lookup instead of a membership test followed by an index.
    pair = self.modified_snapshots.get(model_name)
    if pair is None:
        return False

    current, previous = pair
    # Indirect: the model's own data hash is unchanged, the parents' hash is not.
    return (
        current.fingerprint.data_hash == previous.fingerprint.data_hash
        and current.fingerprint.parent_data_hash != previous.fingerprint.parent_data_hash
    )
Returns whether or not a model was indirectly modified in this context.
Arguments:
- model_name: The model name to check.
Returns:
Whether or not the model was indirectly modified.
def metadata_updated(self, model_name: str) -> bool:
    """Returns whether or not the given model's metadata has been updated.

    Args:
        model_name: The model name to check.

    Returns:
        Whether or not the model's metadata has been updated. False when the
        model has no entry in `modified_snapshots`.
    """
    # One lookup instead of a membership test followed by an index.
    pair = self.modified_snapshots.get(model_name)
    if pair is None:
        return False

    current, previous = pair
    return current.fingerprint.metadata_hash != previous.fingerprint.metadata_hash
Returns whether or not the given model's metadata has been updated.
Arguments:
- model_name: The model name to check.
Returns:
Whether or not the model's metadata has been updated.
def text_diff(self, model: str) -> str:
    """Finds the difference of a model between the current and remote environment.

    Args:
        model: The model name.

    Returns:
        A unified text diff of the model, or an empty string if the model is not modified.

    Raises:
        SQLMeshError: If the model is not among the merged snapshots.
    """
    if model not in self.snapshots:
        raise SQLMeshError(f"`{model}` does not exist.")

    # One lookup instead of a membership test followed by an index.
    pair = self.modified_snapshots.get(model)
    if pair is None:
        return ""

    new, old = pair
    # The diff is taken from the old model towards the new one.
    return old.model.text_diff(new.model)
Finds the difference of a model between the current and remote environment.
Arguments:
- model: The model name.
Returns:
A unified text diff of the model.
Inherited Members
- pydantic.main.BaseModel
- BaseModel
- parse_obj
- parse_raw
- parse_file
- from_orm
- construct
- copy
- schema
- schema_json
- validate
- update_forward_refs