sqlmesh.core.state_sync.common
1from __future__ import annotations 2 3import abc 4import logging 5import typing as t 6from collections import defaultdict 7from functools import wraps 8 9from sqlmesh.core.engine_adapter.shared import TransactionType 10from sqlmesh.core.environment import Environment 11from sqlmesh.core.snapshot import ( 12 Snapshot, 13 SnapshotId, 14 SnapshotIdLike, 15 SnapshotInfoLike, 16 SnapshotNameVersionLike, 17 SnapshotTableInfo, 18) 19from sqlmesh.core.state_sync.base import StateSync 20from sqlmesh.utils.date import TimeLike, now, to_datetime 21from sqlmesh.utils.errors import SQLMeshError 22 23logger = logging.getLogger(__name__) 24 25 26def transactional( 27 transaction_type: TransactionType = TransactionType.DML, 28) -> t.Callable[[t.Callable], t.Callable]: 29 def decorator(func: t.Callable) -> t.Callable: 30 @wraps(func) 31 def wrapper(self: t.Any, *args: t.Any, **kwargs: t.Any) -> t.Any: 32 if not hasattr(self, "_transaction"): 33 return func(self, *args, **kwargs) 34 35 with self._transaction(transaction_type): 36 return func(self, *args, **kwargs) 37 38 return wrapper 39 40 return decorator 41 42 43class CommonStateSyncMixin(StateSync): 44 def get_snapshots( 45 self, snapshot_ids: t.Optional[t.Iterable[SnapshotIdLike]] 46 ) -> t.Dict[SnapshotId, Snapshot]: 47 return self._get_snapshots(snapshot_ids) 48 49 def get_snapshots_with_same_version( 50 self, snapshots: t.Iterable[SnapshotNameVersionLike] 51 ) -> t.List[Snapshot]: 52 return self._get_snapshots_with_same_version(snapshots) 53 54 def get_environment(self, environment: str) -> t.Optional[Environment]: 55 return self._get_environment(environment) 56 57 def get_snapshots_by_models( 58 self, *names: str, lock_for_update: bool = False 59 ) -> t.List[Snapshot]: 60 """ 61 Get all snapshots by model name. 62 63 Returns: 64 The list of snapshots. 
65 """ 66 return [ 67 snapshot 68 for snapshot in self._get_snapshots(lock_for_update=lock_for_update).values() 69 if snapshot.name in names 70 ] 71 72 @transactional() 73 def promote( 74 self, environment: Environment, no_gaps: bool = False 75 ) -> t.Tuple[t.List[SnapshotTableInfo], t.List[SnapshotTableInfo]]: 76 """Update the environment to reflect the current state. 77 78 This method verifies that snapshots have been pushed. 79 80 Args: 81 environment: The environment to promote. 82 no_gaps: Whether to ensure that new snapshots for models that are already a 83 part of the target environment have no data gaps when compared against previous 84 snapshots for same models. 85 86 Returns: 87 A tuple of (added snapshot table infos, removed snapshot table infos) 88 """ 89 logger.info("Promoting environment '%s'", environment.name) 90 91 snapshot_ids = set(snapshot.snapshot_id for snapshot in environment.snapshots) 92 snapshots = self._get_snapshots(snapshot_ids, lock_for_update=True).values() 93 missing = snapshot_ids - {snapshot.snapshot_id for snapshot in snapshots} 94 if missing: 95 raise SQLMeshError( 96 f"Missing snapshots {missing}. Make sure to push and backfill your snapshots." 97 ) 98 99 existing_environment = self._get_environment(environment.name, lock_for_update=True) 100 101 if existing_environment: 102 if environment.previous_plan_id != existing_environment.plan_id: 103 raise SQLMeshError( 104 f"Plan '{environment.plan_id}' is no longer valid for the target environment '{environment.name}'. " 105 f"Expected previous plan ID: '{environment.previous_plan_id}', actual previous plan ID: '{existing_environment.plan_id}'. 
" 106 "Please recreate the plan and try again" 107 ) 108 109 if no_gaps: 110 self._ensure_no_gaps(snapshots, existing_environment) 111 112 existing_table_infos = { 113 table_info.name: table_info for table_info in existing_environment.snapshots 114 } 115 else: 116 existing_table_infos = {} 117 118 missing_models = set(existing_table_infos) - {snapshot.name for snapshot in snapshots} 119 120 for snapshot in snapshots: 121 existing_table_infos.get(snapshot.name) 122 for parent in snapshot.parents: 123 if parent not in snapshot_ids: 124 raise SQLMeshError( 125 f"Cannot promote snapshot `{snapshot.name}` because its parent `{parent.name}:{parent.identifier}` is not promoted. Did you mean to promote all snapshots instead of a subset?" 126 ) 127 128 table_infos = [s.table_info for s in snapshots] 129 self._update_environment(environment) 130 return table_infos, [existing_table_infos[name] for name in missing_models] 131 132 @transactional() 133 def delete_expired_snapshots(self) -> t.List[Snapshot]: 134 current_time = now() 135 136 snapshots_by_version = defaultdict(list) 137 for s in self._get_snapshots().values(): 138 snapshots_by_version[(s.name, s.version)].append(s) 139 140 promoted_snapshot_ids = { 141 snapshot.snapshot_id 142 for environment in self.get_environments() 143 for snapshot in environment.snapshots 144 } 145 146 def _is_snapshot_used(snapshot: Snapshot) -> bool: 147 return ( 148 snapshot.snapshot_id in promoted_snapshot_ids 149 or to_datetime(snapshot.ttl, relative_base=to_datetime(snapshot.updated_ts)) 150 > current_time 151 ) 152 153 expired_snapshots = [] 154 155 for snapshots in snapshots_by_version.values(): 156 if any(map(_is_snapshot_used, snapshots)): 157 continue 158 159 for snapshot in snapshots: 160 expired_snapshots.append(snapshot) 161 162 if expired_snapshots: 163 self.delete_snapshots(expired_snapshots) 164 165 return expired_snapshots 166 167 @transactional() 168 def add_interval( 169 self, 170 snapshot_id: SnapshotIdLike, 171 start: 
TimeLike, 172 end: TimeLike, 173 is_dev: bool = False, 174 ) -> None: 175 snapshot_id = snapshot_id.snapshot_id 176 stored_snapshots = self._get_snapshots([snapshot_id], lock_for_update=True) 177 if snapshot_id not in stored_snapshots: 178 raise SQLMeshError(f"Snapshot {snapshot_id} was not found") 179 180 logger.info("Adding interval for snapshot %s", snapshot_id) 181 stored_snapshot = stored_snapshots[snapshot_id] 182 stored_snapshot.add_interval(start, end, is_dev=is_dev) 183 self._update_snapshot(stored_snapshot) 184 185 @transactional() 186 def remove_interval( 187 self, 188 snapshots: t.Iterable[SnapshotInfoLike], 189 start: TimeLike, 190 end: TimeLike, 191 all_snapshots: t.Optional[t.Iterable[Snapshot]] = None, 192 ) -> None: 193 all_snapshots = all_snapshots or self._get_snapshots_with_same_version( 194 snapshots, lock_for_update=True 195 ) 196 for snapshot in all_snapshots: 197 logger.info("Removing interval for snapshot %s", snapshot.snapshot_id) 198 snapshot.remove_interval(start, end) 199 self._update_snapshot(snapshot) 200 201 @transactional() 202 def unpause_snapshots( 203 self, snapshots: t.Iterable[SnapshotInfoLike], unpaused_dt: TimeLike 204 ) -> None: 205 target_snapshot_ids = {s.snapshot_id for s in snapshots} 206 snapshots = self._get_snapshots_with_same_version(snapshots, lock_for_update=True) 207 for snapshot in snapshots: 208 is_target_snapshot = snapshot.snapshot_id in target_snapshot_ids 209 if is_target_snapshot and not snapshot.unpaused_ts: 210 logger.info(f"Unpausing snapshot %s", snapshot.snapshot_id) 211 snapshot.set_unpaused_ts(unpaused_dt) 212 self._update_snapshot(snapshot) 213 elif not is_target_snapshot and snapshot.unpaused_ts: 214 logger.info(f"Pausing snapshot %s", snapshot.snapshot_id) 215 snapshot.set_unpaused_ts(None) 216 self._update_snapshot(snapshot) 217 218 def _ensure_no_gaps( 219 self, target_snapshots: t.Iterable[Snapshot], target_environment: Environment 220 ) -> None: 221 target_snapshots_by_name = {s.name: s for s 
in target_snapshots} 222 223 changed_version_prev_snapshots_by_name = { 224 s.name: s 225 for s in target_environment.snapshots 226 if s.name in target_snapshots_by_name 227 and target_snapshots_by_name[s.name].version != s.version 228 } 229 230 changed_version_target_snapshots = [ 231 t for t in target_snapshots if t.name in changed_version_prev_snapshots_by_name 232 ] 233 234 all_snapshots = { 235 s.snapshot_id: s 236 for s in self._get_snapshots_with_same_version( 237 [ 238 *changed_version_prev_snapshots_by_name.values(), 239 *changed_version_target_snapshots, 240 ] 241 ) 242 } 243 244 merged_prev_snapshots = Snapshot.merge_snapshots( 245 changed_version_prev_snapshots_by_name.values(), all_snapshots 246 ) 247 merged_target_snapshots = Snapshot.merge_snapshots( 248 changed_version_target_snapshots, all_snapshots 249 ) 250 merged_target_snapshots_by_name = {s.name: s for s in merged_target_snapshots} 251 252 for prev_snapshot in merged_prev_snapshots: 253 target_snapshot = merged_target_snapshots_by_name[prev_snapshot.name] 254 if ( 255 target_snapshot.is_incremental_by_time_range_kind 256 and prev_snapshot.is_incremental_by_time_range_kind 257 and prev_snapshot.intervals 258 ): 259 missing_intervals = target_snapshot.missing_intervals( 260 prev_snapshot.intervals[0][0], 261 prev_snapshot.intervals[-1][1], 262 ) 263 if missing_intervals: 264 raise SQLMeshError( 265 f"Detected gaps in snapshot {target_snapshot.snapshot_id}: {missing_intervals}" 266 ) 267 268 @abc.abstractmethod 269 def _update_environment(self, environment: Environment) -> None: 270 """Overwrites the target environment with a given environment. 271 272 Args: 273 environment: The new environment. 274 """ 275 276 @abc.abstractmethod 277 def _update_snapshot(self, snapshot: Snapshot) -> None: 278 """Updates the target snapshot. 279 280 Args: 281 snapshot: The target snapshot. 
282 """ 283 284 @abc.abstractmethod 285 def _get_snapshots( 286 self, 287 snapshot_ids: t.Optional[t.Iterable[SnapshotIdLike]] = None, 288 lock_for_update: bool = False, 289 ) -> t.Dict[SnapshotId, Snapshot]: 290 """Fetches specified snapshots. 291 292 Args: 293 snapshot_ids: The collection of IDs of snapshots to fetch 294 lock_for_update: Lock the snapshot rows for future update 295 296 Returns: 297 A dictionary of snapshot ids to snapshots for ones that could be found. 298 """ 299 300 @abc.abstractmethod 301 def _get_snapshots_with_same_version( 302 self, 303 snapshots: t.Iterable[SnapshotNameVersionLike], 304 lock_for_update: bool = False, 305 ) -> t.List[Snapshot]: 306 """Fetches all snapshots that share the same version as the snapshots. 307 308 The output includes the snapshots with the specified version. 309 310 Args: 311 snapshots: The collection of target name / version pairs. 312 lock_for_update: Lock the snapshot rows for future update 313 314 Returns: 315 The list of Snapshot objects. 316 """ 317 318 @abc.abstractmethod 319 def _get_environment( 320 self, environment: str, lock_for_update: bool = False 321 ) -> t.Optional[Environment]: 322 """Fetches the environment if it exists. 323 324 Args: 325 environment: The target environment name. 326 lock_for_update: Lock the snapshot rows for future update 327 328 Returns: 329 The target environment. 330 """
def transactional(
    transaction_type: TransactionType = TransactionType.DML,
) -> t.Callable[[t.Callable], t.Callable]:
    """Build a decorator that runs a method inside ``self._transaction(...)``.

    Args:
        transaction_type: The value passed to ``self._transaction``.

    Returns:
        A decorator for instance methods. Instances without a ``_transaction``
        attribute have the method invoked directly.
    """

    def decorator(func: t.Callable) -> t.Callable:
        @wraps(func)
        def wrapper(self: t.Any, *args: t.Any, **kwargs: t.Any) -> t.Any:
            # Open a transaction only when the instance provides one.
            if hasattr(self, "_transaction"):
                with self._transaction(transaction_type):
                    return func(self, *args, **kwargs)
            return func(self, *args, **kwargs)

        return wrapper

    return decorator
class CommonStateSyncMixin(StateSync):
    """State sync logic shared by concrete implementations.

    The public operations below are written against a small set of abstract
    storage primitives (``_get_snapshots``, ``_get_snapshots_with_same_version``,
    ``_get_environment``, ``_update_snapshot`` and ``_update_environment``)
    that subclasses must implement.
    """

    def get_snapshots(
        self, snapshot_ids: t.Optional[t.Iterable[SnapshotIdLike]]
    ) -> t.Dict[SnapshotId, Snapshot]:
        """Fetch snapshots by ID via ``_get_snapshots`` (default, non-locking call).

        Args:
            snapshot_ids: The IDs of the snapshots to fetch.

        Returns:
            A dictionary of snapshot ids to snapshots for ones that could be found.
        """
        return self._get_snapshots(snapshot_ids)

    def get_snapshots_with_same_version(
        self, snapshots: t.Iterable[SnapshotNameVersionLike]
    ) -> t.List[Snapshot]:
        """Fetch all snapshots sharing a version with the given snapshots.

        Args:
            snapshots: The collection of target name / version pairs.

        Returns:
            The list of Snapshot objects.
        """
        return self._get_snapshots_with_same_version(snapshots)

    def get_environment(self, environment: str) -> t.Optional[Environment]:
        """Fetch the environment if it exists.

        Args:
            environment: The target environment name.

        Returns:
            The result of ``_get_environment`` for that name.
        """
        return self._get_environment(environment)

    def get_snapshots_by_models(
        self, *names: str, lock_for_update: bool = False
    ) -> t.List[Snapshot]:
        """Get all snapshots by model name.

        Args:
            names: The model names to keep.
            lock_for_update: Forwarded to ``_get_snapshots``.

        Returns:
            The list of snapshots.
        """
        return [
            snapshot
            for snapshot in self._get_snapshots(lock_for_update=lock_for_update).values()
            if snapshot.name in names
        ]

    @transactional()
    def promote(
        self, environment: Environment, no_gaps: bool = False
    ) -> t.Tuple[t.List[SnapshotTableInfo], t.List[SnapshotTableInfo]]:
        """Update the environment to reflect the current state.

        This method verifies that snapshots have been pushed.

        Args:
            environment: The environment to promote.
            no_gaps: Whether to ensure that new snapshots for models that are already a
                part of the target environment have no data gaps when compared against previous
                snapshots for same models.

        Returns:
            A tuple of (added snapshot table infos, removed snapshot table infos)

        Raises:
            SQLMeshError: If a snapshot is missing from the state, the plan is
                stale, a gap is detected, or a parent snapshot is not promoted.
        """
        logger.info("Promoting environment '%s'", environment.name)

        snapshot_ids = {snapshot.snapshot_id for snapshot in environment.snapshots}
        snapshots = self._get_snapshots(snapshot_ids, lock_for_update=True).values()
        missing = snapshot_ids - {snapshot.snapshot_id for snapshot in snapshots}
        if missing:
            raise SQLMeshError(
                f"Missing snapshots {missing}. Make sure to push and backfill your snapshots."
            )

        existing_environment = self._get_environment(environment.name, lock_for_update=True)

        existing_table_infos: t.Dict[str, SnapshotTableInfo] = {}
        if existing_environment:
            # The plan must have been built on top of the currently stored plan.
            if environment.previous_plan_id != existing_environment.plan_id:
                raise SQLMeshError(
                    f"Plan '{environment.plan_id}' is no longer valid for the target environment '{environment.name}'. "
                    f"Expected previous plan ID: '{environment.previous_plan_id}', actual previous plan ID: '{existing_environment.plan_id}'. "
                    "Please recreate the plan and try again"
                )

            if no_gaps:
                self._ensure_no_gaps(snapshots, existing_environment)

            existing_table_infos = {
                table_info.name: table_info for table_info in existing_environment.snapshots
            }

        # Models present in the stored environment but absent from the new one.
        missing_models = set(existing_table_infos) - {snapshot.name for snapshot in snapshots}

        for snapshot in snapshots:
            for parent in snapshot.parents:
                if parent not in snapshot_ids:
                    raise SQLMeshError(
                        f"Cannot promote snapshot `{snapshot.name}` because its parent `{parent.name}:{parent.identifier}` is not promoted. Did you mean to promote all snapshots instead of a subset?"
                    )

        table_infos = [s.table_info for s in snapshots]
        self._update_environment(environment)
        return table_infos, [existing_table_infos[name] for name in missing_models]

    @transactional()
    def delete_expired_snapshots(self) -> t.List[Snapshot]:
        """Delete every (name, version) group in which no snapshot is still in use.

        A snapshot counts as in use when it belongs to any environment returned
        by ``get_environments`` or when its TTL, resolved relative to its
        ``updated_ts``, is later than the current time.

        Returns:
            The list of removed snapshots.
        """
        current_time = now()

        snapshots_by_version = defaultdict(list)
        for s in self._get_snapshots().values():
            snapshots_by_version[(s.name, s.version)].append(s)

        promoted_snapshot_ids = {
            snapshot.snapshot_id
            for environment in self.get_environments()
            for snapshot in environment.snapshots
        }

        def _is_snapshot_used(snapshot: Snapshot) -> bool:
            return (
                snapshot.snapshot_id in promoted_snapshot_ids
                or to_datetime(snapshot.ttl, relative_base=to_datetime(snapshot.updated_ts))
                > current_time
            )

        expired_snapshots: t.List[Snapshot] = []

        for snapshots in snapshots_by_version.values():
            # A single used snapshot keeps the whole version group alive.
            if any(map(_is_snapshot_used, snapshots)):
                continue
            expired_snapshots.extend(snapshots)

        if expired_snapshots:
            self.delete_snapshots(expired_snapshots)

        return expired_snapshots

    @transactional()
    def add_interval(
        self,
        snapshot_id: SnapshotIdLike,
        start: TimeLike,
        end: TimeLike,
        is_dev: bool = False,
    ) -> None:
        """Add an interval to a stored snapshot and persist it.

        Args:
            snapshot_id: The snapshot like object to add an interval to.
            start: The start of the interval to add.
            end: The end of the interval to add.
            is_dev: Forwarded to ``Snapshot.add_interval``.

        Raises:
            SQLMeshError: If the snapshot is not found in the state.
        """
        snapshot_id = snapshot_id.snapshot_id
        stored_snapshots = self._get_snapshots([snapshot_id], lock_for_update=True)
        if snapshot_id not in stored_snapshots:
            raise SQLMeshError(f"Snapshot {snapshot_id} was not found")

        logger.info("Adding interval for snapshot %s", snapshot_id)
        stored_snapshot = stored_snapshots[snapshot_id]
        stored_snapshot.add_interval(start, end, is_dev=is_dev)
        self._update_snapshot(stored_snapshot)

    @transactional()
    def remove_interval(
        self,
        snapshots: t.Iterable[SnapshotInfoLike],
        start: TimeLike,
        end: TimeLike,
        all_snapshots: t.Optional[t.Iterable[Snapshot]] = None,
    ) -> None:
        """Remove an interval from snapshots and persist each of them.

        Args:
            snapshots: Snapshots whose versions select what to update; only
                used when ``all_snapshots`` is falsy.
            start: The start of the interval to remove.
            end: The end of the interval to remove.
            all_snapshots: Snapshots to update directly, skipping the lookup
                of snapshots with matching versions.
        """
        all_snapshots = all_snapshots or self._get_snapshots_with_same_version(
            snapshots, lock_for_update=True
        )
        for snapshot in all_snapshots:
            logger.info("Removing interval for snapshot %s", snapshot.snapshot_id)
            snapshot.remove_interval(start, end)
            self._update_snapshot(snapshot)

    @transactional()
    def unpause_snapshots(
        self, snapshots: t.Iterable[SnapshotInfoLike], unpaused_dt: TimeLike
    ) -> None:
        """Unpause the target snapshots and pause their same-version siblings.

        Args:
            snapshots: Target snapshots.
            unpaused_dt: The timestamp stored on snapshots that get unpaused.
        """
        # Materialize once: the input is consumed twice below and may be a
        # one-shot iterable such as a generator.
        targets = list(snapshots)
        target_snapshot_ids = {s.snapshot_id for s in targets}
        stored_snapshots = self._get_snapshots_with_same_version(targets, lock_for_update=True)
        for snapshot in stored_snapshots:
            is_target_snapshot = snapshot.snapshot_id in target_snapshot_ids
            if is_target_snapshot and not snapshot.unpaused_ts:
                logger.info("Unpausing snapshot %s", snapshot.snapshot_id)
                snapshot.set_unpaused_ts(unpaused_dt)
                self._update_snapshot(snapshot)
            elif not is_target_snapshot and snapshot.unpaused_ts:
                logger.info("Pausing snapshot %s", snapshot.snapshot_id)
                snapshot.set_unpaused_ts(None)
                self._update_snapshot(snapshot)

    def _ensure_no_gaps(
        self, target_snapshots: t.Iterable[Snapshot], target_environment: Environment
    ) -> None:
        """Raise if a re-versioned model would lose intervals its predecessor had.

        Only models whose version differs between ``target_environment`` and
        ``target_snapshots`` are checked, and only when both the previous and
        the new snapshot are incremental by time range and the previous one has
        intervals.

        Args:
            target_snapshots: The snapshots about to be promoted.
            target_environment: The currently stored environment.

        Raises:
            SQLMeshError: If the new snapshot has missing intervals within the
                range covered by the previous snapshot.
        """
        # Materialize once: the input is iterated twice below.
        target_snapshots = list(target_snapshots)
        target_snapshots_by_name = {s.name: s for s in target_snapshots}

        changed_version_prev_snapshots_by_name = {
            s.name: s
            for s in target_environment.snapshots
            if s.name in target_snapshots_by_name
            and target_snapshots_by_name[s.name].version != s.version
        }

        changed_version_target_snapshots = [
            target
            for target in target_snapshots
            if target.name in changed_version_prev_snapshots_by_name
        ]

        all_snapshots = {
            s.snapshot_id: s
            for s in self._get_snapshots_with_same_version(
                [
                    *changed_version_prev_snapshots_by_name.values(),
                    *changed_version_target_snapshots,
                ]
            )
        }

        # NOTE(review): merge_snapshots presumably folds in intervals of
        # snapshots sharing a version -- confirm against Snapshot.
        merged_prev_snapshots = Snapshot.merge_snapshots(
            changed_version_prev_snapshots_by_name.values(), all_snapshots
        )
        merged_target_snapshots = Snapshot.merge_snapshots(
            changed_version_target_snapshots, all_snapshots
        )
        merged_target_snapshots_by_name = {s.name: s for s in merged_target_snapshots}

        for prev_snapshot in merged_prev_snapshots:
            target_snapshot = merged_target_snapshots_by_name[prev_snapshot.name]
            if (
                target_snapshot.is_incremental_by_time_range_kind
                and prev_snapshot.is_incremental_by_time_range_kind
                and prev_snapshot.intervals
            ):
                # Check the span from the first interval's start to the last
                # interval's end of the previous snapshot.
                missing_intervals = target_snapshot.missing_intervals(
                    prev_snapshot.intervals[0][0],
                    prev_snapshot.intervals[-1][1],
                )
                if missing_intervals:
                    raise SQLMeshError(
                        f"Detected gaps in snapshot {target_snapshot.snapshot_id}: {missing_intervals}"
                    )

    @abc.abstractmethod
    def _update_environment(self, environment: Environment) -> None:
        """Overwrites the target environment with a given environment.

        Args:
            environment: The new environment.
        """

    @abc.abstractmethod
    def _update_snapshot(self, snapshot: Snapshot) -> None:
        """Updates the target snapshot.

        Args:
            snapshot: The target snapshot.
        """

    @abc.abstractmethod
    def _get_snapshots(
        self,
        snapshot_ids: t.Optional[t.Iterable[SnapshotIdLike]] = None,
        lock_for_update: bool = False,
    ) -> t.Dict[SnapshotId, Snapshot]:
        """Fetches specified snapshots.

        Args:
            snapshot_ids: The collection of IDs of snapshots to fetch
            lock_for_update: Lock the snapshot rows for future update

        Returns:
            A dictionary of snapshot ids to snapshots for ones that could be found.
        """

    @abc.abstractmethod
    def _get_snapshots_with_same_version(
        self,
        snapshots: t.Iterable[SnapshotNameVersionLike],
        lock_for_update: bool = False,
    ) -> t.List[Snapshot]:
        """Fetches all snapshots that share the same version as the snapshots.

        The output includes the snapshots with the specified version.

        Args:
            snapshots: The collection of target name / version pairs.
            lock_for_update: Lock the snapshot rows for future update

        Returns:
            The list of Snapshot objects.
        """

    @abc.abstractmethod
    def _get_environment(
        self, environment: str, lock_for_update: bool = False
    ) -> t.Optional[Environment]:
        """Fetches the environment if it exists.

        Args:
            environment: The target environment name.
            lock_for_update: Lock the fetched environment for future update

        Returns:
            The target environment.
        """
Abstract base class for snapshot and environment state management.
def get_snapshots(
    self, snapshot_ids: t.Optional[t.Iterable[SnapshotIdLike]]
) -> t.Dict[SnapshotId, Snapshot]:
    """Fetch snapshots by ID.

    Thin public wrapper that forwards ``snapshot_ids`` to ``_get_snapshots``
    and returns its result unchanged.

    Args:
        snapshot_ids: The IDs of the snapshots to fetch.

    Returns:
        Whatever ``_get_snapshots`` returns for those IDs.
    """
    found = self._get_snapshots(snapshot_ids)
    return found
Bulk fetch snapshots given the corresponding snapshot ids.
Arguments:
- snapshot_ids: Iterable of snapshot ids to get. If not provided all available snapshots will be returned.
Returns:
A dictionary of snapshot ids to snapshots for ones that could be found.
def get_snapshots_with_same_version(
    self, snapshots: t.Iterable[SnapshotNameVersionLike]
) -> t.List[Snapshot]:
    """Fetch all snapshots sharing a version with the given snapshots.

    Thin public wrapper that forwards ``snapshots`` to
    ``_get_snapshots_with_same_version`` and returns its result unchanged.

    Args:
        snapshots: The collection of target name / version pairs.

    Returns:
        Whatever ``_get_snapshots_with_same_version`` returns.
    """
    matching = self._get_snapshots_with_same_version(snapshots)
    return matching
Fetches all snapshots that share the same version as the snapshots.
The output includes the snapshots with the specified version.
Arguments:
- snapshots: The collection of target name / version pairs.
Returns:
The list of Snapshot objects.
def get_environment(self, environment: str) -> t.Optional[Environment]:
    """Fetch the environment if it exists.

    Thin public wrapper that forwards the name to ``_get_environment`` and
    returns its result unchanged.

    Args:
        environment: The target environment name.

    Returns:
        Whatever ``_get_environment`` returns for that name.
    """
    stored_environment = self._get_environment(environment)
    return stored_environment
Fetches the environment if it exists.
Arguments:
- environment: The target environment name.
Returns:
The environment object, or None if the environment does not exist.
def get_snapshots_by_models(
    self, *names: str, lock_for_update: bool = False
) -> t.List[Snapshot]:
    """Get all snapshots by model name.

    Loads every snapshot through ``_get_snapshots`` and keeps those whose
    ``name`` is one of ``names``.

    Args:
        names: The model names to keep.
        lock_for_update: Forwarded to ``_get_snapshots``.

    Returns:
        The list of snapshots.
    """
    stored = self._get_snapshots(lock_for_update=lock_for_update)
    return [candidate for candidate in stored.values() if candidate.name in names]
Get all snapshots by model name.
Returns:
The list of snapshots.
@transactional()
def promote(
    self, environment: Environment, no_gaps: bool = False
) -> t.Tuple[t.List[SnapshotTableInfo], t.List[SnapshotTableInfo]]:
    """Update the environment to reflect the current state.

    This method verifies that snapshots have been pushed.

    Args:
        environment: The environment to promote.
        no_gaps: Whether to ensure that new snapshots for models that are already a
            part of the target environment have no data gaps when compared against previous
            snapshots for same models.

    Returns:
        A tuple of (added snapshot table infos, removed snapshot table infos)

    Raises:
        SQLMeshError: If a snapshot is missing from the state, the plan is
            stale, a gap is detected, or a parent snapshot is not promoted.
    """
    logger.info("Promoting environment '%s'", environment.name)

    snapshot_ids = {snapshot.snapshot_id for snapshot in environment.snapshots}
    snapshots = self._get_snapshots(snapshot_ids, lock_for_update=True).values()
    missing = snapshot_ids - {snapshot.snapshot_id for snapshot in snapshots}
    if missing:
        raise SQLMeshError(
            f"Missing snapshots {missing}. Make sure to push and backfill your snapshots."
        )

    existing_environment = self._get_environment(environment.name, lock_for_update=True)

    existing_table_infos: t.Dict[str, SnapshotTableInfo] = {}
    if existing_environment:
        # The plan must have been built on top of the currently stored plan.
        if environment.previous_plan_id != existing_environment.plan_id:
            raise SQLMeshError(
                f"Plan '{environment.plan_id}' is no longer valid for the target environment '{environment.name}'. "
                f"Expected previous plan ID: '{environment.previous_plan_id}', actual previous plan ID: '{existing_environment.plan_id}'. "
                "Please recreate the plan and try again"
            )

        if no_gaps:
            self._ensure_no_gaps(snapshots, existing_environment)

        existing_table_infos = {
            table_info.name: table_info for table_info in existing_environment.snapshots
        }

    # Models present in the stored environment but absent from the new one.
    missing_models = set(existing_table_infos) - {snapshot.name for snapshot in snapshots}

    for snapshot in snapshots:
        for parent in snapshot.parents:
            if parent not in snapshot_ids:
                raise SQLMeshError(
                    f"Cannot promote snapshot `{snapshot.name}` because its parent `{parent.name}:{parent.identifier}` is not promoted. Did you mean to promote all snapshots instead of a subset?"
                )

    table_infos = [s.table_info for s in snapshots]
    self._update_environment(environment)
    return table_infos, [existing_table_infos[name] for name in missing_models]
Update the environment to reflect the current state.
This method verifies that snapshots have been pushed.
Arguments:
- environment: The environment to promote.
- no_gaps: Whether to ensure that new snapshots for models that are already a part of the target environment have no data gaps when compared against previous snapshots for same models.
Returns:
A tuple of (added snapshot table infos, removed snapshot table infos)
@transactional()
def delete_expired_snapshots(self) -> t.List[Snapshot]:
    """Delete every (name, version) group in which no snapshot is still in use.

    A snapshot counts as in use when it belongs to any environment returned
    by ``get_environments`` or when its TTL, resolved relative to its
    ``updated_ts``, is later than the current time.

    Returns:
        The list of removed snapshots.
    """
    cutoff = now()

    grouped = defaultdict(list)
    for stored in self._get_snapshots().values():
        grouped[(stored.name, stored.version)].append(stored)

    in_use_ids = set()
    for env in self.get_environments():
        for promoted in env.snapshots:
            in_use_ids.add(promoted.snapshot_id)

    def _still_needed(candidate: Snapshot) -> bool:
        if candidate.snapshot_id in in_use_ids:
            return True
        expires_at = to_datetime(candidate.ttl, relative_base=to_datetime(candidate.updated_ts))
        return expires_at > cutoff

    expired = []
    for group in grouped.values():
        # A single snapshot that is still needed keeps its whole group.
        if not any(_still_needed(member) for member in group):
            for member in group:
                expired.append(member)

    if expired:
        self.delete_snapshots(expired)

    return expired
Removes expired snapshots.
Expired snapshots are snapshots that have exceeded their time-to-live and are no longer in use within an environment.
Returns:
The list of removed snapshots.
@transactional()
def add_interval(
    self,
    snapshot_id: SnapshotIdLike,
    start: TimeLike,
    end: TimeLike,
    is_dev: bool = False,
) -> None:
    """Add an interval to a stored snapshot and persist it.

    Args:
        snapshot_id: The snapshot like object to add an interval to.
        start: The start of the interval to add.
        end: The end of the interval to add.
        is_dev: Forwarded to ``Snapshot.add_interval``.

    Raises:
        SQLMeshError: If the snapshot is not found in the state.
    """
    target_id = snapshot_id.snapshot_id
    found = self._get_snapshots([target_id], lock_for_update=True)
    if target_id not in found:
        raise SQLMeshError(f"Snapshot {target_id} was not found")

    logger.info("Adding interval for snapshot %s", target_id)
    stored = found[target_id]
    stored.add_interval(start, end, is_dev=is_dev)
    self._update_snapshot(stored)
Add an interval to a snapshot and sync it to the store.
Snapshots must be pushed before adding intervals to them.
Arguments:
- snapshot_id: The snapshot like object to add an interval to.
- start: The start of the interval to add.
- end: The end of the interval to add.
- is_dev: Indicates whether the given interval is being added while in development mode.
@transactional()
def remove_interval(
    self,
    snapshots: t.Iterable[SnapshotInfoLike],
    start: TimeLike,
    end: TimeLike,
    all_snapshots: t.Optional[t.Iterable[Snapshot]] = None,
) -> None:
    """Remove an interval from snapshots and persist each of them.

    Args:
        snapshots: Snapshots whose versions select what to update; only used
            when ``all_snapshots`` is falsy.
        start: The start of the interval to remove.
        end: The end of the interval to remove.
        all_snapshots: Snapshots to update directly, skipping the lookup of
            snapshots with matching versions.
    """
    if not all_snapshots:
        # Nothing usable was supplied: load every snapshot sharing a version.
        all_snapshots = self._get_snapshots_with_same_version(snapshots, lock_for_update=True)

    for affected in all_snapshots:
        logger.info("Removing interval for snapshot %s", affected.snapshot_id)
        affected.remove_interval(start, end)
        self._update_snapshot(affected)
Remove an interval from a list of snapshots and sync it to the store.
Because multiple snapshots can be pointing to the same version or physical table, this method can also grab all snapshots tied to the passed in version.
Arguments:
- snapshots: The snapshot info like object to remove intervals from.
- start: The start of the interval to remove.
- end: The end of the interval to remove.
- all_snapshots: All snapshots can be passed in to skip fetching matching snapshot versions.
@transactional()
def unpause_snapshots(
    self, snapshots: t.Iterable[SnapshotInfoLike], unpaused_dt: TimeLike
) -> None:
    """Unpause the target snapshots and pause their same-version siblings.

    Every stored snapshot sharing a version with a target is visited:
    targets without an ``unpaused_ts`` get ``unpaused_dt``, and non-targets
    that have an ``unpaused_ts`` get it cleared. Only changed snapshots are
    persisted.

    Args:
        snapshots: Target snapshots.
        unpaused_dt: The timestamp stored on snapshots that get unpaused.
    """
    # Materialize once: the input is consumed twice below and may be a
    # one-shot iterable such as a generator.
    targets = list(snapshots)
    target_snapshot_ids = {s.snapshot_id for s in targets}
    stored_snapshots = self._get_snapshots_with_same_version(targets, lock_for_update=True)
    for snapshot in stored_snapshots:
        is_target_snapshot = snapshot.snapshot_id in target_snapshot_ids
        if is_target_snapshot and not snapshot.unpaused_ts:
            logger.info("Unpausing snapshot %s", snapshot.snapshot_id)
            snapshot.set_unpaused_ts(unpaused_dt)
            self._update_snapshot(snapshot)
        elif not is_target_snapshot and snapshot.unpaused_ts:
            logger.info("Pausing snapshot %s", snapshot.snapshot_id)
            snapshot.set_unpaused_ts(None)
            self._update_snapshot(snapshot)
Unpauses target snapshots.
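Unpaused snapshots are scheduled for evaluation on a recurring basis. Once unpaused, a snapshot can't be paused again explicitly; however, unpausing a target snapshot pauses any other unpaused snapshot that shares its version.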
Arguments:
- snapshots: Target snapshots.
- unpaused_dt: The datetime object which indicates when target snapshots were unpaused.