StateSync
State sync is how SQLMesh keeps track of environments and their states, e.g. snapshots.
StateReader
StateReader provides a subset of the functionality of the StateSync class. As its name implies, it only allows for read-only operations on snapshots and environment states.
EngineAdapterStateSync
The provided sqlmesh.core.state_sync.EngineAdapterStateSync
leverages an existing engine
adapter to read and write state to the underlying data store.
1""" 2# StateSync 3 4State sync is how SQLMesh keeps track of environments and their states, e.g. snapshots. 5 6# StateReader 7 8StateReader provides a subset of the functionalities of the StateSync class. As its name 9implies, it only allows for read-only operations on snapshots and environment states. 10 11# EngineAdapterStateSync 12 13The provided `sqlmesh.core.state_sync.EngineAdapterStateSync` leverages an existing engine 14adapter to read and write state to the underlying data store. 15""" 16from __future__ import annotations 17 18import contextlib 19import json 20import logging 21import typing as t 22 23from sqlglot import exp 24 25from sqlmesh.core.dialect import select_from_values 26from sqlmesh.core.engine_adapter import EngineAdapter, TransactionType 27from sqlmesh.core.environment import Environment 28from sqlmesh.core.snapshot import ( 29 Snapshot, 30 SnapshotId, 31 SnapshotIdLike, 32 SnapshotNameVersionLike, 33) 34from sqlmesh.core.state_sync.base import StateSync 35from sqlmesh.core.state_sync.common import CommonStateSyncMixin, transactional 36from sqlmesh.utils.date import now_timestamp 37from sqlmesh.utils.errors import SQLMeshError 38 39logger = logging.getLogger(__name__) 40 41 42class EngineAdapterStateSync(CommonStateSyncMixin, StateSync): 43 """Manages state of models and snapshot with an existing engine adapter. 44 45 This state sync is convenient to use because it requires no additional setup. 46 You can reuse the same engine/warehouse that your data is stored in. 47 48 Args: 49 engine_adapter: The EngineAdapter to use to store and fetch snapshots. 50 schema: The schema to store state metadata in. 
51 """ 52 53 def __init__( 54 self, 55 engine_adapter: EngineAdapter, 56 schema: str, 57 ): 58 self.engine_adapter = engine_adapter 59 self.snapshots_table = f"{schema}._snapshots" 60 self.environments_table = f"{schema}._environments" 61 62 @property 63 def snapshot_columns_to_types(self) -> t.Dict[str, exp.DataType]: 64 return { 65 "name": exp.DataType.build("text"), 66 "identifier": exp.DataType.build("text"), 67 "version": exp.DataType.build("text"), 68 "snapshot": exp.DataType.build("text"), 69 } 70 71 @property 72 def environment_columns_to_types(self) -> t.Dict[str, exp.DataType]: 73 return { 74 "name": exp.DataType.build("text"), 75 "snapshots": exp.DataType.build("text"), 76 "start_at": exp.DataType.build("text"), 77 "end_at": exp.DataType.build("text"), 78 "plan_id": exp.DataType.build("text"), 79 "previous_plan_id": exp.DataType.build("text"), 80 "expiration_ts": exp.DataType.build("bigint"), 81 } 82 83 @transactional(transaction_type=TransactionType.DDL) 84 def init_schema(self) -> None: 85 """Creates the schema and table to store state.""" 86 self.engine_adapter.create_schema(self.snapshots_table) 87 88 self.engine_adapter.create_state_table( 89 self.snapshots_table, 90 self.snapshot_columns_to_types, 91 primary_key=("name", "identifier"), 92 ) 93 94 self.engine_adapter.create_index( 95 self.snapshots_table, "name_version_idx", ("name", "version") 96 ) 97 98 self.engine_adapter.create_state_table( 99 self.environments_table, 100 self.environment_columns_to_types, 101 primary_key=("name",), 102 ) 103 104 @transactional() 105 def push_snapshots(self, snapshots: t.Iterable[Snapshot]) -> None: 106 """Pushes snapshots to the state store, merging them with existing ones. 107 108 This method first finds all existing snapshots in the store and merges them with 109 the local snapshots. It will then delete all existing snapshots and then 110 insert all the local snapshots. This can be made safer with locks or merge/upsert. 
111 112 Args: 113 snapshot_ids: Iterable of snapshot ids to bulk push. 114 """ 115 snapshots_by_id = {} 116 for snapshot in snapshots: 117 if not snapshot.version: 118 raise SQLMeshError( 119 f"Snapshot {snapshot} has not been versioned yet. Create a plan before pushing a snapshot." 120 ) 121 snapshots_by_id[snapshot.snapshot_id] = snapshot 122 123 existing = self.snapshots_exist(snapshots_by_id) 124 125 if existing: 126 raise SQLMeshError(f"Snapshots {existing} already exists.") 127 128 snapshots = snapshots_by_id.values() 129 130 if snapshots: 131 self._push_snapshots(snapshots) 132 133 def _push_snapshots(self, snapshots: t.Iterable[Snapshot], overwrite: bool = False) -> None: 134 if overwrite: 135 snapshots = tuple(snapshots) 136 self.delete_snapshots(snapshots) 137 138 self.engine_adapter.insert_append( 139 self.snapshots_table, 140 next( 141 select_from_values( 142 [ 143 ( 144 snapshot.name, 145 snapshot.identifier, 146 snapshot.version, 147 snapshot.json(), 148 ) 149 for snapshot in snapshots 150 ], 151 columns_to_types=self.snapshot_columns_to_types, 152 ) 153 ), 154 contains_json=True, 155 ) 156 157 def delete_expired_environments(self) -> t.List[Environment]: 158 now_ts = now_timestamp() 159 filter_expr = exp.LTE( 160 this=exp.to_column("expiration_ts"), 161 expression=exp.Literal.number(now_ts), 162 ) 163 164 rows = self.engine_adapter.fetchall( 165 self._environments_query( 166 where=filter_expr, 167 lock_for_update=True, 168 ), 169 ignore_unsupported_errors=True, 170 ) 171 environments = [self._environment_from_row(r) for r in rows] 172 173 self.engine_adapter.delete_from( 174 self.environments_table, 175 where=filter_expr, 176 ) 177 178 return environments 179 180 def delete_snapshots(self, snapshot_ids: t.Iterable[SnapshotIdLike]) -> None: 181 self.engine_adapter.delete_from( 182 self.snapshots_table, where=self._snapshot_id_filter(snapshot_ids) 183 ) 184 185 def snapshots_exist(self, snapshot_ids: t.Iterable[SnapshotIdLike]) -> t.Set[SnapshotId]: 
186 return { 187 SnapshotId(name=name, identifier=identifier) 188 for name, identifier in self.engine_adapter.fetchall( 189 exp.select("name", "identifier") 190 .from_(self.snapshots_table) 191 .where(self._snapshot_id_filter(snapshot_ids)) 192 ) 193 } 194 195 def reset(self) -> None: 196 """Resets the state store to the state when it was first initialized.""" 197 self.engine_adapter.drop_table(self.snapshots_table) 198 self.engine_adapter.drop_table(self.environments_table) 199 self.init_schema() 200 201 def _update_environment(self, environment: Environment) -> None: 202 self.engine_adapter.delete_from( 203 self.environments_table, 204 where=exp.EQ( 205 this=exp.to_column("name"), 206 expression=exp.Literal.string(environment.name), 207 ), 208 ) 209 210 self.engine_adapter.insert_append( 211 self.environments_table, 212 next( 213 select_from_values( 214 [ 215 ( 216 environment.name, 217 json.dumps([snapshot.dict() for snapshot in environment.snapshots]), 218 environment.start_at, 219 environment.end_at, 220 environment.plan_id, 221 environment.previous_plan_id, 222 environment.expiration_ts, 223 ) 224 ], 225 columns_to_types=self.environment_columns_to_types, 226 ) 227 ), 228 columns_to_types=self.environment_columns_to_types, 229 contains_json=True, 230 ) 231 232 def _update_snapshot(self, snapshot: Snapshot) -> None: 233 self.engine_adapter.update_table( 234 self.snapshots_table, 235 {"snapshot": snapshot.json()}, 236 where=self._snapshot_id_filter([snapshot.snapshot_id]), 237 contains_json=True, 238 ) 239 240 def get_environments(self) -> t.List[Environment]: 241 """Fetches all environments. 242 243 Returns: 244 A list of all environments. 
245 """ 246 return [ 247 self._environment_from_row(row) 248 for row in self.engine_adapter.fetchall( 249 self._environments_query(), ignore_unsupported_errors=True 250 ) 251 ] 252 253 def _environment_from_row(self, row: t.Tuple[str, ...]) -> Environment: 254 return Environment(**{field: row[i] for i, field in enumerate(Environment.__fields__)}) 255 256 def _environments_query( 257 self, 258 where: t.Optional[str | exp.Expression] = None, 259 lock_for_update: bool = False, 260 ) -> exp.Select: 261 query = ( 262 exp.select(*(exp.to_identifier(field) for field in Environment.__fields__)) 263 .from_(self.environments_table) 264 .where(where) 265 ) 266 if lock_for_update: 267 return query.lock(copy=False) 268 return query 269 270 def _get_snapshots( 271 self, 272 snapshot_ids: t.Optional[t.Iterable[SnapshotIdLike]] = None, 273 lock_for_update: bool = False, 274 ) -> t.Dict[SnapshotId, Snapshot]: 275 """Fetches specified snapshots or all snapshots. 276 277 Args: 278 snapshot_ids: The collection of snapshot like objects to fetch. 279 lock_for_update: Lock the snapshot rows for future update 280 281 Returns: 282 A dictionary of snapshot ids to snapshots for ones that could be found. 
283 """ 284 query = ( 285 exp.select("snapshot") 286 .from_(self.snapshots_table) 287 .where(None if snapshot_ids is None else self._snapshot_id_filter(snapshot_ids)) 288 ) 289 if lock_for_update: 290 query = query.lock(copy=False) 291 292 snapshots: t.Dict[SnapshotId, Snapshot] = {} 293 duplicates: t.Dict[SnapshotId, Snapshot] = {} 294 295 for row in self.engine_adapter.fetchall(query, ignore_unsupported_errors=True): 296 snapshot = Snapshot.parse_raw(row[0]) 297 snapshot_id = snapshot.snapshot_id 298 if snapshot_id in snapshots: 299 other = duplicates.get(snapshot_id, snapshots[snapshot_id]) 300 duplicates[snapshot_id] = ( 301 snapshot if snapshot.updated_ts > other.updated_ts else other 302 ) 303 snapshots[snapshot_id] = duplicates[snapshot_id] 304 else: 305 snapshots[snapshot_id] = snapshot 306 307 if duplicates: 308 self._push_snapshots(duplicates.values(), overwrite=True) 309 logger.error("Found duplicate snapshots in the state store.") 310 311 return snapshots 312 313 def _get_snapshots_with_same_version( 314 self, 315 snapshots: t.Iterable[SnapshotNameVersionLike], 316 lock_for_update: bool = False, 317 ) -> t.List[Snapshot]: 318 """Fetches all snapshots that share the same version as the snapshots. 319 320 The output includes the snapshots with the specified identifiers. 321 322 Args: 323 snapshots: The collection of target name / version pairs. 324 lock_for_update: Lock the snapshot rows for future update 325 326 Returns: 327 The list of Snapshot objects. 
328 """ 329 if not snapshots: 330 return [] 331 332 query = ( 333 exp.select("snapshot") 334 .from_(self.snapshots_table) 335 .where(self._snapshot_name_version_filter(snapshots)) 336 ) 337 if lock_for_update: 338 query = query.lock(copy=False) 339 340 snapshot_rows = self.engine_adapter.fetchall(query, ignore_unsupported_errors=True) 341 return [Snapshot(**json.loads(row[0])) for row in snapshot_rows] 342 343 def _get_environment( 344 self, environment: str, lock_for_update: bool = False 345 ) -> t.Optional[Environment]: 346 """Fetches the environment if it exists. 347 348 Args: 349 environment: The environment 350 lock_for_update: Lock the snapshot rows for future update 351 352 Returns: 353 The environment object. 354 """ 355 row = self.engine_adapter.fetchone( 356 self._environments_query( 357 where=exp.EQ( 358 this=exp.to_column("name"), 359 expression=exp.Literal.string(environment), 360 ), 361 lock_for_update=lock_for_update, 362 ), 363 ignore_unsupported_errors=True, 364 ) 365 366 if not row: 367 return None 368 369 env = self._environment_from_row(row) 370 return env 371 372 def _snapshot_id_filter( 373 self, snapshot_ids: t.Iterable[SnapshotIdLike] 374 ) -> t.Union[exp.Or, exp.Boolean]: 375 if not snapshot_ids: 376 return exp.FALSE 377 378 return exp.or_( 379 *( 380 exp.and_( 381 exp.EQ( 382 this=exp.to_column("name"), 383 expression=exp.Literal.string(snapshot_id.name), 384 ), 385 exp.EQ( 386 this=exp.to_column("identifier"), 387 expression=exp.Literal.string(snapshot_id.identifier), 388 ), 389 ) 390 for snapshot_id in snapshot_ids 391 ) 392 ) 393 394 def _snapshot_name_version_filter( 395 self, snapshot_name_versions: t.Iterable[SnapshotNameVersionLike] 396 ) -> t.Union[exp.Or, exp.Boolean]: 397 if not snapshot_name_versions: 398 return exp.FALSE 399 400 return exp.or_( 401 *( 402 exp.and_( 403 exp.EQ( 404 this=exp.to_column("name"), 405 expression=exp.Literal.string(snapshot_name_version.name), 406 ), 407 exp.EQ( 408 this=exp.to_column("version"), 409 
expression=exp.Literal.string(snapshot_name_version.version), 410 ), 411 ) 412 for snapshot_name_version in snapshot_name_versions 413 ) 414 ) 415 416 @contextlib.contextmanager 417 def _transaction(self, transaction_type: TransactionType) -> t.Generator[None, None, None]: 418 with self.engine_adapter.transaction(transaction_type=transaction_type): 419 yield
class EngineAdapterStateSync(CommonStateSyncMixin, StateSync):
    """Manages state of models and snapshots with an existing engine adapter.

    This state sync is convenient to use because it requires no additional setup.
    You can reuse the same engine/warehouse that your data is stored in.

    Args:
        engine_adapter: The EngineAdapter to use to store and fetch snapshots.
        schema: The schema to store state metadata in.
    """

    def __init__(
        self,
        engine_adapter: EngineAdapter,
        schema: str,
    ):
        self.engine_adapter = engine_adapter
        # Both state tables live in the given schema.
        self.snapshots_table = f"{schema}._snapshots"
        self.environments_table = f"{schema}._environments"

    @property
    def snapshot_columns_to_types(self) -> t.Dict[str, exp.DataType]:
        """Column name to data type mapping of the snapshots table."""
        return {
            "name": exp.DataType.build("text"),
            "identifier": exp.DataType.build("text"),
            "version": exp.DataType.build("text"),
            "snapshot": exp.DataType.build("text"),
        }

    @property
    def environment_columns_to_types(self) -> t.Dict[str, exp.DataType]:
        """Column name to data type mapping of the environments table."""
        return {
            "name": exp.DataType.build("text"),
            "snapshots": exp.DataType.build("text"),
            "start_at": exp.DataType.build("text"),
            "end_at": exp.DataType.build("text"),
            "plan_id": exp.DataType.build("text"),
            "previous_plan_id": exp.DataType.build("text"),
            "expiration_ts": exp.DataType.build("bigint"),
        }

    @transactional(transaction_type=TransactionType.DDL)
    def init_schema(self) -> None:
        """Creates the schema, the two state tables and the snapshot index."""
        adapter = self.engine_adapter
        snapshots_table = self.snapshots_table

        # NOTE(review): the qualified table name (not the bare schema) is passed
        # here; presumably the adapter extracts the schema part -- confirm.
        adapter.create_schema(snapshots_table)
        adapter.create_state_table(
            snapshots_table,
            self.snapshot_columns_to_types,
            primary_key=("name", "identifier"),
        )
        adapter.create_index(snapshots_table, "name_version_idx", ("name", "version"))
        adapter.create_state_table(
            self.environments_table,
            self.environment_columns_to_types,
            primary_key=("name",),
        )

    @transactional()
    def push_snapshots(self, snapshots: t.Iterable[Snapshot]) -> None:
        """Pushes new snapshots to the state store.

        Every snapshot must already be versioned and none of them may already be
        present in the store. Stored snapshots are never merged or overwritten
        here: if any of the given snapshots already exists, the push is rejected
        before anything is inserted.

        Args:
            snapshots: Iterable of snapshots to bulk push.

        Raises:
            SQLMeshError: If a snapshot has no version yet or already exists in
                the store.
        """
        # Keyed by id so that repeated snapshots in the input collapse to one row.
        snapshots_by_id = {}
        for snapshot in snapshots:
            if not snapshot.version:
                raise SQLMeshError(
                    f"Snapshot {snapshot} has not been versioned yet. Create a plan before pushing a snapshot."
                )
            snapshots_by_id[snapshot.snapshot_id] = snapshot

        if not snapshots_by_id:
            # Nothing to check or insert; avoid querying the store.
            return

        existing = self.snapshots_exist(snapshots_by_id)
        if existing:
            raise SQLMeshError(f"Snapshots {existing} already exists.")

        self._push_snapshots(snapshots_by_id.values())

    def _push_snapshots(self, snapshots: t.Iterable[Snapshot], overwrite: bool = False) -> None:
        """Inserts snapshot rows, optionally deleting rows with the same ids first.

        Args:
            snapshots: The snapshots to insert.
            overwrite: Whether to delete the given snapshots from the table before
                inserting them.
        """
        if overwrite:
            # Materialize: the iterable is consumed twice (delete, then insert).
            snapshots = tuple(snapshots)
            self.delete_snapshots(snapshots)

        self.engine_adapter.insert_append(
            self.snapshots_table,
            # Only the first query produced by select_from_values is used.
            next(
                select_from_values(
                    [
                        (
                            snapshot.name,
                            snapshot.identifier,
                            snapshot.version,
                            snapshot.json(),
                        )
                        for snapshot in snapshots
                    ],
                    columns_to_types=self.snapshot_columns_to_types,
                )
            ),
            contains_json=True,
        )

    def delete_expired_environments(self) -> t.List[Environment]:
        """Removes expired environments.

        An environment counts as expired when its `expiration_ts` is less than or
        equal to the current timestamp.

        Returns:
            The list of removed environments.
        """
        expired_filter = exp.LTE(
            this=exp.to_column("expiration_ts"),
            expression=exp.Literal.number(now_timestamp()),
        )

        # Read the expired rows (requesting a row lock) before deleting them so
        # they can be returned to the caller.
        expired_query = self._environments_query(where=expired_filter, lock_for_update=True)
        expired_rows = self.engine_adapter.fetchall(expired_query, ignore_unsupported_errors=True)
        removed = [self._environment_from_row(row) for row in expired_rows]

        self.engine_adapter.delete_from(self.environments_table, where=expired_filter)
        return removed

    def delete_snapshots(self, snapshot_ids: t.Iterable[SnapshotIdLike]) -> None:
        """Deletes the given snapshots from the snapshots table.

        Args:
            snapshot_ids: Snapshot like objects identifying the rows to delete.
        """
        id_filter = self._snapshot_id_filter(snapshot_ids)
        self.engine_adapter.delete_from(self.snapshots_table, where=id_filter)

    def snapshots_exist(self, snapshot_ids: t.Iterable[SnapshotIdLike]) -> t.Set[SnapshotId]:
        """Checks which of the given snapshots exist in the snapshots table.

        Args:
            snapshot_ids: Iterable of snapshot ids to bulk check.

        Returns:
            A set of the snapshot ids that were found.
        """
        query = (
            exp.select("name", "identifier")
            .from_(self.snapshots_table)
            .where(self._snapshot_id_filter(snapshot_ids))
        )

        found: t.Set[SnapshotId] = set()
        for name, identifier in self.engine_adapter.fetchall(query):
            found.add(SnapshotId(name=name, identifier=identifier))
        return found

    def reset(self) -> None:
        """Resets the state store to the state when it was first initialized."""
        for table in (self.snapshots_table, self.environments_table):
            self.engine_adapter.drop_table(table)
        # Recreate the empty tables.
        self.init_schema()

    def _update_environment(self, environment: Environment) -> None:
        """Replaces the stored row of the environment with its current values."""
        # Delete-then-insert: any existing row with the same name is removed first.
        self.engine_adapter.delete_from(
            self.environments_table,
            where=exp.EQ(
                this=exp.to_column("name"),
                expression=exp.Literal.string(environment.name),
            ),
        )

        self.engine_adapter.insert_append(
            self.environments_table,
            next(
                select_from_values(
                    [
                        (
                            environment.name,
                            json.dumps([snapshot.dict() for snapshot in environment.snapshots]),
                            environment.start_at,
                            environment.end_at,
                            environment.plan_id,
                            environment.previous_plan_id,
                            environment.expiration_ts,
                        )
                    ],
                    columns_to_types=self.environment_columns_to_types,
                )
            ),
            columns_to_types=self.environment_columns_to_types,
            contains_json=True,
        )

    def _update_snapshot(self, snapshot: Snapshot) -> None:
        """Overwrites the serialized payload of an already stored snapshot."""
        self.engine_adapter.update_table(
            self.snapshots_table,
            {"snapshot": snapshot.json()},
            where=self._snapshot_id_filter([snapshot.snapshot_id]),
            contains_json=True,
        )

    def get_environments(self) -> t.List[Environment]:
        """Fetches all environments.

        Returns:
            A list of all environments.
        """
        query = self._environments_query()
        rows = self.engine_adapter.fetchall(query, ignore_unsupported_errors=True)
        return [self._environment_from_row(row) for row in rows]

    def _environment_from_row(self, row: t.Tuple[str, ...]) -> Environment:
        """Builds an Environment from a row selected by `_environments_query`."""
        # Relies on the row's columns being in Environment.__fields__ order.
        return Environment(**{field: row[i] for i, field in enumerate(Environment.__fields__)})

    def _environments_query(
        self,
        where: t.Optional[str | exp.Expression] = None,
        lock_for_update: bool = False,
    ) -> exp.Select:
        """Builds the query selecting every Environment field from the environments table.

        Args:
            where: Optional filter condition.
            lock_for_update: Whether to add a lock clause to the query.

        Returns:
            The select expression.
        """
        query = (
            exp.select(*(exp.to_identifier(field) for field in Environment.__fields__))
            .from_(self.environments_table)
            .where(where)
        )
        if lock_for_update:
            return query.lock(copy=False)
        return query

    def _get_snapshots(
        self,
        snapshot_ids: t.Optional[t.Iterable[SnapshotIdLike]] = None,
        lock_for_update: bool = False,
    ) -> t.Dict[SnapshotId, Snapshot]:
        """Fetches specified snapshots or all snapshots.

        If the same snapshot id is read more than once, the copy with the greatest
        `updated_ts` is kept and the stored rows for that id are rewritten with it.

        Args:
            snapshot_ids: The collection of snapshot like objects to fetch.
                All snapshots are fetched when None.
            lock_for_update: Lock the snapshot rows for future update

        Returns:
            A dictionary of snapshot ids to snapshots for ones that could be found.
        """
        query = (
            exp.select("snapshot")
            .from_(self.snapshots_table)
            .where(None if snapshot_ids is None else self._snapshot_id_filter(snapshot_ids))
        )
        if lock_for_update:
            query = query.lock(copy=False)

        snapshots: t.Dict[SnapshotId, Snapshot] = {}
        duplicates: t.Dict[SnapshotId, Snapshot] = {}

        for row in self.engine_adapter.fetchall(query, ignore_unsupported_errors=True):
            snapshot = Snapshot.parse_raw(row[0])
            snapshot_id = snapshot.snapshot_id
            if snapshot_id in snapshots:
                # Keep whichever copy was updated most recently.
                other = duplicates.get(snapshot_id, snapshots[snapshot_id])
                duplicates[snapshot_id] = (
                    snapshot if snapshot.updated_ts > other.updated_ts else other
                )
                snapshots[snapshot_id] = duplicates[snapshot_id]
            else:
                snapshots[snapshot_id] = snapshot

        if duplicates:
            # Repair the store: replace the duplicated rows with the chosen copy.
            self._push_snapshots(duplicates.values(), overwrite=True)
            logger.error("Found duplicate snapshots in the state store.")

        return snapshots

    def _get_snapshots_with_same_version(
        self,
        snapshots: t.Iterable[SnapshotNameVersionLike],
        lock_for_update: bool = False,
    ) -> t.List[Snapshot]:
        """Fetches all snapshots that share the same version as the snapshots.

        The output includes the snapshots with the specified identifiers.

        Args:
            snapshots: The collection of target name / version pairs.
            lock_for_update: Lock the snapshot rows for future update

        Returns:
            The list of Snapshot objects.
        """
        # Materialize so that the emptiness check also works for generators,
        # which are always truthy.
        name_versions = tuple(snapshots)
        if not name_versions:
            return []

        query = (
            exp.select("snapshot")
            .from_(self.snapshots_table)
            .where(self._snapshot_name_version_filter(name_versions))
        )
        if lock_for_update:
            query = query.lock(copy=False)

        snapshot_rows = self.engine_adapter.fetchall(query, ignore_unsupported_errors=True)
        return [Snapshot(**json.loads(row[0])) for row in snapshot_rows]

    def _get_environment(
        self, environment: str, lock_for_update: bool = False
    ) -> t.Optional[Environment]:
        """Fetches the environment if it exists.

        Args:
            environment: The environment name.
            lock_for_update: Lock the environment row for future update

        Returns:
            The environment object, or None if no row matches the name.
        """
        row = self.engine_adapter.fetchone(
            self._environments_query(
                where=exp.EQ(
                    this=exp.to_column("name"),
                    expression=exp.Literal.string(environment),
                ),
                lock_for_update=lock_for_update,
            ),
            ignore_unsupported_errors=True,
        )

        if not row:
            return None

        return self._environment_from_row(row)

    def _snapshot_id_filter(
        self, snapshot_ids: t.Iterable[SnapshotIdLike]
    ) -> t.Union[exp.Or, exp.Boolean]:
        """Builds a condition matching any of the given (name, identifier) pairs.

        Returns `exp.FALSE` when no ids are given.
        """
        # Materialize first: a generator is always truthy, so an empty one would
        # bypass the guard below and reach exp.or_ with no conditions.
        ids = tuple(snapshot_ids)
        if not ids:
            return exp.FALSE

        return exp.or_(
            *(
                exp.and_(
                    exp.EQ(
                        this=exp.to_column("name"),
                        expression=exp.Literal.string(snapshot_id.name),
                    ),
                    exp.EQ(
                        this=exp.to_column("identifier"),
                        expression=exp.Literal.string(snapshot_id.identifier),
                    ),
                )
                for snapshot_id in ids
            )
        )

    def _snapshot_name_version_filter(
        self, snapshot_name_versions: t.Iterable[SnapshotNameVersionLike]
    ) -> t.Union[exp.Or, exp.Boolean]:
        """Builds a condition matching any of the given (name, version) pairs.

        Returns `exp.FALSE` when no pairs are given.
        """
        # Materialize first: a generator is always truthy, so an empty one would
        # bypass the guard below and reach exp.or_ with no conditions.
        name_versions = tuple(snapshot_name_versions)
        if not name_versions:
            return exp.FALSE

        return exp.or_(
            *(
                exp.and_(
                    exp.EQ(
                        this=exp.to_column("name"),
                        expression=exp.Literal.string(snapshot_name_version.name),
                    ),
                    exp.EQ(
                        this=exp.to_column("version"),
                        expression=exp.Literal.string(snapshot_name_version.version),
                    ),
                )
                for snapshot_name_version in name_versions
            )
        )

    @contextlib.contextmanager
    def _transaction(self, transaction_type: TransactionType) -> t.Generator[None, None, None]:
        """Runs the enclosed block inside an engine adapter transaction."""
        with self.engine_adapter.transaction(transaction_type=transaction_type):
            yield
Manages state of models and snapshots with an existing engine adapter.
This state sync is convenient to use because it requires no additional setup. You can reuse the same engine/warehouse that your data is stored in.
Arguments:
- engine_adapter: The EngineAdapter to use to store and fetch snapshots.
- schema: The schema to store state metadata in.
@transactional(transaction_type=TransactionType.DDL)
def init_schema(self) -> None:
    """Creates the schema, the two state tables and the snapshot index."""
    adapter = self.engine_adapter
    snapshots_table = self.snapshots_table

    # NOTE(review): the qualified table name (not the bare schema) is passed
    # here; presumably the adapter extracts the schema part -- confirm.
    adapter.create_schema(snapshots_table)
    adapter.create_state_table(
        snapshots_table,
        self.snapshot_columns_to_types,
        primary_key=("name", "identifier"),
    )
    adapter.create_index(snapshots_table, "name_version_idx", ("name", "version"))
    adapter.create_state_table(
        self.environments_table,
        self.environment_columns_to_types,
        primary_key=("name",),
    )
Creates the schema and table to store state.
@transactional()
def push_snapshots(self, snapshots: t.Iterable[Snapshot]) -> None:
    """Pushes new snapshots to the state store.

    Every snapshot must already be versioned and none of them may already be
    present in the store. Stored snapshots are never merged or overwritten
    here: if any of the given snapshots already exists, the push is rejected
    before anything is inserted.

    Args:
        snapshots: Iterable of snapshots to bulk push.

    Raises:
        SQLMeshError: If a snapshot has no version yet or already exists in
            the store.
    """
    # Keyed by id so that repeated snapshots in the input collapse to one row.
    snapshots_by_id = {}
    for snapshot in snapshots:
        if not snapshot.version:
            raise SQLMeshError(
                f"Snapshot {snapshot} has not been versioned yet. Create a plan before pushing a snapshot."
            )
        snapshots_by_id[snapshot.snapshot_id] = snapshot

    if not snapshots_by_id:
        # Nothing to check or insert; avoid querying the store.
        return

    existing = self.snapshots_exist(snapshots_by_id)
    if existing:
        raise SQLMeshError(f"Snapshots {existing} already exists.")

    self._push_snapshots(snapshots_by_id.values())
Pushes new snapshots to the state store.
Every snapshot must already be versioned, and none of them may already exist in the store. Existing snapshots are not merged or overwritten: if a snapshot has no version, or if any of the given snapshots is already stored, a SQLMeshError is raised and nothing is inserted.
Arguments:
- snapshots: Iterable of snapshots to bulk push.
def delete_expired_environments(self) -> t.List[Environment]:
    """Removes expired environments.

    An environment counts as expired when its `expiration_ts` is less than or
    equal to the current timestamp.

    Returns:
        The list of removed environments.
    """
    expired_filter = exp.LTE(
        this=exp.to_column("expiration_ts"),
        expression=exp.Literal.number(now_timestamp()),
    )

    # Read the expired rows (requesting a row lock) before deleting them so
    # they can be returned to the caller.
    expired_query = self._environments_query(where=expired_filter, lock_for_update=True)
    expired_rows = self.engine_adapter.fetchall(expired_query, ignore_unsupported_errors=True)
    removed = [self._environment_from_row(row) for row in expired_rows]

    self.engine_adapter.delete_from(self.environments_table, where=expired_filter)
    return removed
Removes expired environments.
Expired environments are environments that have exceeded their time-to-live value.
Returns:
The list of removed environments.
def delete_snapshots(self, snapshot_ids: t.Iterable[SnapshotIdLike]) -> None:
    """Deletes the given snapshots from the snapshots table.

    Args:
        snapshot_ids: Snapshot like objects identifying the rows to delete.
    """
    id_filter = self._snapshot_id_filter(snapshot_ids)
    self.engine_adapter.delete_from(self.snapshots_table, where=id_filter)
Delete snapshots from the state sync.
Arguments:
- snapshot_ids: A list of snapshot like objects to delete.
def snapshots_exist(self, snapshot_ids: t.Iterable[SnapshotIdLike]) -> t.Set[SnapshotId]:
    """Checks which of the given snapshots exist in the snapshots table.

    Args:
        snapshot_ids: Iterable of snapshot ids to bulk check.

    Returns:
        A set of the snapshot ids that were found.
    """
    query = (
        exp.select("name", "identifier")
        .from_(self.snapshots_table)
        .where(self._snapshot_id_filter(snapshot_ids))
    )

    found: t.Set[SnapshotId] = set()
    for name, identifier in self.engine_adapter.fetchall(query):
        found.add(SnapshotId(name=name, identifier=identifier))
    return found
Checks if multiple snapshots exist in the state sync.
Arguments:
- snapshot_ids: Iterable of snapshot ids to bulk check.
Returns:
A set of all the existing snapshot ids.
def reset(self) -> None:
    """Resets the state store to the state when it was first initialized."""
    for table in (self.snapshots_table, self.environments_table):
        self.engine_adapter.drop_table(table)
    # Recreate the empty tables.
    self.init_schema()
Resets the state store to the state when it was first initialized.
def get_environments(self) -> t.List[Environment]:
    """Fetches all environments.

    Returns:
        A list of all environments.
    """
    query = self._environments_query()
    rows = self.engine_adapter.fetchall(query, ignore_unsupported_errors=True)
    return [self._environment_from_row(row) for row in rows]
Fetches all environments.
Returns:
A list of all environments.