Edit on GitHub

StateSync

State sync is how SQLMesh keeps track of environments and their states, e.g. snapshots.

StateReader

StateReader provides a subset of the functionality of the StateSync class. As its name implies, it allows only read operations on snapshots and environment states.

EngineAdapterStateSync

The provided sqlmesh.core.state_sync.EngineAdapterStateSync leverages an existing engine adapter to read and write state to the underlying data store.

  1"""
  2# StateSync
  3
  4State sync is how SQLMesh keeps track of environments and their states, e.g. snapshots.
  5
  6# StateReader
  7
  8StateReader provides a subset of the functionalities of the StateSync class. As its name
  9implies, it only allows for read-only operations on snapshots and environment states.
 10
 11# EngineAdapterStateSync
 12
 13The provided `sqlmesh.core.state_sync.EngineAdapterStateSync` leverages an existing engine
 14adapter to read and write state to the underlying data store.
 15"""
 16from __future__ import annotations
 17
 18import contextlib
 19import json
 20import logging
 21import typing as t
 22
 23from sqlglot import exp
 24
 25from sqlmesh.core.dialect import select_from_values
 26from sqlmesh.core.engine_adapter import EngineAdapter, TransactionType
 27from sqlmesh.core.environment import Environment
 28from sqlmesh.core.snapshot import (
 29    Snapshot,
 30    SnapshotId,
 31    SnapshotIdLike,
 32    SnapshotNameVersionLike,
 33)
 34from sqlmesh.core.state_sync.base import StateSync
 35from sqlmesh.core.state_sync.common import CommonStateSyncMixin, transactional
 36from sqlmesh.utils.date import now_timestamp
 37from sqlmesh.utils.errors import SQLMeshError
 38
 39logger = logging.getLogger(__name__)
 40
 41
 42class EngineAdapterStateSync(CommonStateSyncMixin, StateSync):
 43    """Manages state of models and snapshot with an existing engine adapter.
 44
 45    This state sync is convenient to use because it requires no additional setup.
 46    You can reuse the same engine/warehouse that your data is stored in.
 47
 48    Args:
 49        engine_adapter: The EngineAdapter to use to store and fetch snapshots.
 50        schema: The schema to store state metadata in.
 51    """
 52
 53    def __init__(
 54        self,
 55        engine_adapter: EngineAdapter,
 56        schema: str,
 57    ):
 58        self.engine_adapter = engine_adapter
 59        self.snapshots_table = f"{schema}._snapshots"
 60        self.environments_table = f"{schema}._environments"
 61
 62    @property
 63    def snapshot_columns_to_types(self) -> t.Dict[str, exp.DataType]:
 64        return {
 65            "name": exp.DataType.build("text"),
 66            "identifier": exp.DataType.build("text"),
 67            "version": exp.DataType.build("text"),
 68            "snapshot": exp.DataType.build("text"),
 69        }
 70
 71    @property
 72    def environment_columns_to_types(self) -> t.Dict[str, exp.DataType]:
 73        return {
 74            "name": exp.DataType.build("text"),
 75            "snapshots": exp.DataType.build("text"),
 76            "start_at": exp.DataType.build("text"),
 77            "end_at": exp.DataType.build("text"),
 78            "plan_id": exp.DataType.build("text"),
 79            "previous_plan_id": exp.DataType.build("text"),
 80            "expiration_ts": exp.DataType.build("bigint"),
 81        }
 82
 83    @transactional(transaction_type=TransactionType.DDL)
 84    def init_schema(self) -> None:
 85        """Creates the schema and table to store state."""
 86        self.engine_adapter.create_schema(self.snapshots_table)
 87
 88        self.engine_adapter.create_state_table(
 89            self.snapshots_table,
 90            self.snapshot_columns_to_types,
 91            primary_key=("name", "identifier"),
 92        )
 93
 94        self.engine_adapter.create_index(
 95            self.snapshots_table, "name_version_idx", ("name", "version")
 96        )
 97
 98        self.engine_adapter.create_state_table(
 99            self.environments_table,
100            self.environment_columns_to_types,
101            primary_key=("name",),
102        )
103
104    @transactional()
105    def push_snapshots(self, snapshots: t.Iterable[Snapshot]) -> None:
106        """Pushes snapshots to the state store, merging them with existing ones.
107
108        This method first finds all existing snapshots in the store and merges them with
109        the local snapshots. It will then delete all existing snapshots and then
110        insert all the local snapshots. This can be made safer with locks or merge/upsert.
111
112        Args:
113            snapshot_ids: Iterable of snapshot ids to bulk push.
114        """
115        snapshots_by_id = {}
116        for snapshot in snapshots:
117            if not snapshot.version:
118                raise SQLMeshError(
119                    f"Snapshot {snapshot} has not been versioned yet. Create a plan before pushing a snapshot."
120                )
121            snapshots_by_id[snapshot.snapshot_id] = snapshot
122
123        existing = self.snapshots_exist(snapshots_by_id)
124
125        if existing:
126            raise SQLMeshError(f"Snapshots {existing} already exists.")
127
128        snapshots = snapshots_by_id.values()
129
130        if snapshots:
131            self._push_snapshots(snapshots)
132
133    def _push_snapshots(self, snapshots: t.Iterable[Snapshot], overwrite: bool = False) -> None:
134        if overwrite:
135            snapshots = tuple(snapshots)
136            self.delete_snapshots(snapshots)
137
138        self.engine_adapter.insert_append(
139            self.snapshots_table,
140            next(
141                select_from_values(
142                    [
143                        (
144                            snapshot.name,
145                            snapshot.identifier,
146                            snapshot.version,
147                            snapshot.json(),
148                        )
149                        for snapshot in snapshots
150                    ],
151                    columns_to_types=self.snapshot_columns_to_types,
152                )
153            ),
154            contains_json=True,
155        )
156
157    def delete_expired_environments(self) -> t.List[Environment]:
158        now_ts = now_timestamp()
159        filter_expr = exp.LTE(
160            this=exp.to_column("expiration_ts"),
161            expression=exp.Literal.number(now_ts),
162        )
163
164        rows = self.engine_adapter.fetchall(
165            self._environments_query(
166                where=filter_expr,
167                lock_for_update=True,
168            ),
169            ignore_unsupported_errors=True,
170        )
171        environments = [self._environment_from_row(r) for r in rows]
172
173        self.engine_adapter.delete_from(
174            self.environments_table,
175            where=filter_expr,
176        )
177
178        return environments
179
180    def delete_snapshots(self, snapshot_ids: t.Iterable[SnapshotIdLike]) -> None:
181        self.engine_adapter.delete_from(
182            self.snapshots_table, where=self._snapshot_id_filter(snapshot_ids)
183        )
184
185    def snapshots_exist(self, snapshot_ids: t.Iterable[SnapshotIdLike]) -> t.Set[SnapshotId]:
186        return {
187            SnapshotId(name=name, identifier=identifier)
188            for name, identifier in self.engine_adapter.fetchall(
189                exp.select("name", "identifier")
190                .from_(self.snapshots_table)
191                .where(self._snapshot_id_filter(snapshot_ids))
192            )
193        }
194
195    def reset(self) -> None:
196        """Resets the state store to the state when it was first initialized."""
197        self.engine_adapter.drop_table(self.snapshots_table)
198        self.engine_adapter.drop_table(self.environments_table)
199        self.init_schema()
200
201    def _update_environment(self, environment: Environment) -> None:
202        self.engine_adapter.delete_from(
203            self.environments_table,
204            where=exp.EQ(
205                this=exp.to_column("name"),
206                expression=exp.Literal.string(environment.name),
207            ),
208        )
209
210        self.engine_adapter.insert_append(
211            self.environments_table,
212            next(
213                select_from_values(
214                    [
215                        (
216                            environment.name,
217                            json.dumps([snapshot.dict() for snapshot in environment.snapshots]),
218                            environment.start_at,
219                            environment.end_at,
220                            environment.plan_id,
221                            environment.previous_plan_id,
222                            environment.expiration_ts,
223                        )
224                    ],
225                    columns_to_types=self.environment_columns_to_types,
226                )
227            ),
228            columns_to_types=self.environment_columns_to_types,
229            contains_json=True,
230        )
231
232    def _update_snapshot(self, snapshot: Snapshot) -> None:
233        self.engine_adapter.update_table(
234            self.snapshots_table,
235            {"snapshot": snapshot.json()},
236            where=self._snapshot_id_filter([snapshot.snapshot_id]),
237            contains_json=True,
238        )
239
240    def get_environments(self) -> t.List[Environment]:
241        """Fetches all environments.
242
243        Returns:
244            A list of all environments.
245        """
246        return [
247            self._environment_from_row(row)
248            for row in self.engine_adapter.fetchall(
249                self._environments_query(), ignore_unsupported_errors=True
250            )
251        ]
252
253    def _environment_from_row(self, row: t.Tuple[str, ...]) -> Environment:
254        return Environment(**{field: row[i] for i, field in enumerate(Environment.__fields__)})
255
256    def _environments_query(
257        self,
258        where: t.Optional[str | exp.Expression] = None,
259        lock_for_update: bool = False,
260    ) -> exp.Select:
261        query = (
262            exp.select(*(exp.to_identifier(field) for field in Environment.__fields__))
263            .from_(self.environments_table)
264            .where(where)
265        )
266        if lock_for_update:
267            return query.lock(copy=False)
268        return query
269
270    def _get_snapshots(
271        self,
272        snapshot_ids: t.Optional[t.Iterable[SnapshotIdLike]] = None,
273        lock_for_update: bool = False,
274    ) -> t.Dict[SnapshotId, Snapshot]:
275        """Fetches specified snapshots or all snapshots.
276
277        Args:
278            snapshot_ids: The collection of snapshot like objects to fetch.
279            lock_for_update: Lock the snapshot rows for future update
280
281        Returns:
282            A dictionary of snapshot ids to snapshots for ones that could be found.
283        """
284        query = (
285            exp.select("snapshot")
286            .from_(self.snapshots_table)
287            .where(None if snapshot_ids is None else self._snapshot_id_filter(snapshot_ids))
288        )
289        if lock_for_update:
290            query = query.lock(copy=False)
291
292        snapshots: t.Dict[SnapshotId, Snapshot] = {}
293        duplicates: t.Dict[SnapshotId, Snapshot] = {}
294
295        for row in self.engine_adapter.fetchall(query, ignore_unsupported_errors=True):
296            snapshot = Snapshot.parse_raw(row[0])
297            snapshot_id = snapshot.snapshot_id
298            if snapshot_id in snapshots:
299                other = duplicates.get(snapshot_id, snapshots[snapshot_id])
300                duplicates[snapshot_id] = (
301                    snapshot if snapshot.updated_ts > other.updated_ts else other
302                )
303                snapshots[snapshot_id] = duplicates[snapshot_id]
304            else:
305                snapshots[snapshot_id] = snapshot
306
307        if duplicates:
308            self._push_snapshots(duplicates.values(), overwrite=True)
309            logger.error("Found duplicate snapshots in the state store.")
310
311        return snapshots
312
313    def _get_snapshots_with_same_version(
314        self,
315        snapshots: t.Iterable[SnapshotNameVersionLike],
316        lock_for_update: bool = False,
317    ) -> t.List[Snapshot]:
318        """Fetches all snapshots that share the same version as the snapshots.
319
320        The output includes the snapshots with the specified identifiers.
321
322        Args:
323            snapshots: The collection of target name / version pairs.
324            lock_for_update: Lock the snapshot rows for future update
325
326        Returns:
327            The list of Snapshot objects.
328        """
329        if not snapshots:
330            return []
331
332        query = (
333            exp.select("snapshot")
334            .from_(self.snapshots_table)
335            .where(self._snapshot_name_version_filter(snapshots))
336        )
337        if lock_for_update:
338            query = query.lock(copy=False)
339
340        snapshot_rows = self.engine_adapter.fetchall(query, ignore_unsupported_errors=True)
341        return [Snapshot(**json.loads(row[0])) for row in snapshot_rows]
342
343    def _get_environment(
344        self, environment: str, lock_for_update: bool = False
345    ) -> t.Optional[Environment]:
346        """Fetches the environment if it exists.
347
348        Args:
349            environment: The environment
350            lock_for_update: Lock the snapshot rows for future update
351
352        Returns:
353            The environment object.
354        """
355        row = self.engine_adapter.fetchone(
356            self._environments_query(
357                where=exp.EQ(
358                    this=exp.to_column("name"),
359                    expression=exp.Literal.string(environment),
360                ),
361                lock_for_update=lock_for_update,
362            ),
363            ignore_unsupported_errors=True,
364        )
365
366        if not row:
367            return None
368
369        env = self._environment_from_row(row)
370        return env
371
372    def _snapshot_id_filter(
373        self, snapshot_ids: t.Iterable[SnapshotIdLike]
374    ) -> t.Union[exp.Or, exp.Boolean]:
375        if not snapshot_ids:
376            return exp.FALSE
377
378        return exp.or_(
379            *(
380                exp.and_(
381                    exp.EQ(
382                        this=exp.to_column("name"),
383                        expression=exp.Literal.string(snapshot_id.name),
384                    ),
385                    exp.EQ(
386                        this=exp.to_column("identifier"),
387                        expression=exp.Literal.string(snapshot_id.identifier),
388                    ),
389                )
390                for snapshot_id in snapshot_ids
391            )
392        )
393
394    def _snapshot_name_version_filter(
395        self, snapshot_name_versions: t.Iterable[SnapshotNameVersionLike]
396    ) -> t.Union[exp.Or, exp.Boolean]:
397        if not snapshot_name_versions:
398            return exp.FALSE
399
400        return exp.or_(
401            *(
402                exp.and_(
403                    exp.EQ(
404                        this=exp.to_column("name"),
405                        expression=exp.Literal.string(snapshot_name_version.name),
406                    ),
407                    exp.EQ(
408                        this=exp.to_column("version"),
409                        expression=exp.Literal.string(snapshot_name_version.version),
410                    ),
411                )
412                for snapshot_name_version in snapshot_name_versions
413            )
414        )
415
416    @contextlib.contextmanager
417    def _transaction(self, transaction_type: TransactionType) -> t.Generator[None, None, None]:
418        with self.engine_adapter.transaction(transaction_type=transaction_type):
419            yield
 43class EngineAdapterStateSync(CommonStateSyncMixin, StateSync):
 44    """Manages state of models and snapshot with an existing engine adapter.
 45
 46    This state sync is convenient to use because it requires no additional setup.
 47    You can reuse the same engine/warehouse that your data is stored in.
 48
 49    Args:
 50        engine_adapter: The EngineAdapter to use to store and fetch snapshots.
 51        schema: The schema to store state metadata in.
 52    """
 53
 54    def __init__(
 55        self,
 56        engine_adapter: EngineAdapter,
 57        schema: str,
 58    ):
 59        self.engine_adapter = engine_adapter
 60        self.snapshots_table = f"{schema}._snapshots"
 61        self.environments_table = f"{schema}._environments"
 62
 63    @property
 64    def snapshot_columns_to_types(self) -> t.Dict[str, exp.DataType]:
 65        return {
 66            "name": exp.DataType.build("text"),
 67            "identifier": exp.DataType.build("text"),
 68            "version": exp.DataType.build("text"),
 69            "snapshot": exp.DataType.build("text"),
 70        }
 71
 72    @property
 73    def environment_columns_to_types(self) -> t.Dict[str, exp.DataType]:
 74        return {
 75            "name": exp.DataType.build("text"),
 76            "snapshots": exp.DataType.build("text"),
 77            "start_at": exp.DataType.build("text"),
 78            "end_at": exp.DataType.build("text"),
 79            "plan_id": exp.DataType.build("text"),
 80            "previous_plan_id": exp.DataType.build("text"),
 81            "expiration_ts": exp.DataType.build("bigint"),
 82        }
 83
 84    @transactional(transaction_type=TransactionType.DDL)
 85    def init_schema(self) -> None:
 86        """Creates the schema and table to store state."""
 87        self.engine_adapter.create_schema(self.snapshots_table)
 88
 89        self.engine_adapter.create_state_table(
 90            self.snapshots_table,
 91            self.snapshot_columns_to_types,
 92            primary_key=("name", "identifier"),
 93        )
 94
 95        self.engine_adapter.create_index(
 96            self.snapshots_table, "name_version_idx", ("name", "version")
 97        )
 98
 99        self.engine_adapter.create_state_table(
100            self.environments_table,
101            self.environment_columns_to_types,
102            primary_key=("name",),
103        )
104
105    @transactional()
106    def push_snapshots(self, snapshots: t.Iterable[Snapshot]) -> None:
107        """Pushes snapshots to the state store, merging them with existing ones.
108
109        This method first finds all existing snapshots in the store and merges them with
110        the local snapshots. It will then delete all existing snapshots and then
111        insert all the local snapshots. This can be made safer with locks or merge/upsert.
112
113        Args:
114            snapshot_ids: Iterable of snapshot ids to bulk push.
115        """
116        snapshots_by_id = {}
117        for snapshot in snapshots:
118            if not snapshot.version:
119                raise SQLMeshError(
120                    f"Snapshot {snapshot} has not been versioned yet. Create a plan before pushing a snapshot."
121                )
122            snapshots_by_id[snapshot.snapshot_id] = snapshot
123
124        existing = self.snapshots_exist(snapshots_by_id)
125
126        if existing:
127            raise SQLMeshError(f"Snapshots {existing} already exists.")
128
129        snapshots = snapshots_by_id.values()
130
131        if snapshots:
132            self._push_snapshots(snapshots)
133
134    def _push_snapshots(self, snapshots: t.Iterable[Snapshot], overwrite: bool = False) -> None:
135        if overwrite:
136            snapshots = tuple(snapshots)
137            self.delete_snapshots(snapshots)
138
139        self.engine_adapter.insert_append(
140            self.snapshots_table,
141            next(
142                select_from_values(
143                    [
144                        (
145                            snapshot.name,
146                            snapshot.identifier,
147                            snapshot.version,
148                            snapshot.json(),
149                        )
150                        for snapshot in snapshots
151                    ],
152                    columns_to_types=self.snapshot_columns_to_types,
153                )
154            ),
155            contains_json=True,
156        )
157
158    def delete_expired_environments(self) -> t.List[Environment]:
159        now_ts = now_timestamp()
160        filter_expr = exp.LTE(
161            this=exp.to_column("expiration_ts"),
162            expression=exp.Literal.number(now_ts),
163        )
164
165        rows = self.engine_adapter.fetchall(
166            self._environments_query(
167                where=filter_expr,
168                lock_for_update=True,
169            ),
170            ignore_unsupported_errors=True,
171        )
172        environments = [self._environment_from_row(r) for r in rows]
173
174        self.engine_adapter.delete_from(
175            self.environments_table,
176            where=filter_expr,
177        )
178
179        return environments
180
181    def delete_snapshots(self, snapshot_ids: t.Iterable[SnapshotIdLike]) -> None:
182        self.engine_adapter.delete_from(
183            self.snapshots_table, where=self._snapshot_id_filter(snapshot_ids)
184        )
185
186    def snapshots_exist(self, snapshot_ids: t.Iterable[SnapshotIdLike]) -> t.Set[SnapshotId]:
187        return {
188            SnapshotId(name=name, identifier=identifier)
189            for name, identifier in self.engine_adapter.fetchall(
190                exp.select("name", "identifier")
191                .from_(self.snapshots_table)
192                .where(self._snapshot_id_filter(snapshot_ids))
193            )
194        }
195
196    def reset(self) -> None:
197        """Resets the state store to the state when it was first initialized."""
198        self.engine_adapter.drop_table(self.snapshots_table)
199        self.engine_adapter.drop_table(self.environments_table)
200        self.init_schema()
201
202    def _update_environment(self, environment: Environment) -> None:
203        self.engine_adapter.delete_from(
204            self.environments_table,
205            where=exp.EQ(
206                this=exp.to_column("name"),
207                expression=exp.Literal.string(environment.name),
208            ),
209        )
210
211        self.engine_adapter.insert_append(
212            self.environments_table,
213            next(
214                select_from_values(
215                    [
216                        (
217                            environment.name,
218                            json.dumps([snapshot.dict() for snapshot in environment.snapshots]),
219                            environment.start_at,
220                            environment.end_at,
221                            environment.plan_id,
222                            environment.previous_plan_id,
223                            environment.expiration_ts,
224                        )
225                    ],
226                    columns_to_types=self.environment_columns_to_types,
227                )
228            ),
229            columns_to_types=self.environment_columns_to_types,
230            contains_json=True,
231        )
232
233    def _update_snapshot(self, snapshot: Snapshot) -> None:
234        self.engine_adapter.update_table(
235            self.snapshots_table,
236            {"snapshot": snapshot.json()},
237            where=self._snapshot_id_filter([snapshot.snapshot_id]),
238            contains_json=True,
239        )
240
241    def get_environments(self) -> t.List[Environment]:
242        """Fetches all environments.
243
244        Returns:
245            A list of all environments.
246        """
247        return [
248            self._environment_from_row(row)
249            for row in self.engine_adapter.fetchall(
250                self._environments_query(), ignore_unsupported_errors=True
251            )
252        ]
253
254    def _environment_from_row(self, row: t.Tuple[str, ...]) -> Environment:
255        return Environment(**{field: row[i] for i, field in enumerate(Environment.__fields__)})
256
257    def _environments_query(
258        self,
259        where: t.Optional[str | exp.Expression] = None,
260        lock_for_update: bool = False,
261    ) -> exp.Select:
262        query = (
263            exp.select(*(exp.to_identifier(field) for field in Environment.__fields__))
264            .from_(self.environments_table)
265            .where(where)
266        )
267        if lock_for_update:
268            return query.lock(copy=False)
269        return query
270
271    def _get_snapshots(
272        self,
273        snapshot_ids: t.Optional[t.Iterable[SnapshotIdLike]] = None,
274        lock_for_update: bool = False,
275    ) -> t.Dict[SnapshotId, Snapshot]:
276        """Fetches specified snapshots or all snapshots.
277
278        Args:
279            snapshot_ids: The collection of snapshot like objects to fetch.
280            lock_for_update: Lock the snapshot rows for future update
281
282        Returns:
283            A dictionary of snapshot ids to snapshots for ones that could be found.
284        """
285        query = (
286            exp.select("snapshot")
287            .from_(self.snapshots_table)
288            .where(None if snapshot_ids is None else self._snapshot_id_filter(snapshot_ids))
289        )
290        if lock_for_update:
291            query = query.lock(copy=False)
292
293        snapshots: t.Dict[SnapshotId, Snapshot] = {}
294        duplicates: t.Dict[SnapshotId, Snapshot] = {}
295
296        for row in self.engine_adapter.fetchall(query, ignore_unsupported_errors=True):
297            snapshot = Snapshot.parse_raw(row[0])
298            snapshot_id = snapshot.snapshot_id
299            if snapshot_id in snapshots:
300                other = duplicates.get(snapshot_id, snapshots[snapshot_id])
301                duplicates[snapshot_id] = (
302                    snapshot if snapshot.updated_ts > other.updated_ts else other
303                )
304                snapshots[snapshot_id] = duplicates[snapshot_id]
305            else:
306                snapshots[snapshot_id] = snapshot
307
308        if duplicates:
309            self._push_snapshots(duplicates.values(), overwrite=True)
310            logger.error("Found duplicate snapshots in the state store.")
311
312        return snapshots
313
314    def _get_snapshots_with_same_version(
315        self,
316        snapshots: t.Iterable[SnapshotNameVersionLike],
317        lock_for_update: bool = False,
318    ) -> t.List[Snapshot]:
319        """Fetches all snapshots that share the same version as the snapshots.
320
321        The output includes the snapshots with the specified identifiers.
322
323        Args:
324            snapshots: The collection of target name / version pairs.
325            lock_for_update: Lock the snapshot rows for future update
326
327        Returns:
328            The list of Snapshot objects.
329        """
330        if not snapshots:
331            return []
332
333        query = (
334            exp.select("snapshot")
335            .from_(self.snapshots_table)
336            .where(self._snapshot_name_version_filter(snapshots))
337        )
338        if lock_for_update:
339            query = query.lock(copy=False)
340
341        snapshot_rows = self.engine_adapter.fetchall(query, ignore_unsupported_errors=True)
342        return [Snapshot(**json.loads(row[0])) for row in snapshot_rows]
343
344    def _get_environment(
345        self, environment: str, lock_for_update: bool = False
346    ) -> t.Optional[Environment]:
347        """Fetches the environment if it exists.
348
349        Args:
350            environment: The environment
351            lock_for_update: Lock the snapshot rows for future update
352
353        Returns:
354            The environment object.
355        """
356        row = self.engine_adapter.fetchone(
357            self._environments_query(
358                where=exp.EQ(
359                    this=exp.to_column("name"),
360                    expression=exp.Literal.string(environment),
361                ),
362                lock_for_update=lock_for_update,
363            ),
364            ignore_unsupported_errors=True,
365        )
366
367        if not row:
368            return None
369
370        env = self._environment_from_row(row)
371        return env
372
373    def _snapshot_id_filter(
374        self, snapshot_ids: t.Iterable[SnapshotIdLike]
375    ) -> t.Union[exp.Or, exp.Boolean]:
376        if not snapshot_ids:
377            return exp.FALSE
378
379        return exp.or_(
380            *(
381                exp.and_(
382                    exp.EQ(
383                        this=exp.to_column("name"),
384                        expression=exp.Literal.string(snapshot_id.name),
385                    ),
386                    exp.EQ(
387                        this=exp.to_column("identifier"),
388                        expression=exp.Literal.string(snapshot_id.identifier),
389                    ),
390                )
391                for snapshot_id in snapshot_ids
392            )
393        )
394
395    def _snapshot_name_version_filter(
396        self, snapshot_name_versions: t.Iterable[SnapshotNameVersionLike]
397    ) -> t.Union[exp.Or, exp.Boolean]:
398        if not snapshot_name_versions:
399            return exp.FALSE
400
401        return exp.or_(
402            *(
403                exp.and_(
404                    exp.EQ(
405                        this=exp.to_column("name"),
406                        expression=exp.Literal.string(snapshot_name_version.name),
407                    ),
408                    exp.EQ(
409                        this=exp.to_column("version"),
410                        expression=exp.Literal.string(snapshot_name_version.version),
411                    ),
412                )
413                for snapshot_name_version in snapshot_name_versions
414            )
415        )
416
417    @contextlib.contextmanager
418    def _transaction(self, transaction_type: TransactionType) -> t.Generator[None, None, None]:
419        with self.engine_adapter.transaction(transaction_type=transaction_type):
420            yield

Manages state of models and snapshots with an existing engine adapter.

This state sync is convenient to use because it requires no additional setup. You can reuse the same engine/warehouse that your data is stored in.

Arguments:
  • engine_adapter: The EngineAdapter to use to store and fetch snapshots.
  • schema: The schema to store state metadata in.
EngineAdapterStateSync( engine_adapter: sqlmesh.core.engine_adapter.base.EngineAdapter, schema: str)
54    def __init__(
55        self,
56        engine_adapter: EngineAdapter,
57        schema: str,
58    ):
59        self.engine_adapter = engine_adapter
60        self.snapshots_table = f"{schema}._snapshots"
61        self.environments_table = f"{schema}._environments"
@transactional(transaction_type=TransactionType.DDL)
def init_schema(self) -> None:
 84    @transactional(transaction_type=TransactionType.DDL)
 85    def init_schema(self) -> None:
 86        """Creates the schema and table to store state."""
 87        self.engine_adapter.create_schema(self.snapshots_table)
 88
 89        self.engine_adapter.create_state_table(
 90            self.snapshots_table,
 91            self.snapshot_columns_to_types,
 92            primary_key=("name", "identifier"),
 93        )
 94
 95        self.engine_adapter.create_index(
 96            self.snapshots_table, "name_version_idx", ("name", "version")
 97        )
 98
 99        self.engine_adapter.create_state_table(
100            self.environments_table,
101            self.environment_columns_to_types,
102            primary_key=("name",),
103        )

Creates the schema and table to store state.

@transactional()
def push_snapshots( self, snapshots: Iterable[sqlmesh.core.snapshot.definition.Snapshot]) -> None:
105    @transactional()
106    def push_snapshots(self, snapshots: t.Iterable[Snapshot]) -> None:
107        """Pushes snapshots to the state store, merging them with existing ones.
108
109        This method first finds all existing snapshots in the store and merges them with
110        the local snapshots. It will then delete all existing snapshots and then
111        insert all the local snapshots. This can be made safer with locks or merge/upsert.
112
113        Args:
114            snapshot_ids: Iterable of snapshot ids to bulk push.
115        """
116        snapshots_by_id = {}
117        for snapshot in snapshots:
118            if not snapshot.version:
119                raise SQLMeshError(
120                    f"Snapshot {snapshot} has not been versioned yet. Create a plan before pushing a snapshot."
121                )
122            snapshots_by_id[snapshot.snapshot_id] = snapshot
123
124        existing = self.snapshots_exist(snapshots_by_id)
125
126        if existing:
127            raise SQLMeshError(f"Snapshots {existing} already exists.")
128
129        snapshots = snapshots_by_id.values()
130
131        if snapshots:
132            self._push_snapshots(snapshots)

Pushes new snapshots to the state store.

Every snapshot must already be versioned and must not yet exist in the state store; otherwise an error is raised and nothing is pushed.

Arguments:
  • snapshots: Iterable of snapshots to bulk push.
def delete_expired_environments(self) -> List[sqlmesh.core.environment.Environment]:
158    def delete_expired_environments(self) -> t.List[Environment]:
159        now_ts = now_timestamp()
160        filter_expr = exp.LTE(
161            this=exp.to_column("expiration_ts"),
162            expression=exp.Literal.number(now_ts),
163        )
164
165        rows = self.engine_adapter.fetchall(
166            self._environments_query(
167                where=filter_expr,
168                lock_for_update=True,
169            ),
170            ignore_unsupported_errors=True,
171        )
172        environments = [self._environment_from_row(r) for r in rows]
173
174        self.engine_adapter.delete_from(
175            self.environments_table,
176            where=filter_expr,
177        )
178
179        return environments

Removes expired environments.

Expired environments are environments that have exceeded their time-to-live value.

Returns:

The list of removed environments.

181    def delete_snapshots(self, snapshot_ids: t.Iterable[SnapshotIdLike]) -> None:
182        self.engine_adapter.delete_from(
183            self.snapshots_table, where=self._snapshot_id_filter(snapshot_ids)
184        )

Delete snapshots from the state sync.

Arguments:
  • snapshot_ids: A list of snapshot like objects to delete.
186    def snapshots_exist(self, snapshot_ids: t.Iterable[SnapshotIdLike]) -> t.Set[SnapshotId]:
187        return {
188            SnapshotId(name=name, identifier=identifier)
189            for name, identifier in self.engine_adapter.fetchall(
190                exp.select("name", "identifier")
191                .from_(self.snapshots_table)
192                .where(self._snapshot_id_filter(snapshot_ids))
193            )
194        }

Checks if multiple snapshots exist in the state sync.

Arguments:
  • snapshot_ids: Iterable of snapshot ids to bulk check.
Returns:

A set of all the existing snapshot ids.

def reset(self) -> None:
196    def reset(self) -> None:
197        """Resets the state store to the state when it was first initialized."""
198        self.engine_adapter.drop_table(self.snapshots_table)
199        self.engine_adapter.drop_table(self.environments_table)
200        self.init_schema()

Resets the state store to the state when it was first initialized.

def get_environments(self) -> List[sqlmesh.core.environment.Environment]:
241    def get_environments(self) -> t.List[Environment]:
242        """Fetches all environments.
243
244        Returns:
245            A list of all environments.
246        """
247        return [
248            self._environment_from_row(row)
249            for row in self.engine_adapter.fetchall(
250                self._environments_query(), ignore_unsupported_errors=True
251            )
252        ]

Fetches all environments.

Returns:

A list of all environments.