Edit on GitHub

sqlmesh.core.audit.definition

  1from __future__ import annotations
  2
  3import pathlib
  4import typing as t
  5from pathlib import Path
  6
  7from pydantic import Field, validator
  8from sqlglot import exp
  9
 10from sqlmesh.core import constants as c
 11from sqlmesh.core import dialect as d
 12from sqlmesh.core.model.definition import Model, _Model, expression_validator
 13from sqlmesh.core.renderer import QueryRenderer
 14from sqlmesh.utils.date import TimeLike
 15from sqlmesh.utils.errors import AuditConfigError, raise_config_error
 16from sqlmesh.utils.pydantic import PydanticModel
 17
 18if t.TYPE_CHECKING:
 19    from sqlmesh.core.snapshot import Snapshot
 20
 21
 22class AuditMeta(PydanticModel):
 23    """Metadata for audits which can be defined in SQL."""
 24
 25    name: str
 26    """The name of this audit."""
 27    dialect: str = ""
 28    """The dialect of the audit query."""
 29    skip: bool = False
 30    """Setting this to `true` will cause this audit to be skipped. Defaults to `false`."""
 31    blocking: bool = True
 32    """Setting this to `true` will cause the pipeline execution to stop if this audit fails. Defaults to `true`."""
 33
 34    @validator("name", "dialect", pre=True)
 35    def _string_validator(cls, v: t.Any) -> t.Optional[str]:
 36        if isinstance(v, exp.Expression):
 37            return v.name.lower()
 38        return str(v).lower() if v is not None else None
 39
 40    @validator("skip", "blocking", pre=True)
 41    def _bool_validator(cls, v: t.Any) -> bool:
 42        if isinstance(v, exp.Boolean):
 43            return v.this
 44        if isinstance(v, exp.Expression):
 45            return v.name.lower() not in ("false", "no")
 46        return bool(v)
 47
 48
 49class Audit(AuditMeta, frozen=True):
 50    """Audit is an assertion made about your SQLMesh models.
 51
 52    An audit is a SQL query that returns bad records.
 53    """
 54
 55    query: t.Union[exp.Subqueryable, d.Jinja]
 56    expressions_: t.Optional[t.List[exp.Expression]] = Field(default=None, alias="expressions")
 57
 58    _path: t.Optional[pathlib.Path] = None
 59
 60    _query_validator = expression_validator
 61
 62    @classmethod
 63    def load(
 64        cls,
 65        expressions: t.List[exp.Expression],
 66        *,
 67        path: pathlib.Path,
 68        dialect: t.Optional[str] = None,
 69    ) -> Audit:
 70        """Load an audit from a parsed SQLMesh audit file.
 71
 72        Args:
 73            expressions: Audit, *Statements, Query
 74            path: An optional path of the file.
 75            dialect: The default dialect if no audit dialect is configured.
 76        """
 77        if len(expressions) < 2:
 78            _raise_config_error("Incomplete audit definition, missing AUDIT or QUERY", path)
 79
 80        meta, *statements, query = expressions
 81
 82        if not isinstance(meta, d.Audit):
 83            _raise_config_error(
 84                "AUDIT statement is required as the first statement in the definition",
 85                path,
 86            )
 87            raise
 88
 89        provided_meta_fields = {p.name for p in meta.expressions}
 90
 91        missing_required_fields = AuditMeta.missing_required_fields(provided_meta_fields)
 92        if missing_required_fields:
 93            _raise_config_error(
 94                f"Missing required fields {missing_required_fields} in the audit definition",
 95                path,
 96            )
 97
 98        extra_fields = AuditMeta.extra_fields(provided_meta_fields)
 99        if extra_fields:
100            _raise_config_error(
101                f"Invalid extra fields {extra_fields} in the audit definition", path
102            )
103
104        if not isinstance(query, exp.Subqueryable):
105            _raise_config_error("Missing SELECT query in the audit definition", path)
106            raise
107
108        try:
109            audit = cls(
110                query=query,
111                expressions=statements,
112                **{
113                    "dialect": dialect or "",
114                    **AuditMeta(
115                        **{prop.name: prop.args.get("value") for prop in meta.expressions if prop},
116                    ).dict(),
117                },
118            )
119        except Exception as ex:
120            _raise_config_error(str(ex), path)
121
122        audit._path = path
123        return audit
124
125    @classmethod
126    def load_multiple(
127        cls,
128        expressions: t.List[exp.Expression],
129        *,
130        path: pathlib.Path,
131        dialect: t.Optional[str] = None,
132    ) -> t.Generator[Audit, None, None]:
133        audit_block: t.List[exp.Expression] = []
134        for expression in expressions:
135            if isinstance(expression, d.Audit):
136                if audit_block:
137                    yield Audit.load(
138                        expressions=audit_block,
139                        path=path,
140                        dialect=dialect,
141                    )
142                    audit_block.clear()
143            audit_block.append(expression)
144        yield Audit.load(
145            expressions=audit_block,
146            path=path,
147            dialect=dialect,
148        )
149
150    def render_query(
151        self,
152        snapshot_or_model: t.Union[Snapshot, Model],
153        *,
154        start: t.Optional[TimeLike] = None,
155        end: t.Optional[TimeLike] = None,
156        latest: t.Optional[TimeLike] = None,
157        snapshots: t.Optional[t.Dict[str, Snapshot]] = None,
158        is_dev: bool = False,
159        **kwargs: t.Any,
160    ) -> exp.Subqueryable:
161        """Renders the audit's query.
162
163        Args:
164            snapshot_or_model: The snapshot or model which is being audited.
165            start: The start datetime to render. Defaults to epoch start.
166            end: The end datetime to render. Defaults to epoch start.
167            latest: The latest datetime to use for non-incremental queries. Defaults to epoch start.
168            snapshots: All snapshots (by model name) to use for mapping of physical locations.
169            audit_name: The name of audit if the query to render is for an audit.
170            is_dev: Indicates whether the rendering happens in the development mode and temporary
171                tables / table clones should be used where applicable.
172            kwargs: Additional kwargs to pass to the renderer.
173
174        Returns:
175            The rendered expression.
176        """
177
178        if isinstance(snapshot_or_model, _Model):
179            model = snapshot_or_model
180            this_model = snapshot_or_model.name
181        else:
182            model = snapshot_or_model.model
183            this_model = snapshot_or_model.table_name(is_dev=is_dev, for_read=True)
184
185        query_renderer = self._create_query_renderer(model)
186
187        this_model_subquery = exp.select("*").from_(exp.to_table(this_model))
188        query_renderer.filter_time_column(this_model_subquery, start or c.EPOCH, end or c.EPOCH)
189
190        return query_renderer.render(
191            start=start,
192            end=end,
193            latest=latest,
194            snapshots=snapshots,
195            is_dev=is_dev,
196            this_model=this_model_subquery.subquery(),
197            **kwargs,
198        )
199
200    @property
201    def expressions(self) -> t.List[exp.Expression]:
202        return self.expressions_ or []
203
204    @property
205    def macro_definitions(self) -> t.List[d.MacroDef]:
206        """All macro definitions from the list of expressions."""
207        return [s for s in self.expressions if isinstance(s, d.MacroDef)]
208
209    def _create_query_renderer(self, model: Model) -> QueryRenderer:
210        return QueryRenderer(
211            self.query,
212            self.dialect,
213            self.macro_definitions,
214            path=self._path or Path(),
215            python_env=model.python_env,
216            time_column=model.time_column,
217            time_converter=model.convert_to_time_column,
218            only_latest=model.kind.only_latest,
219        )
220
221
# Outcome of running a single audit: the audit, the number of offending
# records, and the query that was executed.
class AuditResult(PydanticModel):
    audit: Audit
    """The audit that produced this result."""
    count: int
    """How many records the audit query returned."""
    query: exp.Expression
    """The rendered query the audit used."""
229
230
def _raise_config_error(msg: str, path: pathlib.Path) -> None:
    """Report an audit configuration problem located at `path`.

    Delegates to `raise_config_error` with `AuditConfigError` as the error type.
    """
    raise_config_error(msg, error_type=AuditConfigError, location=path)
class AuditMeta(sqlmesh.utils.pydantic.PydanticModel):
23class AuditMeta(PydanticModel):
24    """Metadata for audits which can be defined in SQL."""
25
26    name: str
27    """The name of this audit."""
28    dialect: str = ""
29    """The dialect of the audit query."""
30    skip: bool = False
31    """Setting this to `true` will cause this audit to be skipped. Defaults to `false`."""
32    blocking: bool = True
33    """Setting this to `true` will cause the pipeline execution to stop if this audit fails. Defaults to `true`."""
34
35    @validator("name", "dialect", pre=True)
36    def _string_validator(cls, v: t.Any) -> t.Optional[str]:
37        if isinstance(v, exp.Expression):
38            return v.name.lower()
39        return str(v).lower() if v is not None else None
40
41    @validator("skip", "blocking", pre=True)
42    def _bool_validator(cls, v: t.Any) -> bool:
43        if isinstance(v, exp.Boolean):
44            return v.this
45        if isinstance(v, exp.Expression):
46            return v.name.lower() not in ("false", "no")
47        return bool(v)

Metadata for audits which can be defined in SQL.

name: str

The name of this audit.

dialect: str

The dialect of the audit query.

skip: bool

Setting this to true will cause this audit to be skipped. Defaults to false.

blocking: bool

Setting this to true will cause the pipeline execution to stop if this audit fails. Defaults to true.

Inherited Members
pydantic.main.BaseModel
BaseModel
parse_obj
parse_raw
parse_file
from_orm
construct
copy
schema
schema_json
validate
update_forward_refs
sqlmesh.utils.pydantic.PydanticModel
Config
dict
json
missing_required_fields
extra_fields
all_fields
required_fields
class Audit(AuditMeta):
 50class Audit(AuditMeta, frozen=True):
 51    """Audit is an assertion made about your SQLMesh models.
 52
 53    An audit is a SQL query that returns bad records.
 54    """
 55
 56    query: t.Union[exp.Subqueryable, d.Jinja]
 57    expressions_: t.Optional[t.List[exp.Expression]] = Field(default=None, alias="expressions")
 58
 59    _path: t.Optional[pathlib.Path] = None
 60
 61    _query_validator = expression_validator
 62
 63    @classmethod
 64    def load(
 65        cls,
 66        expressions: t.List[exp.Expression],
 67        *,
 68        path: pathlib.Path,
 69        dialect: t.Optional[str] = None,
 70    ) -> Audit:
 71        """Load an audit from a parsed SQLMesh audit file.
 72
 73        Args:
 74            expressions: Audit, *Statements, Query
 75            path: An optional path of the file.
 76            dialect: The default dialect if no audit dialect is configured.
 77        """
 78        if len(expressions) < 2:
 79            _raise_config_error("Incomplete audit definition, missing AUDIT or QUERY", path)
 80
 81        meta, *statements, query = expressions
 82
 83        if not isinstance(meta, d.Audit):
 84            _raise_config_error(
 85                "AUDIT statement is required as the first statement in the definition",
 86                path,
 87            )
 88            raise
 89
 90        provided_meta_fields = {p.name for p in meta.expressions}
 91
 92        missing_required_fields = AuditMeta.missing_required_fields(provided_meta_fields)
 93        if missing_required_fields:
 94            _raise_config_error(
 95                f"Missing required fields {missing_required_fields} in the audit definition",
 96                path,
 97            )
 98
 99        extra_fields = AuditMeta.extra_fields(provided_meta_fields)
100        if extra_fields:
101            _raise_config_error(
102                f"Invalid extra fields {extra_fields} in the audit definition", path
103            )
104
105        if not isinstance(query, exp.Subqueryable):
106            _raise_config_error("Missing SELECT query in the audit definition", path)
107            raise
108
109        try:
110            audit = cls(
111                query=query,
112                expressions=statements,
113                **{
114                    "dialect": dialect or "",
115                    **AuditMeta(
116                        **{prop.name: prop.args.get("value") for prop in meta.expressions if prop},
117                    ).dict(),
118                },
119            )
120        except Exception as ex:
121            _raise_config_error(str(ex), path)
122
123        audit._path = path
124        return audit
125
126    @classmethod
127    def load_multiple(
128        cls,
129        expressions: t.List[exp.Expression],
130        *,
131        path: pathlib.Path,
132        dialect: t.Optional[str] = None,
133    ) -> t.Generator[Audit, None, None]:
134        audit_block: t.List[exp.Expression] = []
135        for expression in expressions:
136            if isinstance(expression, d.Audit):
137                if audit_block:
138                    yield Audit.load(
139                        expressions=audit_block,
140                        path=path,
141                        dialect=dialect,
142                    )
143                    audit_block.clear()
144            audit_block.append(expression)
145        yield Audit.load(
146            expressions=audit_block,
147            path=path,
148            dialect=dialect,
149        )
150
151    def render_query(
152        self,
153        snapshot_or_model: t.Union[Snapshot, Model],
154        *,
155        start: t.Optional[TimeLike] = None,
156        end: t.Optional[TimeLike] = None,
157        latest: t.Optional[TimeLike] = None,
158        snapshots: t.Optional[t.Dict[str, Snapshot]] = None,
159        is_dev: bool = False,
160        **kwargs: t.Any,
161    ) -> exp.Subqueryable:
162        """Renders the audit's query.
163
164        Args:
165            snapshot_or_model: The snapshot or model which is being audited.
166            start: The start datetime to render. Defaults to epoch start.
167            end: The end datetime to render. Defaults to epoch start.
168            latest: The latest datetime to use for non-incremental queries. Defaults to epoch start.
169            snapshots: All snapshots (by model name) to use for mapping of physical locations.
170            audit_name: The name of audit if the query to render is for an audit.
171            is_dev: Indicates whether the rendering happens in the development mode and temporary
172                tables / table clones should be used where applicable.
173            kwargs: Additional kwargs to pass to the renderer.
174
175        Returns:
176            The rendered expression.
177        """
178
179        if isinstance(snapshot_or_model, _Model):
180            model = snapshot_or_model
181            this_model = snapshot_or_model.name
182        else:
183            model = snapshot_or_model.model
184            this_model = snapshot_or_model.table_name(is_dev=is_dev, for_read=True)
185
186        query_renderer = self._create_query_renderer(model)
187
188        this_model_subquery = exp.select("*").from_(exp.to_table(this_model))
189        query_renderer.filter_time_column(this_model_subquery, start or c.EPOCH, end or c.EPOCH)
190
191        return query_renderer.render(
192            start=start,
193            end=end,
194            latest=latest,
195            snapshots=snapshots,
196            is_dev=is_dev,
197            this_model=this_model_subquery.subquery(),
198            **kwargs,
199        )
200
201    @property
202    def expressions(self) -> t.List[exp.Expression]:
203        return self.expressions_ or []
204
205    @property
206    def macro_definitions(self) -> t.List[d.MacroDef]:
207        """All macro definitions from the list of expressions."""
208        return [s for s in self.expressions if isinstance(s, d.MacroDef)]
209
210    def _create_query_renderer(self, model: Model) -> QueryRenderer:
211        return QueryRenderer(
212            self.query,
213            self.dialect,
214            self.macro_definitions,
215            path=self._path or Path(),
216            python_env=model.python_env,
217            time_column=model.time_column,
218            time_converter=model.convert_to_time_column,
219            only_latest=model.kind.only_latest,
220        )

Audit is an assertion made about your SQLMesh models.

An audit is a SQL query that returns bad records.

@classmethod
def load( cls, expressions: List[sqlglot.expressions.Expression], *, path: pathlib.Path, dialect: Optional[str] = None) -> sqlmesh.core.audit.definition.Audit:
 63    @classmethod
 64    def load(
 65        cls,
 66        expressions: t.List[exp.Expression],
 67        *,
 68        path: pathlib.Path,
 69        dialect: t.Optional[str] = None,
 70    ) -> Audit:
 71        """Load an audit from a parsed SQLMesh audit file.
 72
 73        Args:
 74            expressions: Audit, *Statements, Query
 75            path: An optional path of the file.
 76            dialect: The default dialect if no audit dialect is configured.
 77        """
 78        if len(expressions) < 2:
 79            _raise_config_error("Incomplete audit definition, missing AUDIT or QUERY", path)
 80
 81        meta, *statements, query = expressions
 82
 83        if not isinstance(meta, d.Audit):
 84            _raise_config_error(
 85                "AUDIT statement is required as the first statement in the definition",
 86                path,
 87            )
 88            raise
 89
 90        provided_meta_fields = {p.name for p in meta.expressions}
 91
 92        missing_required_fields = AuditMeta.missing_required_fields(provided_meta_fields)
 93        if missing_required_fields:
 94            _raise_config_error(
 95                f"Missing required fields {missing_required_fields} in the audit definition",
 96                path,
 97            )
 98
 99        extra_fields = AuditMeta.extra_fields(provided_meta_fields)
100        if extra_fields:
101            _raise_config_error(
102                f"Invalid extra fields {extra_fields} in the audit definition", path
103            )
104
105        if not isinstance(query, exp.Subqueryable):
106            _raise_config_error("Missing SELECT query in the audit definition", path)
107            raise
108
109        try:
110            audit = cls(
111                query=query,
112                expressions=statements,
113                **{
114                    "dialect": dialect or "",
115                    **AuditMeta(
116                        **{prop.name: prop.args.get("value") for prop in meta.expressions if prop},
117                    ).dict(),
118                },
119            )
120        except Exception as ex:
121            _raise_config_error(str(ex), path)
122
123        audit._path = path
124        return audit

Load an audit from a parsed SQLMesh audit file.

Arguments:
  • expressions: Audit, *Statements, Query
  • path: The path of the audit file (required); it is reported as the location of configuration errors and stored on the loaded audit.
  • dialect: The default dialect if no audit dialect is configured.
@classmethod
def load_multiple( cls, expressions: List[sqlglot.expressions.Expression], *, path: pathlib.Path, dialect: Optional[str] = None) -> Generator[sqlmesh.core.audit.definition.Audit, NoneType, NoneType]:
126    @classmethod
127    def load_multiple(
128        cls,
129        expressions: t.List[exp.Expression],
130        *,
131        path: pathlib.Path,
132        dialect: t.Optional[str] = None,
133    ) -> t.Generator[Audit, None, None]:
134        audit_block: t.List[exp.Expression] = []
135        for expression in expressions:
136            if isinstance(expression, d.Audit):
137                if audit_block:
138                    yield Audit.load(
139                        expressions=audit_block,
140                        path=path,
141                        dialect=dialect,
142                    )
143                    audit_block.clear()
144            audit_block.append(expression)
145        yield Audit.load(
146            expressions=audit_block,
147            path=path,
148            dialect=dialect,
149        )
def render_query( self, snapshot_or_model: Union[sqlmesh.core.snapshot.definition.Snapshot, Annotated[Union[sqlmesh.core.model.definition.SqlModel, sqlmesh.core.model.definition.SeedModel, sqlmesh.core.model.definition.PythonModel], FieldInfo(default=PydanticUndefined, discriminator='source_type', extra={})]], *, start: Union[datetime.date, datetime.datetime, str, int, float, NoneType] = None, end: Union[datetime.date, datetime.datetime, str, int, float, NoneType] = None, latest: Union[datetime.date, datetime.datetime, str, int, float, NoneType] = None, snapshots: Optional[Dict[str, sqlmesh.core.snapshot.definition.Snapshot]] = None, is_dev: bool = False, **kwargs: Any) -> sqlglot.expressions.Subqueryable:
151    def render_query(
152        self,
153        snapshot_or_model: t.Union[Snapshot, Model],
154        *,
155        start: t.Optional[TimeLike] = None,
156        end: t.Optional[TimeLike] = None,
157        latest: t.Optional[TimeLike] = None,
158        snapshots: t.Optional[t.Dict[str, Snapshot]] = None,
159        is_dev: bool = False,
160        **kwargs: t.Any,
161    ) -> exp.Subqueryable:
162        """Renders the audit's query.
163
164        Args:
165            snapshot_or_model: The snapshot or model which is being audited.
166            start: The start datetime to render. Defaults to epoch start.
167            end: The end datetime to render. Defaults to epoch start.
168            latest: The latest datetime to use for non-incremental queries. Defaults to epoch start.
169            snapshots: All snapshots (by model name) to use for mapping of physical locations.
170            audit_name: The name of audit if the query to render is for an audit.
171            is_dev: Indicates whether the rendering happens in the development mode and temporary
172                tables / table clones should be used where applicable.
173            kwargs: Additional kwargs to pass to the renderer.
174
175        Returns:
176            The rendered expression.
177        """
178
179        if isinstance(snapshot_or_model, _Model):
180            model = snapshot_or_model
181            this_model = snapshot_or_model.name
182        else:
183            model = snapshot_or_model.model
184            this_model = snapshot_or_model.table_name(is_dev=is_dev, for_read=True)
185
186        query_renderer = self._create_query_renderer(model)
187
188        this_model_subquery = exp.select("*").from_(exp.to_table(this_model))
189        query_renderer.filter_time_column(this_model_subquery, start or c.EPOCH, end or c.EPOCH)
190
191        return query_renderer.render(
192            start=start,
193            end=end,
194            latest=latest,
195            snapshots=snapshots,
196            is_dev=is_dev,
197            this_model=this_model_subquery.subquery(),
198            **kwargs,
199        )

Renders the audit's query.

Arguments:
  • snapshot_or_model: The snapshot or model which is being audited.
  • start: The start datetime to render. Defaults to epoch start.
  • end: The end datetime to render. Defaults to epoch start.
  • latest: The latest datetime to use for non-incremental queries. Defaults to epoch start.
  • snapshots: All snapshots (by model name) to use for mapping of physical locations.
  • audit_name: Not a parameter of this method; any additional keyword arguments are passed to the renderer through kwargs.
  • is_dev: Indicates whether the rendering happens in the development mode and temporary tables / table clones should be used where applicable.
  • kwargs: Additional kwargs to pass to the renderer.
Returns:

The rendered expression.

macro_definitions: List[sqlmesh.core.dialect.MacroDef]

All macro definitions from the list of expressions.

Inherited Members
pydantic.main.BaseModel
BaseModel
parse_obj
parse_raw
parse_file
from_orm
construct
copy
schema
schema_json
validate
update_forward_refs
AuditMeta
name
dialect
skip
blocking
sqlmesh.utils.pydantic.PydanticModel
Config
dict
json
missing_required_fields
extra_fields
all_fields
required_fields
class AuditResult(sqlmesh.utils.pydantic.PydanticModel):
223class AuditResult(PydanticModel):
224    audit: Audit
225    """The audit this result is for."""
226    count: int
227    """The number of records returned by the audit query."""
228    query: exp.Expression
229    """The rendered query used by the audit."""

The audit this result is for.

count: int

The number of records returned by the audit query.

query: sqlglot.expressions.Expression

The rendered query used by the audit.

Inherited Members
pydantic.main.BaseModel
BaseModel
parse_obj
parse_raw
parse_file
from_orm
construct
copy
schema
schema_json
validate
update_forward_refs
sqlmesh.utils.pydantic.PydanticModel
Config
dict
json
missing_required_fields
extra_fields
all_fields
required_fields