sqlmesh.core.audit.definition
1from __future__ import annotations 2 3import pathlib 4import typing as t 5from pathlib import Path 6 7from pydantic import Field, validator 8from sqlglot import exp 9 10from sqlmesh.core import constants as c 11from sqlmesh.core import dialect as d 12from sqlmesh.core.model.definition import Model, _Model, expression_validator 13from sqlmesh.core.renderer import QueryRenderer 14from sqlmesh.utils.date import TimeLike 15from sqlmesh.utils.errors import AuditConfigError, raise_config_error 16from sqlmesh.utils.pydantic import PydanticModel 17 18if t.TYPE_CHECKING: 19 from sqlmesh.core.snapshot import Snapshot 20 21 22class AuditMeta(PydanticModel): 23 """Metadata for audits which can be defined in SQL.""" 24 25 name: str 26 """The name of this audit.""" 27 dialect: str = "" 28 """The dialect of the audit query.""" 29 skip: bool = False 30 """Setting this to `true` will cause this audit to be skipped. Defaults to `false`.""" 31 blocking: bool = True 32 """Setting this to `true` will cause the pipeline execution to stop if this audit fails. Defaults to `true`.""" 33 34 @validator("name", "dialect", pre=True) 35 def _string_validator(cls, v: t.Any) -> t.Optional[str]: 36 if isinstance(v, exp.Expression): 37 return v.name.lower() 38 return str(v).lower() if v is not None else None 39 40 @validator("skip", "blocking", pre=True) 41 def _bool_validator(cls, v: t.Any) -> bool: 42 if isinstance(v, exp.Boolean): 43 return v.this 44 if isinstance(v, exp.Expression): 45 return v.name.lower() not in ("false", "no") 46 return bool(v) 47 48 49class Audit(AuditMeta, frozen=True): 50 """Audit is an assertion made about your SQLMesh models. 51 52 An audit is a SQL query that returns bad records. 
53 """ 54 55 query: t.Union[exp.Subqueryable, d.Jinja] 56 expressions_: t.Optional[t.List[exp.Expression]] = Field(default=None, alias="expressions") 57 58 _path: t.Optional[pathlib.Path] = None 59 60 _query_validator = expression_validator 61 62 @classmethod 63 def load( 64 cls, 65 expressions: t.List[exp.Expression], 66 *, 67 path: pathlib.Path, 68 dialect: t.Optional[str] = None, 69 ) -> Audit: 70 """Load an audit from a parsed SQLMesh audit file. 71 72 Args: 73 expressions: Audit, *Statements, Query 74 path: An optional path of the file. 75 dialect: The default dialect if no audit dialect is configured. 76 """ 77 if len(expressions) < 2: 78 _raise_config_error("Incomplete audit definition, missing AUDIT or QUERY", path) 79 80 meta, *statements, query = expressions 81 82 if not isinstance(meta, d.Audit): 83 _raise_config_error( 84 "AUDIT statement is required as the first statement in the definition", 85 path, 86 ) 87 raise 88 89 provided_meta_fields = {p.name for p in meta.expressions} 90 91 missing_required_fields = AuditMeta.missing_required_fields(provided_meta_fields) 92 if missing_required_fields: 93 _raise_config_error( 94 f"Missing required fields {missing_required_fields} in the audit definition", 95 path, 96 ) 97 98 extra_fields = AuditMeta.extra_fields(provided_meta_fields) 99 if extra_fields: 100 _raise_config_error( 101 f"Invalid extra fields {extra_fields} in the audit definition", path 102 ) 103 104 if not isinstance(query, exp.Subqueryable): 105 _raise_config_error("Missing SELECT query in the audit definition", path) 106 raise 107 108 try: 109 audit = cls( 110 query=query, 111 expressions=statements, 112 **{ 113 "dialect": dialect or "", 114 **AuditMeta( 115 **{prop.name: prop.args.get("value") for prop in meta.expressions if prop}, 116 ).dict(), 117 }, 118 ) 119 except Exception as ex: 120 _raise_config_error(str(ex), path) 121 122 audit._path = path 123 return audit 124 125 @classmethod 126 def load_multiple( 127 cls, 128 expressions: 
t.List[exp.Expression], 129 *, 130 path: pathlib.Path, 131 dialect: t.Optional[str] = None, 132 ) -> t.Generator[Audit, None, None]: 133 audit_block: t.List[exp.Expression] = [] 134 for expression in expressions: 135 if isinstance(expression, d.Audit): 136 if audit_block: 137 yield Audit.load( 138 expressions=audit_block, 139 path=path, 140 dialect=dialect, 141 ) 142 audit_block.clear() 143 audit_block.append(expression) 144 yield Audit.load( 145 expressions=audit_block, 146 path=path, 147 dialect=dialect, 148 ) 149 150 def render_query( 151 self, 152 snapshot_or_model: t.Union[Snapshot, Model], 153 *, 154 start: t.Optional[TimeLike] = None, 155 end: t.Optional[TimeLike] = None, 156 latest: t.Optional[TimeLike] = None, 157 snapshots: t.Optional[t.Dict[str, Snapshot]] = None, 158 is_dev: bool = False, 159 **kwargs: t.Any, 160 ) -> exp.Subqueryable: 161 """Renders the audit's query. 162 163 Args: 164 snapshot_or_model: The snapshot or model which is being audited. 165 start: The start datetime to render. Defaults to epoch start. 166 end: The end datetime to render. Defaults to epoch start. 167 latest: The latest datetime to use for non-incremental queries. Defaults to epoch start. 168 snapshots: All snapshots (by model name) to use for mapping of physical locations. 169 audit_name: The name of audit if the query to render is for an audit. 170 is_dev: Indicates whether the rendering happens in the development mode and temporary 171 tables / table clones should be used where applicable. 172 kwargs: Additional kwargs to pass to the renderer. 173 174 Returns: 175 The rendered expression. 
176 """ 177 178 if isinstance(snapshot_or_model, _Model): 179 model = snapshot_or_model 180 this_model = snapshot_or_model.name 181 else: 182 model = snapshot_or_model.model 183 this_model = snapshot_or_model.table_name(is_dev=is_dev, for_read=True) 184 185 query_renderer = self._create_query_renderer(model) 186 187 this_model_subquery = exp.select("*").from_(exp.to_table(this_model)) 188 query_renderer.filter_time_column(this_model_subquery, start or c.EPOCH, end or c.EPOCH) 189 190 return query_renderer.render( 191 start=start, 192 end=end, 193 latest=latest, 194 snapshots=snapshots, 195 is_dev=is_dev, 196 this_model=this_model_subquery.subquery(), 197 **kwargs, 198 ) 199 200 @property 201 def expressions(self) -> t.List[exp.Expression]: 202 return self.expressions_ or [] 203 204 @property 205 def macro_definitions(self) -> t.List[d.MacroDef]: 206 """All macro definitions from the list of expressions.""" 207 return [s for s in self.expressions if isinstance(s, d.MacroDef)] 208 209 def _create_query_renderer(self, model: Model) -> QueryRenderer: 210 return QueryRenderer( 211 self.query, 212 self.dialect, 213 self.macro_definitions, 214 path=self._path or Path(), 215 python_env=model.python_env, 216 time_column=model.time_column, 217 time_converter=model.convert_to_time_column, 218 only_latest=model.kind.only_latest, 219 ) 220 221 222class AuditResult(PydanticModel): 223 audit: Audit 224 """The audit this result is for.""" 225 count: int 226 """The number of records returned by the audit query.""" 227 query: exp.Expression 228 """The rendered query used by the audit.""" 229 230 231def _raise_config_error(msg: str, path: pathlib.Path) -> None: 232 raise_config_error(msg, location=path, error_type=AuditConfigError)
class AuditMeta(PydanticModel):
    """Metadata for audits which can be defined in SQL."""

    name: str
    """The name of this audit."""
    dialect: str = ""
    """The dialect of the audit query."""
    skip: bool = False
    """Setting this to `true` will cause this audit to be skipped. Defaults to `false`."""
    blocking: bool = True
    """Setting this to `true` will cause the pipeline execution to stop if this audit fails. Defaults to `true`."""

    @validator("name", "dialect", pre=True)
    def _string_validator(cls, v: t.Any) -> t.Optional[str]:
        """Normalize a raw `name` / `dialect` value to a lowercase string.

        SQL expressions contribute their `name`; any other non-None value is
        stringified. None is passed through unchanged.
        """
        if isinstance(v, exp.Expression):
            return v.name.lower()
        return str(v).lower() if v is not None else None

    @validator("skip", "blocking", pre=True)
    def _bool_validator(cls, v: t.Any) -> bool:
        """Coerce a raw `skip` / `blocking` value to a bool.

        Boolean expressions yield their wrapped value. Other expressions and
        plain strings are false only when they spell "false" or "no"
        (case-insensitively); the empty string stays false. Everything else
        uses ordinary truthiness.
        """
        if isinstance(v, exp.Boolean):
            return v.this
        if isinstance(v, exp.Expression):
            return v.name.lower() not in ("false", "no")
        if isinstance(v, str):
            # bool("false") is True, so strings need the same spelling check
            # as expressions to be interpreted consistently.
            return bool(v) and v.lower() not in ("false", "no")
        return bool(v)
Metadata for audits which can be defined in SQL.
blocking: bool
Setting this to `true` will cause the pipeline execution to stop if this audit fails. Defaults to `true`.
Inherited Members
- pydantic.main.BaseModel
- BaseModel
- parse_obj
- parse_raw
- parse_file
- from_orm
- construct
- copy
- schema
- schema_json
- validate
- update_forward_refs
class Audit(AuditMeta, frozen=True):
    """Audit is an assertion made about your SQLMesh models.

    An audit is a SQL query that returns bad records.
    """

    query: t.Union[exp.Subqueryable, d.Jinja]
    expressions_: t.Optional[t.List[exp.Expression]] = Field(default=None, alias="expressions")

    # Assigned by `load`; `_create_query_renderer` falls back to `Path()` when unset.
    _path: t.Optional[pathlib.Path] = None

    # NOTE(review): presumably validates the `query` field - confirm against
    # `expression_validator` in sqlmesh.core.model.definition.
    _query_validator = expression_validator

    @classmethod
    def load(
        cls,
        expressions: t.List[exp.Expression],
        *,
        path: pathlib.Path,
        dialect: t.Optional[str] = None,
    ) -> Audit:
        """Load an audit from a parsed SQLMesh audit file.

        Args:
            expressions: The parsed statements in order: the AUDIT statement,
                any number of intermediate statements, and the audit query last.
            path: The path of the audit file. It is reported as the location of
                configuration errors and stored on the returned audit.
            dialect: The default dialect, used only if the AUDIT statement does
                not configure one.

        Returns:
            The loaded audit.

        Raises:
            The error produced by `_raise_config_error` when the definition is
            incomplete, has missing or unknown fields, lacks a SELECT query, or
            fails validation.
        """
        if len(expressions) < 2:
            _raise_config_error("Incomplete audit definition, missing AUDIT or QUERY", path)

        meta, *statements, query = expressions

        if not isinstance(meta, d.Audit):
            _raise_config_error(
                "AUDIT statement is required as the first statement in the definition",
                path,
            )
            # Not reached as long as the helper raises; presumably kept so type
            # checkers narrow `meta` past this point.
            raise

        provided_meta_fields = {p.name for p in meta.expressions}

        missing_required_fields = AuditMeta.missing_required_fields(provided_meta_fields)
        if missing_required_fields:
            _raise_config_error(
                f"Missing required fields {missing_required_fields} in the audit definition",
                path,
            )

        extra_fields = AuditMeta.extra_fields(provided_meta_fields)
        if extra_fields:
            _raise_config_error(
                f"Invalid extra fields {extra_fields} in the audit definition", path
            )

        if not isinstance(query, exp.Subqueryable):
            _raise_config_error("Missing SELECT query in the audit definition", path)
            # Same narrowing guard as above, this time for `query`.
            raise

        try:
            audit_kwargs = AuditMeta(
                **{prop.name: prop.args.get("value") for prop in meta.expressions if prop},
            ).dict()
            # The exported metadata always carries a `dialect` entry (empty when
            # not configured), so the default must be applied explicitly instead
            # of being merged underneath it.
            audit_kwargs["dialect"] = audit_kwargs.get("dialect") or dialect or ""
            audit = cls(query=query, expressions=statements, **audit_kwargs)
        except Exception as ex:
            _raise_config_error(str(ex), path)

        audit._path = path
        return audit

    @classmethod
    def load_multiple(
        cls,
        expressions: t.List[exp.Expression],
        *,
        path: pathlib.Path,
        dialect: t.Optional[str] = None,
    ) -> t.Generator[Audit, None, None]:
        """Load every audit defined in a single parsed audit file.

        The statements are split into groups, each starting at an AUDIT
        statement, and every group is passed to `Audit.load`.

        Args:
            expressions: All parsed statements of the file, in order.
            path: The path of the audit file, forwarded to `Audit.load`.
            dialect: The default dialect, forwarded to `Audit.load`.

        Yields:
            One audit per group of statements.
        """
        pending: t.List[exp.Expression] = []
        for node in expressions:
            starts_new_audit = isinstance(node, d.Audit)
            if starts_new_audit and pending:
                # A new AUDIT statement closes the group collected so far.
                yield Audit.load(expressions=pending, path=path, dialect=dialect)
                pending.clear()
            pending.append(node)
        # Whatever remains (even nothing) forms the final group.
        yield Audit.load(expressions=pending, path=path, dialect=dialect)

    def render_query(
        self,
        snapshot_or_model: t.Union[Snapshot, Model],
        *,
        start: t.Optional[TimeLike] = None,
        end: t.Optional[TimeLike] = None,
        latest: t.Optional[TimeLike] = None,
        snapshots: t.Optional[t.Dict[str, Snapshot]] = None,
        is_dev: bool = False,
        **kwargs: t.Any,
    ) -> exp.Subqueryable:
        """Renders the audit's query.

        Args:
            snapshot_or_model: The snapshot or model which is being audited.
            start: The start datetime to render. The time-column filter on the
                audited table uses the epoch constant when this is not provided.
            end: The end datetime to render. The time-column filter on the
                audited table uses the epoch constant when this is not provided.
            latest: The latest datetime to use for non-incremental queries.
            snapshots: All snapshots (by model name) to use for mapping of physical locations.
            is_dev: Indicates whether the rendering happens in the development mode and temporary
                tables / table clones should be used where applicable.
            kwargs: Additional kwargs to pass to the renderer.

        Returns:
            The rendered expression.
        """
        if not isinstance(snapshot_or_model, _Model):
            snapshot = snapshot_or_model
            model = snapshot.model
            this_model = snapshot.table_name(is_dev=is_dev, for_read=True)
        else:
            model = snapshot_or_model
            this_model = model.name

        renderer = self._create_query_renderer(model)

        audited_rows = exp.select("*").from_(exp.to_table(this_model))
        # The return value is not used, so the filter is presumably applied to
        # `audited_rows` in place.
        renderer.filter_time_column(audited_rows, start or c.EPOCH, end or c.EPOCH)

        return renderer.render(
            start=start,
            end=end,
            latest=latest,
            snapshots=snapshots,
            is_dev=is_dev,
            this_model=audited_rows.subquery(),
            **kwargs,
        )

    @property
    def expressions(self) -> t.List[exp.Expression]:
        """The stored statements, or an empty list when none were provided."""
        return self.expressions_ or []

    @property
    def macro_definitions(self) -> t.List[d.MacroDef]:
        """All macro definitions from the list of expressions."""
        return [s for s in self.expressions if isinstance(s, d.MacroDef)]

    def _create_query_renderer(self, model: Model) -> QueryRenderer:
        """Build a renderer for this audit's query using settings of `model`."""
        return QueryRenderer(
            self.query,
            self.dialect,
            self.macro_definitions,
            path=self._path or Path(),
            python_env=model.python_env,
            time_column=model.time_column,
            time_converter=model.convert_to_time_column,
            only_latest=model.kind.only_latest,
        )
Audit is an assertion made about your SQLMesh models.
An audit is a SQL query that returns bad records.
@classmethod
def
load( cls, expressions: List[sqlglot.expressions.Expression], *, path: pathlib.Path, dialect: Optional[str] = None) -> sqlmesh.core.audit.definition.Audit:
@classmethod
def load(
    cls,
    expressions: t.List[exp.Expression],
    *,
    path: pathlib.Path,
    dialect: t.Optional[str] = None,
) -> Audit:
    """Load an audit from a parsed SQLMesh audit file.

    Args:
        expressions: The parsed statements in order: the AUDIT statement,
            any number of intermediate statements, and the audit query last.
        path: The path of the audit file. It is reported as the location of
            configuration errors and stored on the returned audit.
        dialect: The default dialect, used only if the AUDIT statement does
            not configure one.

    Returns:
        The loaded audit.

    Raises:
        The error produced by `_raise_config_error` when the definition is
        incomplete, has missing or unknown fields, lacks a SELECT query, or
        fails validation.
    """
    if len(expressions) < 2:
        _raise_config_error("Incomplete audit definition, missing AUDIT or QUERY", path)

    meta, *statements, query = expressions

    if not isinstance(meta, d.Audit):
        _raise_config_error(
            "AUDIT statement is required as the first statement in the definition",
            path,
        )
        # Not reached as long as the helper raises; presumably kept so type
        # checkers narrow `meta` past this point.
        raise

    provided_meta_fields = {p.name for p in meta.expressions}

    missing_required_fields = AuditMeta.missing_required_fields(provided_meta_fields)
    if missing_required_fields:
        _raise_config_error(
            f"Missing required fields {missing_required_fields} in the audit definition",
            path,
        )

    extra_fields = AuditMeta.extra_fields(provided_meta_fields)
    if extra_fields:
        _raise_config_error(
            f"Invalid extra fields {extra_fields} in the audit definition", path
        )

    if not isinstance(query, exp.Subqueryable):
        _raise_config_error("Missing SELECT query in the audit definition", path)
        # Same narrowing guard as above, this time for `query`.
        raise

    try:
        audit_kwargs = AuditMeta(
            **{prop.name: prop.args.get("value") for prop in meta.expressions if prop},
        ).dict()
        # The exported metadata always carries a `dialect` entry (empty when
        # not configured), so the default must be applied explicitly instead
        # of being merged underneath it.
        audit_kwargs["dialect"] = audit_kwargs.get("dialect") or dialect or ""
        audit = cls(query=query, expressions=statements, **audit_kwargs)
    except Exception as ex:
        _raise_config_error(str(ex), path)

    audit._path = path
    return audit
Load an audit from a parsed SQLMesh audit file.
Arguments:
- expressions: Audit, *Statements, Query
- path: The path of the audit file (required); it is reported as the location of configuration errors.
- dialect: The default dialect if no audit dialect is configured.
@classmethod
def
load_multiple( cls, expressions: List[sqlglot.expressions.Expression], *, path: pathlib.Path, dialect: Optional[str] = None) -> Generator[sqlmesh.core.audit.definition.Audit, NoneType, NoneType]:
@classmethod
def load_multiple(
    cls,
    expressions: t.List[exp.Expression],
    *,
    path: pathlib.Path,
    dialect: t.Optional[str] = None,
) -> t.Generator[Audit, None, None]:
    """Load every audit defined in a single parsed audit file.

    The statements are split into groups, each starting at an AUDIT
    statement, and every group is passed to `Audit.load`.

    Args:
        expressions: All parsed statements of the file, in order.
        path: The path of the audit file, forwarded to `Audit.load`.
        dialect: The default dialect, forwarded to `Audit.load`.

    Yields:
        One audit per group of statements.
    """
    pending: t.List[exp.Expression] = []
    for node in expressions:
        starts_new_audit = isinstance(node, d.Audit)
        if starts_new_audit and pending:
            # A new AUDIT statement closes the group collected so far.
            yield Audit.load(expressions=pending, path=path, dialect=dialect)
            pending.clear()
        pending.append(node)
    # Whatever remains (even nothing) forms the final group.
    yield Audit.load(expressions=pending, path=path, dialect=dialect)
def
render_query( self, snapshot_or_model: Union[sqlmesh.core.snapshot.definition.Snapshot, Annotated[Union[sqlmesh.core.model.definition.SqlModel, sqlmesh.core.model.definition.SeedModel, sqlmesh.core.model.definition.PythonModel], FieldInfo(default=PydanticUndefined, discriminator='source_type', extra={})]], *, start: Union[datetime.date, datetime.datetime, str, int, float, NoneType] = None, end: Union[datetime.date, datetime.datetime, str, int, float, NoneType] = None, latest: Union[datetime.date, datetime.datetime, str, int, float, NoneType] = None, snapshots: Optional[Dict[str, sqlmesh.core.snapshot.definition.Snapshot]] = None, is_dev: bool = False, **kwargs: Any) -> sqlglot.expressions.Subqueryable:
def render_query(
    self,
    snapshot_or_model: t.Union[Snapshot, Model],
    *,
    start: t.Optional[TimeLike] = None,
    end: t.Optional[TimeLike] = None,
    latest: t.Optional[TimeLike] = None,
    snapshots: t.Optional[t.Dict[str, Snapshot]] = None,
    is_dev: bool = False,
    **kwargs: t.Any,
) -> exp.Subqueryable:
    """Renders the audit's query.

    Args:
        snapshot_or_model: The snapshot or model which is being audited.
        start: The start datetime to render. The time-column filter on the
            audited table uses the epoch constant when this is not provided.
        end: The end datetime to render. The time-column filter on the
            audited table uses the epoch constant when this is not provided.
        latest: The latest datetime to use for non-incremental queries.
        snapshots: All snapshots (by model name) to use for mapping of physical locations.
        is_dev: Indicates whether the rendering happens in the development mode and temporary
            tables / table clones should be used where applicable.
        kwargs: Additional kwargs to pass to the renderer.

    Returns:
        The rendered expression.
    """
    if not isinstance(snapshot_or_model, _Model):
        snapshot = snapshot_or_model
        model = snapshot.model
        this_model = snapshot.table_name(is_dev=is_dev, for_read=True)
    else:
        model = snapshot_or_model
        this_model = model.name

    renderer = self._create_query_renderer(model)

    audited_rows = exp.select("*").from_(exp.to_table(this_model))
    # The return value is not used, so the filter is presumably applied to
    # `audited_rows` in place.
    renderer.filter_time_column(audited_rows, start or c.EPOCH, end or c.EPOCH)

    return renderer.render(
        start=start,
        end=end,
        latest=latest,
        snapshots=snapshots,
        is_dev=is_dev,
        this_model=audited_rows.subquery(),
        **kwargs,
    )
Renders the audit's query.
Arguments:
- snapshot_or_model: The snapshot or model which is being audited.
- start: The start datetime to render. Defaults to epoch start.
- end: The end datetime to render. Defaults to epoch start.
- latest: The latest datetime to use for non-incremental queries. Defaults to epoch start.
- snapshots: All snapshots (by model name) to use for mapping of physical locations.
- audit_name: The name of audit if the query to render is for an audit.
- is_dev: Indicates whether the rendering happens in the development mode and temporary tables / table clones should be used where applicable.
- kwargs: Additional kwargs to pass to the renderer.
Returns:
The rendered expression.
macro_definitions: List[sqlmesh.core.dialect.MacroDef]
All macro definitions from the list of expressions.
Inherited Members
- pydantic.main.BaseModel
- BaseModel
- parse_obj
- parse_raw
- parse_file
- from_orm
- construct
- copy
- schema
- schema_json
- validate
- update_forward_refs
class AuditResult(PydanticModel):
    """The outcome of evaluating an audit.

    Bundles the audit, the number of records its query returned, and the
    rendered query.
    """

    audit: Audit
    """The audit this result is for."""
    count: int
    """The number of records returned by the audit query."""
    query: exp.Expression
    """The rendered query used by the audit."""
Inherited Members
- pydantic.main.BaseModel
- BaseModel
- parse_obj
- parse_raw
- parse_file
- from_orm
- construct
- copy
- schema
- schema_json
- validate
- update_forward_refs