sqlmesh.core.renderer
1from __future__ import annotations 2 3import typing as t 4from datetime import datetime 5from pathlib import Path 6 7from sqlglot import exp, parse_one 8from sqlglot.errors import OptimizeError, SchemaError, SqlglotError 9from sqlglot.optimizer import optimize 10from sqlglot.optimizer.annotate_types import annotate_types 11from sqlglot.optimizer.expand_laterals import expand_laterals 12from sqlglot.optimizer.pushdown_projections import pushdown_projections 13from sqlglot.optimizer.qualify_columns import qualify_columns 14from sqlglot.optimizer.qualify_tables import qualify_tables 15from sqlglot.optimizer.simplify import simplify 16from sqlglot.schema import MappingSchema 17 18from sqlmesh.core import constants as c 19from sqlmesh.core import dialect as d 20from sqlmesh.core.macros import MacroEvaluator 21from sqlmesh.core.model.kind import TimeColumn 22from sqlmesh.utils.date import TimeLike, date_dict, make_inclusive, to_datetime 23from sqlmesh.utils.errors import ConfigError, MacroEvalError, raise_config_error 24from sqlmesh.utils.jinja import JinjaMacroRegistry 25from sqlmesh.utils.metaprogramming import Executable, prepare_env 26 27if t.TYPE_CHECKING: 28 from sqlmesh.core.snapshot import Snapshot 29 30RENDER_OPTIMIZER_RULES = ( 31 qualify_tables, 32 qualify_columns, 33 expand_laterals, 34 pushdown_projections, 35 annotate_types, 36) 37 38 39def _dates( 40 start: t.Optional[TimeLike] = None, 41 end: t.Optional[TimeLike] = None, 42 latest: t.Optional[TimeLike] = None, 43) -> t.Tuple[datetime, datetime, datetime]: 44 return ( 45 *make_inclusive(start or c.EPOCH, end or c.EPOCH), 46 to_datetime(latest or c.EPOCH), 47 ) 48 49 50class ExpressionRenderer: 51 def __init__( 52 self, 53 expression: exp.Expression, 54 dialect: str, 55 macro_definitions: t.List[d.MacroDef], 56 path: Path = Path(), 57 jinja_macro_registry: t.Optional[JinjaMacroRegistry] = None, 58 python_env: t.Optional[t.Dict[str, Executable]] = None, 59 only_latest: bool = False, 60 ): 61 
self._expression = expression 62 self._dialect = dialect 63 self._macro_definitions = macro_definitions 64 self._path = path 65 self._jinja_macro_registry = jinja_macro_registry or JinjaMacroRegistry() 66 self._python_env = python_env or {} 67 self._only_latest = only_latest 68 69 def render( 70 self, 71 start: t.Optional[TimeLike] = None, 72 end: t.Optional[TimeLike] = None, 73 latest: t.Optional[TimeLike] = None, 74 **kwargs: t.Any, 75 ) -> t.Optional[exp.Expression]: 76 """Renders a expression, expanding macros with provided kwargs 77 78 Args: 79 start: The start datetime to render. Defaults to epoch start. 80 end: The end datetime to render. Defaults to epoch start. 81 latest: The latest datetime to use for non-incremental models. Defaults to epoch start. 82 kwargs: Additional kwargs to pass to the renderer. 83 84 Returns: 85 The rendered expression. 86 """ 87 expression = self._expression 88 89 render_kwargs = { 90 **date_dict(*_dates(start, end, latest), only_latest=self._only_latest), 91 **kwargs, 92 } 93 94 env = prepare_env(self._python_env) 95 jinja_env = self._jinja_macro_registry.build_environment(**{**render_kwargs, **env}) 96 97 if isinstance(expression, d.Jinja): 98 try: 99 rendered_expression = jinja_env.from_string(expression.name).render() 100 if not rendered_expression: 101 return None 102 103 parsed_expression = parse_one(rendered_expression, read=self._dialect) 104 if not parsed_expression: 105 raise ConfigError(f"Failed to parse a expression {expression}") 106 expression = parsed_expression 107 except Exception as ex: 108 raise ConfigError(f"Invalid expression. 
{ex} at '{self._path}'") from ex 109 110 macro_evaluator = MacroEvaluator( 111 self._dialect, 112 python_env=self._python_env, 113 jinja_env=jinja_env, 114 ) 115 macro_evaluator.locals.update(render_kwargs) 116 117 for definition in self._macro_definitions: 118 try: 119 macro_evaluator.evaluate(definition) 120 except MacroEvalError as ex: 121 raise_config_error(f"Failed to evaluate macro '{definition}'. {ex}", self._path) 122 123 try: 124 expression = macro_evaluator.transform(expression) # type: ignore 125 except MacroEvalError as ex: 126 raise_config_error(f"Failed to resolve macro for expression. {ex}", self._path) 127 128 return expression 129 130 131class QueryRenderer(ExpressionRenderer): 132 def __init__( 133 self, 134 query: exp.Expression, 135 dialect: str, 136 macro_definitions: t.List[d.MacroDef], 137 path: Path = Path(), 138 jinja_macro_registry: t.Optional[JinjaMacroRegistry] = None, 139 python_env: t.Optional[t.Dict[str, Executable]] = None, 140 time_column: t.Optional[TimeColumn] = None, 141 time_converter: t.Optional[t.Callable[[TimeLike], exp.Expression]] = None, 142 only_latest: bool = False, 143 ): 144 super().__init__( 145 expression=query, 146 dialect=dialect, 147 macro_definitions=macro_definitions, 148 path=path, 149 jinja_macro_registry=jinja_macro_registry, 150 python_env=python_env, 151 only_latest=only_latest, 152 ) 153 154 self._time_column = time_column 155 self._time_converter = time_converter or (lambda v: exp.convert(v)) 156 157 self._query_cache: t.Dict[t.Tuple[datetime, datetime, datetime], exp.Subqueryable] = {} 158 self._schema: t.Optional[MappingSchema] = None 159 160 def render( 161 self, 162 start: t.Optional[TimeLike] = None, 163 end: t.Optional[TimeLike] = None, 164 latest: t.Optional[TimeLike] = None, 165 add_incremental_filter: bool = False, 166 snapshots: t.Optional[t.Dict[str, Snapshot]] = None, 167 expand: t.Iterable[str] = tuple(), 168 is_dev: bool = False, 169 **kwargs: t.Any, 170 ) -> exp.Subqueryable: 171 """Renders 
a query, expanding macros with provided kwargs, and optionally expanding referenced models. 172 173 Args: 174 query: The query to render. 175 start: The start datetime to render. Defaults to epoch start. 176 end: The end datetime to render. Defaults to epoch start. 177 latest: The latest datetime to use for non-incremental queries. Defaults to epoch start. 178 add_incremental_filter: Add an incremental filter to the query if the model is incremental. 179 snapshots: All upstream snapshots (by model name) to use for expansion and mapping of physical locations. 180 expand: Expand referenced models as subqueries. This is used to bypass backfills when running queries 181 that depend on materialized tables. Model definitions are inlined and can thus be run end to 182 end on the fly. 183 query_key: A query key used to look up a rendered query in the cache. 184 is_dev: Indicates whether the rendering happens in the development mode and temporary 185 tables / table clones should be used where applicable. 186 kwargs: Additional kwargs to pass to the renderer. 187 188 Returns: 189 The rendered expression. 
190 """ 191 from sqlmesh.core.snapshot import to_table_mapping 192 193 dates = _dates(start, end, latest) 194 cache_key = dates 195 196 snapshots = snapshots or {} 197 mapping = to_table_mapping(snapshots.values(), is_dev) 198 # if a snapshot is provided but not mapped, we need to expand it or the query 199 # won't be valid 200 expand = set(expand) | {name for name in snapshots if name not in mapping} 201 202 query = self._expression 203 204 if cache_key not in self._query_cache: 205 query = super().render(start=start, end=end, latest=latest, **kwargs) # type: ignore 206 if not query: 207 raise ConfigError(f"Failed to render query {query}") 208 209 self._query_cache[cache_key] = t.cast(exp.Subqueryable, query) 210 211 try: 212 self._query_cache[cache_key] = optimize( 213 self._query_cache[cache_key], 214 schema=self._schema, 215 rules=RENDER_OPTIMIZER_RULES, 216 remove_unused_selections=False, 217 ) 218 except (SchemaError, OptimizeError): 219 pass 220 except SqlglotError as ex: 221 raise_config_error(f"Invalid model query. {ex}", self._path) 222 223 query = self._query_cache[cache_key] 224 225 if expand: 226 227 def _expand(node: exp.Expression) -> exp.Expression: 228 if isinstance(node, exp.Table) and snapshots: 229 name = exp.table_name(node) 230 model = snapshots[name].model if name in snapshots else None 231 if name in expand and model and not model.is_seed: 232 return model.render_query( 233 start=start, 234 end=end, 235 latest=latest, 236 snapshots=snapshots, 237 expand=expand, 238 is_dev=is_dev, 239 **kwargs, 240 ).subquery( 241 alias=node.alias or model.view_name, 242 copy=False, 243 ) 244 return node 245 246 query = query.transform(_expand) 247 248 # Ensure there is no data leakage in incremental mode by filtering out all 249 # events that have data outside the time window of interest. 250 if add_incremental_filter: 251 # expansion copies the query for us. if it doesn't occur, make sure to copy. 
252 if not expand: 253 query = query.copy() 254 for node, _, _ in query.walk(prune=lambda n, *_: isinstance(n, exp.Select)): 255 if isinstance(node, exp.Select): 256 self.filter_time_column(node, *dates[0:2]) 257 258 if mapping: 259 return exp.replace_tables(query, mapping) 260 261 if not isinstance(query, exp.Subqueryable): 262 raise_config_error(f"Query needs to be a SELECT or a UNION {query}.", self._path) 263 264 return t.cast(exp.Subqueryable, query) 265 266 @property 267 def contains_star_query(self) -> bool: 268 """Returns True if the model's query contains a star projection.""" 269 return any(isinstance(expression, exp.Star) for expression in self.render().expressions) 270 271 def update_schema(self, schema: MappingSchema) -> None: 272 self._schema = schema 273 274 if self.contains_star_query: 275 # We need to re-render in order to expand the star projection 276 self._query_cache.clear() 277 self.render() 278 279 def filter_time_column(self, query: exp.Select, start: TimeLike, end: TimeLike) -> None: 280 """Filters a query on the time column to ensure no data leakage when running in incremental mode.""" 281 if not self._time_column: 282 return 283 284 low = self._time_converter(start) 285 high = self._time_converter(end) 286 287 time_column_identifier = exp.to_identifier(self._time_column.column) 288 if time_column_identifier is None: 289 raise_config_error( 290 f"Time column '{self._time_column.column}' must be a valid identifier.", 291 self._path, 292 ) 293 raise 294 295 time_column_projection = next( 296 ( 297 select 298 for select in query.selects 299 if select.alias_or_name == self._time_column.column 300 ), 301 time_column_identifier, 302 ) 303 304 if isinstance(time_column_projection, exp.Alias): 305 time_column_projection = time_column_projection.this 306 307 between = exp.Between(this=time_column_projection.copy(), low=low, high=high) 308 309 if not query.args.get("group"): 310 query.where(between, copy=False) 311 else: 312 query.having(between, 
copy=False) 313 314 simplify(query)
class
ExpressionRenderer:
51class ExpressionRenderer: 52 def __init__( 53 self, 54 expression: exp.Expression, 55 dialect: str, 56 macro_definitions: t.List[d.MacroDef], 57 path: Path = Path(), 58 jinja_macro_registry: t.Optional[JinjaMacroRegistry] = None, 59 python_env: t.Optional[t.Dict[str, Executable]] = None, 60 only_latest: bool = False, 61 ): 62 self._expression = expression 63 self._dialect = dialect 64 self._macro_definitions = macro_definitions 65 self._path = path 66 self._jinja_macro_registry = jinja_macro_registry or JinjaMacroRegistry() 67 self._python_env = python_env or {} 68 self._only_latest = only_latest 69 70 def render( 71 self, 72 start: t.Optional[TimeLike] = None, 73 end: t.Optional[TimeLike] = None, 74 latest: t.Optional[TimeLike] = None, 75 **kwargs: t.Any, 76 ) -> t.Optional[exp.Expression]: 77 """Renders a expression, expanding macros with provided kwargs 78 79 Args: 80 start: The start datetime to render. Defaults to epoch start. 81 end: The end datetime to render. Defaults to epoch start. 82 latest: The latest datetime to use for non-incremental models. Defaults to epoch start. 83 kwargs: Additional kwargs to pass to the renderer. 84 85 Returns: 86 The rendered expression. 87 """ 88 expression = self._expression 89 90 render_kwargs = { 91 **date_dict(*_dates(start, end, latest), only_latest=self._only_latest), 92 **kwargs, 93 } 94 95 env = prepare_env(self._python_env) 96 jinja_env = self._jinja_macro_registry.build_environment(**{**render_kwargs, **env}) 97 98 if isinstance(expression, d.Jinja): 99 try: 100 rendered_expression = jinja_env.from_string(expression.name).render() 101 if not rendered_expression: 102 return None 103 104 parsed_expression = parse_one(rendered_expression, read=self._dialect) 105 if not parsed_expression: 106 raise ConfigError(f"Failed to parse a expression {expression}") 107 expression = parsed_expression 108 except Exception as ex: 109 raise ConfigError(f"Invalid expression. 
{ex} at '{self._path}'") from ex 110 111 macro_evaluator = MacroEvaluator( 112 self._dialect, 113 python_env=self._python_env, 114 jinja_env=jinja_env, 115 ) 116 macro_evaluator.locals.update(render_kwargs) 117 118 for definition in self._macro_definitions: 119 try: 120 macro_evaluator.evaluate(definition) 121 except MacroEvalError as ex: 122 raise_config_error(f"Failed to evaluate macro '{definition}'. {ex}", self._path) 123 124 try: 125 expression = macro_evaluator.transform(expression) # type: ignore 126 except MacroEvalError as ex: 127 raise_config_error(f"Failed to resolve macro for expression. {ex}", self._path) 128 129 return expression
ExpressionRenderer( expression: sqlglot.expressions.Expression, dialect: str, macro_definitions: List[sqlmesh.core.dialect.MacroDef], path: pathlib.Path = PosixPath('.'), jinja_macro_registry: Optional[sqlmesh.utils.jinja.JinjaMacroRegistry] = None, python_env: Optional[Dict[str, sqlmesh.utils.metaprogramming.Executable]] = None, only_latest: bool = False)
52 def __init__( 53 self, 54 expression: exp.Expression, 55 dialect: str, 56 macro_definitions: t.List[d.MacroDef], 57 path: Path = Path(), 58 jinja_macro_registry: t.Optional[JinjaMacroRegistry] = None, 59 python_env: t.Optional[t.Dict[str, Executable]] = None, 60 only_latest: bool = False, 61 ): 62 self._expression = expression 63 self._dialect = dialect 64 self._macro_definitions = macro_definitions 65 self._path = path 66 self._jinja_macro_registry = jinja_macro_registry or JinjaMacroRegistry() 67 self._python_env = python_env or {} 68 self._only_latest = only_latest
def
render( self, start: Union[datetime.date, datetime.datetime, str, int, float, NoneType] = None, end: Union[datetime.date, datetime.datetime, str, int, float, NoneType] = None, latest: Union[datetime.date, datetime.datetime, str, int, float, NoneType] = None, **kwargs: Any) -> Optional[sqlglot.expressions.Expression]:
70 def render( 71 self, 72 start: t.Optional[TimeLike] = None, 73 end: t.Optional[TimeLike] = None, 74 latest: t.Optional[TimeLike] = None, 75 **kwargs: t.Any, 76 ) -> t.Optional[exp.Expression]: 77 """Renders a expression, expanding macros with provided kwargs 78 79 Args: 80 start: The start datetime to render. Defaults to epoch start. 81 end: The end datetime to render. Defaults to epoch start. 82 latest: The latest datetime to use for non-incremental models. Defaults to epoch start. 83 kwargs: Additional kwargs to pass to the renderer. 84 85 Returns: 86 The rendered expression. 87 """ 88 expression = self._expression 89 90 render_kwargs = { 91 **date_dict(*_dates(start, end, latest), only_latest=self._only_latest), 92 **kwargs, 93 } 94 95 env = prepare_env(self._python_env) 96 jinja_env = self._jinja_macro_registry.build_environment(**{**render_kwargs, **env}) 97 98 if isinstance(expression, d.Jinja): 99 try: 100 rendered_expression = jinja_env.from_string(expression.name).render() 101 if not rendered_expression: 102 return None 103 104 parsed_expression = parse_one(rendered_expression, read=self._dialect) 105 if not parsed_expression: 106 raise ConfigError(f"Failed to parse a expression {expression}") 107 expression = parsed_expression 108 except Exception as ex: 109 raise ConfigError(f"Invalid expression. {ex} at '{self._path}'") from ex 110 111 macro_evaluator = MacroEvaluator( 112 self._dialect, 113 python_env=self._python_env, 114 jinja_env=jinja_env, 115 ) 116 macro_evaluator.locals.update(render_kwargs) 117 118 for definition in self._macro_definitions: 119 try: 120 macro_evaluator.evaluate(definition) 121 except MacroEvalError as ex: 122 raise_config_error(f"Failed to evaluate macro '{definition}'. {ex}", self._path) 123 124 try: 125 expression = macro_evaluator.transform(expression) # type: ignore 126 except MacroEvalError as ex: 127 raise_config_error(f"Failed to resolve macro for expression. {ex}", self._path) 128 129 return expression
Renders an expression, expanding macros with the provided kwargs
Arguments:
- start: The start datetime to render. Defaults to epoch start.
- end: The end datetime to render. Defaults to epoch start.
- latest: The latest datetime to use for non-incremental models. Defaults to epoch start.
- kwargs: Additional kwargs to pass to the renderer.
Returns:
The rendered expression.
132class QueryRenderer(ExpressionRenderer): 133 def __init__( 134 self, 135 query: exp.Expression, 136 dialect: str, 137 macro_definitions: t.List[d.MacroDef], 138 path: Path = Path(), 139 jinja_macro_registry: t.Optional[JinjaMacroRegistry] = None, 140 python_env: t.Optional[t.Dict[str, Executable]] = None, 141 time_column: t.Optional[TimeColumn] = None, 142 time_converter: t.Optional[t.Callable[[TimeLike], exp.Expression]] = None, 143 only_latest: bool = False, 144 ): 145 super().__init__( 146 expression=query, 147 dialect=dialect, 148 macro_definitions=macro_definitions, 149 path=path, 150 jinja_macro_registry=jinja_macro_registry, 151 python_env=python_env, 152 only_latest=only_latest, 153 ) 154 155 self._time_column = time_column 156 self._time_converter = time_converter or (lambda v: exp.convert(v)) 157 158 self._query_cache: t.Dict[t.Tuple[datetime, datetime, datetime], exp.Subqueryable] = {} 159 self._schema: t.Optional[MappingSchema] = None 160 161 def render( 162 self, 163 start: t.Optional[TimeLike] = None, 164 end: t.Optional[TimeLike] = None, 165 latest: t.Optional[TimeLike] = None, 166 add_incremental_filter: bool = False, 167 snapshots: t.Optional[t.Dict[str, Snapshot]] = None, 168 expand: t.Iterable[str] = tuple(), 169 is_dev: bool = False, 170 **kwargs: t.Any, 171 ) -> exp.Subqueryable: 172 """Renders a query, expanding macros with provided kwargs, and optionally expanding referenced models. 173 174 Args: 175 query: The query to render. 176 start: The start datetime to render. Defaults to epoch start. 177 end: The end datetime to render. Defaults to epoch start. 178 latest: The latest datetime to use for non-incremental queries. Defaults to epoch start. 179 add_incremental_filter: Add an incremental filter to the query if the model is incremental. 180 snapshots: All upstream snapshots (by model name) to use for expansion and mapping of physical locations. 181 expand: Expand referenced models as subqueries. 
This is used to bypass backfills when running queries 182 that depend on materialized tables. Model definitions are inlined and can thus be run end to 183 end on the fly. 184 query_key: A query key used to look up a rendered query in the cache. 185 is_dev: Indicates whether the rendering happens in the development mode and temporary 186 tables / table clones should be used where applicable. 187 kwargs: Additional kwargs to pass to the renderer. 188 189 Returns: 190 The rendered expression. 191 """ 192 from sqlmesh.core.snapshot import to_table_mapping 193 194 dates = _dates(start, end, latest) 195 cache_key = dates 196 197 snapshots = snapshots or {} 198 mapping = to_table_mapping(snapshots.values(), is_dev) 199 # if a snapshot is provided but not mapped, we need to expand it or the query 200 # won't be valid 201 expand = set(expand) | {name for name in snapshots if name not in mapping} 202 203 query = self._expression 204 205 if cache_key not in self._query_cache: 206 query = super().render(start=start, end=end, latest=latest, **kwargs) # type: ignore 207 if not query: 208 raise ConfigError(f"Failed to render query {query}") 209 210 self._query_cache[cache_key] = t.cast(exp.Subqueryable, query) 211 212 try: 213 self._query_cache[cache_key] = optimize( 214 self._query_cache[cache_key], 215 schema=self._schema, 216 rules=RENDER_OPTIMIZER_RULES, 217 remove_unused_selections=False, 218 ) 219 except (SchemaError, OptimizeError): 220 pass 221 except SqlglotError as ex: 222 raise_config_error(f"Invalid model query. 
{ex}", self._path) 223 224 query = self._query_cache[cache_key] 225 226 if expand: 227 228 def _expand(node: exp.Expression) -> exp.Expression: 229 if isinstance(node, exp.Table) and snapshots: 230 name = exp.table_name(node) 231 model = snapshots[name].model if name in snapshots else None 232 if name in expand and model and not model.is_seed: 233 return model.render_query( 234 start=start, 235 end=end, 236 latest=latest, 237 snapshots=snapshots, 238 expand=expand, 239 is_dev=is_dev, 240 **kwargs, 241 ).subquery( 242 alias=node.alias or model.view_name, 243 copy=False, 244 ) 245 return node 246 247 query = query.transform(_expand) 248 249 # Ensure there is no data leakage in incremental mode by filtering out all 250 # events that have data outside the time window of interest. 251 if add_incremental_filter: 252 # expansion copies the query for us. if it doesn't occur, make sure to copy. 253 if not expand: 254 query = query.copy() 255 for node, _, _ in query.walk(prune=lambda n, *_: isinstance(n, exp.Select)): 256 if isinstance(node, exp.Select): 257 self.filter_time_column(node, *dates[0:2]) 258 259 if mapping: 260 return exp.replace_tables(query, mapping) 261 262 if not isinstance(query, exp.Subqueryable): 263 raise_config_error(f"Query needs to be a SELECT or a UNION {query}.", self._path) 264 265 return t.cast(exp.Subqueryable, query) 266 267 @property 268 def contains_star_query(self) -> bool: 269 """Returns True if the model's query contains a star projection.""" 270 return any(isinstance(expression, exp.Star) for expression in self.render().expressions) 271 272 def update_schema(self, schema: MappingSchema) -> None: 273 self._schema = schema 274 275 if self.contains_star_query: 276 # We need to re-render in order to expand the star projection 277 self._query_cache.clear() 278 self.render() 279 280 def filter_time_column(self, query: exp.Select, start: TimeLike, end: TimeLike) -> None: 281 """Filters a query on the time column to ensure no data leakage when 
running in incremental mode.""" 282 if not self._time_column: 283 return 284 285 low = self._time_converter(start) 286 high = self._time_converter(end) 287 288 time_column_identifier = exp.to_identifier(self._time_column.column) 289 if time_column_identifier is None: 290 raise_config_error( 291 f"Time column '{self._time_column.column}' must be a valid identifier.", 292 self._path, 293 ) 294 raise 295 296 time_column_projection = next( 297 ( 298 select 299 for select in query.selects 300 if select.alias_or_name == self._time_column.column 301 ), 302 time_column_identifier, 303 ) 304 305 if isinstance(time_column_projection, exp.Alias): 306 time_column_projection = time_column_projection.this 307 308 between = exp.Between(this=time_column_projection.copy(), low=low, high=high) 309 310 if not query.args.get("group"): 311 query.where(between, copy=False) 312 else: 313 query.having(between, copy=False) 314 315 simplify(query)
QueryRenderer( query: sqlglot.expressions.Expression, dialect: str, macro_definitions: List[sqlmesh.core.dialect.MacroDef], path: pathlib.Path = PosixPath('.'), jinja_macro_registry: Optional[sqlmesh.utils.jinja.JinjaMacroRegistry] = None, python_env: Optional[Dict[str, sqlmesh.utils.metaprogramming.Executable]] = None, time_column: Optional[sqlmesh.core.model.kind.TimeColumn] = None, time_converter: Optional[Callable[[Union[datetime.date, datetime.datetime, str, int, float]], sqlglot.expressions.Expression]] = None, only_latest: bool = False)
133 def __init__( 134 self, 135 query: exp.Expression, 136 dialect: str, 137 macro_definitions: t.List[d.MacroDef], 138 path: Path = Path(), 139 jinja_macro_registry: t.Optional[JinjaMacroRegistry] = None, 140 python_env: t.Optional[t.Dict[str, Executable]] = None, 141 time_column: t.Optional[TimeColumn] = None, 142 time_converter: t.Optional[t.Callable[[TimeLike], exp.Expression]] = None, 143 only_latest: bool = False, 144 ): 145 super().__init__( 146 expression=query, 147 dialect=dialect, 148 macro_definitions=macro_definitions, 149 path=path, 150 jinja_macro_registry=jinja_macro_registry, 151 python_env=python_env, 152 only_latest=only_latest, 153 ) 154 155 self._time_column = time_column 156 self._time_converter = time_converter or (lambda v: exp.convert(v)) 157 158 self._query_cache: t.Dict[t.Tuple[datetime, datetime, datetime], exp.Subqueryable] = {} 159 self._schema: t.Optional[MappingSchema] = None
def
render( self, start: Union[datetime.date, datetime.datetime, str, int, float, NoneType] = None, end: Union[datetime.date, datetime.datetime, str, int, float, NoneType] = None, latest: Union[datetime.date, datetime.datetime, str, int, float, NoneType] = None, add_incremental_filter: bool = False, snapshots: Optional[Dict[str, sqlmesh.core.snapshot.definition.Snapshot]] = None, expand: Iterable[str] = (), is_dev: bool = False, **kwargs: Any) -> sqlglot.expressions.Subqueryable:
161 def render( 162 self, 163 start: t.Optional[TimeLike] = None, 164 end: t.Optional[TimeLike] = None, 165 latest: t.Optional[TimeLike] = None, 166 add_incremental_filter: bool = False, 167 snapshots: t.Optional[t.Dict[str, Snapshot]] = None, 168 expand: t.Iterable[str] = tuple(), 169 is_dev: bool = False, 170 **kwargs: t.Any, 171 ) -> exp.Subqueryable: 172 """Renders a query, expanding macros with provided kwargs, and optionally expanding referenced models. 173 174 Args: 175 query: The query to render. 176 start: The start datetime to render. Defaults to epoch start. 177 end: The end datetime to render. Defaults to epoch start. 178 latest: The latest datetime to use for non-incremental queries. Defaults to epoch start. 179 add_incremental_filter: Add an incremental filter to the query if the model is incremental. 180 snapshots: All upstream snapshots (by model name) to use for expansion and mapping of physical locations. 181 expand: Expand referenced models as subqueries. This is used to bypass backfills when running queries 182 that depend on materialized tables. Model definitions are inlined and can thus be run end to 183 end on the fly. 184 query_key: A query key used to look up a rendered query in the cache. 185 is_dev: Indicates whether the rendering happens in the development mode and temporary 186 tables / table clones should be used where applicable. 187 kwargs: Additional kwargs to pass to the renderer. 188 189 Returns: 190 The rendered expression. 
191 """ 192 from sqlmesh.core.snapshot import to_table_mapping 193 194 dates = _dates(start, end, latest) 195 cache_key = dates 196 197 snapshots = snapshots or {} 198 mapping = to_table_mapping(snapshots.values(), is_dev) 199 # if a snapshot is provided but not mapped, we need to expand it or the query 200 # won't be valid 201 expand = set(expand) | {name for name in snapshots if name not in mapping} 202 203 query = self._expression 204 205 if cache_key not in self._query_cache: 206 query = super().render(start=start, end=end, latest=latest, **kwargs) # type: ignore 207 if not query: 208 raise ConfigError(f"Failed to render query {query}") 209 210 self._query_cache[cache_key] = t.cast(exp.Subqueryable, query) 211 212 try: 213 self._query_cache[cache_key] = optimize( 214 self._query_cache[cache_key], 215 schema=self._schema, 216 rules=RENDER_OPTIMIZER_RULES, 217 remove_unused_selections=False, 218 ) 219 except (SchemaError, OptimizeError): 220 pass 221 except SqlglotError as ex: 222 raise_config_error(f"Invalid model query. {ex}", self._path) 223 224 query = self._query_cache[cache_key] 225 226 if expand: 227 228 def _expand(node: exp.Expression) -> exp.Expression: 229 if isinstance(node, exp.Table) and snapshots: 230 name = exp.table_name(node) 231 model = snapshots[name].model if name in snapshots else None 232 if name in expand and model and not model.is_seed: 233 return model.render_query( 234 start=start, 235 end=end, 236 latest=latest, 237 snapshots=snapshots, 238 expand=expand, 239 is_dev=is_dev, 240 **kwargs, 241 ).subquery( 242 alias=node.alias or model.view_name, 243 copy=False, 244 ) 245 return node 246 247 query = query.transform(_expand) 248 249 # Ensure there is no data leakage in incremental mode by filtering out all 250 # events that have data outside the time window of interest. 251 if add_incremental_filter: 252 # expansion copies the query for us. if it doesn't occur, make sure to copy. 
253 if not expand: 254 query = query.copy() 255 for node, _, _ in query.walk(prune=lambda n, *_: isinstance(n, exp.Select)): 256 if isinstance(node, exp.Select): 257 self.filter_time_column(node, *dates[0:2]) 258 259 if mapping: 260 return exp.replace_tables(query, mapping) 261 262 if not isinstance(query, exp.Subqueryable): 263 raise_config_error(f"Query needs to be a SELECT or a UNION {query}.", self._path) 264 265 return t.cast(exp.Subqueryable, query)
Renders a query, expanding macros with provided kwargs, and optionally expanding referenced models.
Arguments:
- query: The query to render.
- start: The start datetime to render. Defaults to epoch start.
- end: The end datetime to render. Defaults to epoch start.
- latest: The latest datetime to use for non-incremental queries. Defaults to epoch start.
- add_incremental_filter: Add an incremental filter to the query if the model is incremental.
- snapshots: All upstream snapshots (by model name) to use for expansion and mapping of physical locations.
- expand: Expand referenced models as subqueries. This is used to bypass backfills when running queries that depend on materialized tables. Model definitions are inlined and can thus be run end to end on the fly.
- query_key: A query key used to look up a rendered query in the cache.
- is_dev: Indicates whether the rendering happens in the development mode and temporary tables / table clones should be used where applicable.
- kwargs: Additional kwargs to pass to the renderer.
Returns:
The rendered expression.
def
filter_time_column( self, query: sqlglot.expressions.Select, start: Union[datetime.date, datetime.datetime, str, int, float], end: Union[datetime.date, datetime.datetime, str, int, float]) -> None:
280 def filter_time_column(self, query: exp.Select, start: TimeLike, end: TimeLike) -> None: 281 """Filters a query on the time column to ensure no data leakage when running in incremental mode.""" 282 if not self._time_column: 283 return 284 285 low = self._time_converter(start) 286 high = self._time_converter(end) 287 288 time_column_identifier = exp.to_identifier(self._time_column.column) 289 if time_column_identifier is None: 290 raise_config_error( 291 f"Time column '{self._time_column.column}' must be a valid identifier.", 292 self._path, 293 ) 294 raise 295 296 time_column_projection = next( 297 ( 298 select 299 for select in query.selects 300 if select.alias_or_name == self._time_column.column 301 ), 302 time_column_identifier, 303 ) 304 305 if isinstance(time_column_projection, exp.Alias): 306 time_column_projection = time_column_projection.this 307 308 between = exp.Between(this=time_column_projection.copy(), low=low, high=high) 309 310 if not query.args.get("group"): 311 query.where(between, copy=False) 312 else: 313 query.having(between, copy=False) 314 315 simplify(query)
Filters a query on the time column to ensure no data leakage when running in incremental mode.