Edit on GitHub

sqlmesh.core.renderer

  1from __future__ import annotations
  2
  3import typing as t
  4from datetime import datetime
  5from pathlib import Path
  6
  7from sqlglot import exp, parse_one
  8from sqlglot.errors import OptimizeError, SchemaError, SqlglotError
  9from sqlglot.optimizer import optimize
 10from sqlglot.optimizer.annotate_types import annotate_types
 11from sqlglot.optimizer.expand_laterals import expand_laterals
 12from sqlglot.optimizer.pushdown_projections import pushdown_projections
 13from sqlglot.optimizer.qualify_columns import qualify_columns
 14from sqlglot.optimizer.qualify_tables import qualify_tables
 15from sqlglot.optimizer.simplify import simplify
 16from sqlglot.schema import MappingSchema
 17
 18from sqlmesh.core import constants as c
 19from sqlmesh.core import dialect as d
 20from sqlmesh.core.macros import MacroEvaluator
 21from sqlmesh.core.model.kind import TimeColumn
 22from sqlmesh.utils.date import TimeLike, date_dict, make_inclusive, to_datetime
 23from sqlmesh.utils.errors import ConfigError, MacroEvalError, raise_config_error
 24from sqlmesh.utils.jinja import JinjaMacroRegistry
 25from sqlmesh.utils.metaprogramming import Executable, prepare_env
 26
 27if t.TYPE_CHECKING:
 28    from sqlmesh.core.snapshot import Snapshot
 29
 30RENDER_OPTIMIZER_RULES = (
 31    qualify_tables,
 32    qualify_columns,
 33    expand_laterals,
 34    pushdown_projections,
 35    annotate_types,
 36)
 37
 38
 39def _dates(
 40    start: t.Optional[TimeLike] = None,
 41    end: t.Optional[TimeLike] = None,
 42    latest: t.Optional[TimeLike] = None,
 43) -> t.Tuple[datetime, datetime, datetime]:
 44    return (
 45        *make_inclusive(start or c.EPOCH, end or c.EPOCH),
 46        to_datetime(latest or c.EPOCH),
 47    )
 48
 49
class ExpressionRenderer:
    """Renders a SQL expression, expanding SQLMesh macros and (optionally) Jinja templates."""

    def __init__(
        self,
        expression: exp.Expression,
        dialect: str,
        macro_definitions: t.List[d.MacroDef],
        path: Path = Path(),
        jinja_macro_registry: t.Optional[JinjaMacroRegistry] = None,
        python_env: t.Optional[t.Dict[str, Executable]] = None,
        only_latest: bool = False,
    ):
        """
        Args:
            expression: The expression to render.
            dialect: The SQL dialect used to parse Jinja-rendered SQL text.
            macro_definitions: Macro definitions evaluated before the expression is transformed.
            path: Path reported in error messages to identify where the expression came from.
            jinja_macro_registry: Registry of Jinja macros; defaults to a fresh empty registry.
            python_env: Serialized python environment made available during rendering.
            only_latest: Passed through to `date_dict`; controls which date keys are exposed
                to the render context.
        """
        self._expression = expression
        self._dialect = dialect
        self._macro_definitions = macro_definitions
        self._path = path
        self._jinja_macro_registry = jinja_macro_registry or JinjaMacroRegistry()
        self._python_env = python_env or {}
        self._only_latest = only_latest

    def render(
        self,
        start: t.Optional[TimeLike] = None,
        end: t.Optional[TimeLike] = None,
        latest: t.Optional[TimeLike] = None,
        **kwargs: t.Any,
    ) -> t.Optional[exp.Expression]:
        """Renders an expression, expanding macros with provided kwargs.

        Args:
            start: The start datetime to render. Defaults to epoch start.
            end: The end datetime to render. Defaults to epoch start.
            latest: The latest datetime to use for non-incremental models. Defaults to epoch start.
            kwargs: Additional kwargs to pass to the renderer.

        Returns:
            The rendered expression, or None if a Jinja expression rendered to an empty string.

        Raises:
            ConfigError: If a Jinja expression fails to render or parse, or if a macro fails
                to evaluate (raised via `raise_config_error`).
        """
        expression = self._expression

        # Date-derived render variables plus caller kwargs; caller kwargs win on key conflict.
        render_kwargs = {
            **date_dict(*_dates(start, end, latest), only_latest=self._only_latest),
            **kwargs,
        }

        env = prepare_env(self._python_env)
        jinja_env = self._jinja_macro_registry.build_environment(**{**render_kwargs, **env})

        if isinstance(expression, d.Jinja):
            try:
                # Render the Jinja template to SQL text first, then re-parse that text.
                rendered_expression = jinja_env.from_string(expression.name).render()
                if not rendered_expression:
                    # Template rendered to nothing: there is no expression to return.
                    return None

                parsed_expression = parse_one(rendered_expression, read=self._dialect)
                if not parsed_expression:
                    raise ConfigError(f"Failed to parse a expression {expression}")
                expression = parsed_expression
            except Exception as ex:
                # NOTE(review): this broad handler also re-wraps the ConfigError raised just above.
                raise ConfigError(f"Invalid expression. {ex} at '{self._path}'") from ex

        macro_evaluator = MacroEvaluator(
            self._dialect,
            python_env=self._python_env,
            jinja_env=jinja_env,
        )
        macro_evaluator.locals.update(render_kwargs)

        # Evaluate user-defined macros so they are registered before the transform below.
        for definition in self._macro_definitions:
            try:
                macro_evaluator.evaluate(definition)
            except MacroEvalError as ex:
                raise_config_error(f"Failed to evaluate macro '{definition}'. {ex}", self._path)

        try:
            expression = macro_evaluator.transform(expression)  # type: ignore
        except MacroEvalError as ex:
            raise_config_error(f"Failed to resolve macro for expression. {ex}", self._path)

        return expression
129
130
class QueryRenderer(ExpressionRenderer):
    """Renders model queries, adding per-date-range caching, schema-based optimization,
    referenced-model expansion, and incremental time-column filtering on top of
    `ExpressionRenderer`."""

    def __init__(
        self,
        query: exp.Expression,
        dialect: str,
        macro_definitions: t.List[d.MacroDef],
        path: Path = Path(),
        jinja_macro_registry: t.Optional[JinjaMacroRegistry] = None,
        python_env: t.Optional[t.Dict[str, Executable]] = None,
        time_column: t.Optional[TimeColumn] = None,
        time_converter: t.Optional[t.Callable[[TimeLike], exp.Expression]] = None,
        only_latest: bool = False,
    ):
        """
        Args:
            query: The model query to render.
            time_column: The model's time column, if any; enables incremental filtering.
            time_converter: Converts a TimeLike value into a SQL expression used in
                time-column comparisons. Defaults to `exp.convert`.

        The remaining arguments are forwarded unchanged to `ExpressionRenderer`.
        """
        super().__init__(
            expression=query,
            dialect=dialect,
            macro_definitions=macro_definitions,
            path=path,
            jinja_macro_registry=jinja_macro_registry,
            python_env=python_env,
            only_latest=only_latest,
        )

        self._time_column = time_column
        self._time_converter = time_converter or (lambda v: exp.convert(v))

        # Rendered (and optimized, when a schema is available) queries keyed by the
        # inclusive (start, end, latest) datetime tuple.
        self._query_cache: t.Dict[t.Tuple[datetime, datetime, datetime], exp.Subqueryable] = {}
        # Column schema used by the optimizer; populated later via `update_schema`.
        self._schema: t.Optional[MappingSchema] = None

    def render(
        self,
        start: t.Optional[TimeLike] = None,
        end: t.Optional[TimeLike] = None,
        latest: t.Optional[TimeLike] = None,
        add_incremental_filter: bool = False,
        snapshots: t.Optional[t.Dict[str, Snapshot]] = None,
        expand: t.Iterable[str] = tuple(),
        is_dev: bool = False,
        **kwargs: t.Any,
    ) -> exp.Subqueryable:
        """Renders a query, expanding macros with provided kwargs, and optionally expanding referenced models.

        Args:
            start: The start datetime to render. Defaults to epoch start.
            end: The end datetime to render. Defaults to epoch start.
            latest: The latest datetime to use for non-incremental queries. Defaults to epoch start.
            add_incremental_filter: Add an incremental filter to the query if the model is incremental.
            snapshots: All upstream snapshots (by model name) to use for expansion and mapping of physical locations.
            expand: Expand referenced models as subqueries. This is used to bypass backfills when running queries
                that depend on materialized tables.  Model definitions are inlined and can thus be run end to
                end on the fly.
            is_dev: Indicates whether the rendering happens in the development mode and temporary
                tables / table clones should be used where applicable.
            kwargs: Additional kwargs to pass to the renderer.

        Returns:
            The rendered expression.

        Raises:
            ConfigError: If the expression fails to render or is not a SELECT/UNION
                (the latter raised via `raise_config_error`).
        """
        from sqlmesh.core.snapshot import to_table_mapping

        # Renders are cached per normalized (start, end, latest) datetime tuple.
        dates = _dates(start, end, latest)
        cache_key = dates

        snapshots = snapshots or {}
        mapping = to_table_mapping(snapshots.values(), is_dev)
        # if a snapshot is provided but not mapped, we need to expand it or the query
        # won't be valid
        expand = set(expand) | {name for name in snapshots if name not in mapping}

        # NOTE(review): dead store — `query` is unconditionally reassigned from the cache below.
        query = self._expression

        if cache_key not in self._query_cache:
            query = super().render(start=start, end=end, latest=latest, **kwargs)  # type: ignore
            if not query:
                raise ConfigError(f"Failed to render query {query}")

            self._query_cache[cache_key] = t.cast(exp.Subqueryable, query)

            try:
                # Best-effort optimization (qualification, projection pushdown, type
                # annotation); the un-optimized render is kept if this fails.
                self._query_cache[cache_key] = optimize(
                    self._query_cache[cache_key],
                    schema=self._schema,
                    rules=RENDER_OPTIMIZER_RULES,
                    remove_unused_selections=False,
                )
            except (SchemaError, OptimizeError):
                # Expected when the schema is missing/incomplete; fall back silently.
                pass
            except SqlglotError as ex:
                raise_config_error(f"Invalid model query. {ex}", self._path)

        query = self._query_cache[cache_key]

        if expand:

            def _expand(node: exp.Expression) -> exp.Expression:
                # Inline referenced (non-seed) models as aliased subqueries.
                if isinstance(node, exp.Table) and snapshots:
                    name = exp.table_name(node)
                    model = snapshots[name].model if name in snapshots else None
                    if name in expand and model and not model.is_seed:
                        return model.render_query(
                            start=start,
                            end=end,
                            latest=latest,
                            snapshots=snapshots,
                            expand=expand,
                            is_dev=is_dev,
                            **kwargs,
                        ).subquery(
                            alias=node.alias or model.view_name,
                            copy=False,
                        )
                return node

            query = query.transform(_expand)

        # Ensure there is no data leakage in incremental mode by filtering out all
        # events that have data outside the time window of interest.
        if add_incremental_filter:
            # expansion copies the query for us. if it doesn't occur, make sure to copy.
            if not expand:
                query = query.copy()
            # Only top-level SELECTs are filtered; pruning stops the walk at nested SELECTs.
            for node, _, _ in query.walk(prune=lambda n, *_: isinstance(n, exp.Select)):
                if isinstance(node, exp.Select):
                    self.filter_time_column(node, *dates[0:2])

        if mapping:
            return exp.replace_tables(query, mapping)

        if not isinstance(query, exp.Subqueryable):
            raise_config_error(f"Query needs to be a SELECT or a UNION {query}.", self._path)

        return t.cast(exp.Subqueryable, query)

    @property
    def contains_star_query(self) -> bool:
        """Returns True if the model's query contains a star projection."""
        return any(isinstance(expression, exp.Star) for expression in self.render().expressions)

    def update_schema(self, schema: MappingSchema) -> None:
        """Sets the schema used by the optimizer and re-renders if a star projection
        needs to be expanded against it."""
        self._schema = schema

        if self.contains_star_query:
            # We need to re-render in order to expand the star projection
            self._query_cache.clear()
            self.render()

    def filter_time_column(self, query: exp.Select, start: TimeLike, end: TimeLike) -> None:
        """Filters a query on the time column to ensure no data leakage when running in incremental mode."""
        if not self._time_column:
            return

        low = self._time_converter(start)
        high = self._time_converter(end)

        time_column_identifier = exp.to_identifier(self._time_column.column)
        if time_column_identifier is None:
            raise_config_error(
                f"Time column '{self._time_column.column}' must be a valid identifier.",
                self._path,
            )
            # NOTE(review): presumably unreachable — raise_config_error appears to always
            # raise; this bare `raise` seems to exist only for type-narrowing. Confirm.
            raise

        # Prefer the projection aliased to the time column so the filter matches the
        # SELECT's own expression; fall back to a plain column identifier.
        time_column_projection = next(
            (
                select
                for select in query.selects
                if select.alias_or_name == self._time_column.column
            ),
            time_column_identifier,
        )

        if isinstance(time_column_projection, exp.Alias):
            time_column_projection = time_column_projection.this

        between = exp.Between(this=time_column_projection.copy(), low=low, high=high)

        if not query.args.get("group"):
            query.where(between, copy=False)
        else:
            # Aggregated queries filter post-aggregation via HAVING.
            query.having(between, copy=False)

        simplify(query)
class ExpressionRenderer:
 51class ExpressionRenderer:
 52    def __init__(
 53        self,
 54        expression: exp.Expression,
 55        dialect: str,
 56        macro_definitions: t.List[d.MacroDef],
 57        path: Path = Path(),
 58        jinja_macro_registry: t.Optional[JinjaMacroRegistry] = None,
 59        python_env: t.Optional[t.Dict[str, Executable]] = None,
 60        only_latest: bool = False,
 61    ):
 62        self._expression = expression
 63        self._dialect = dialect
 64        self._macro_definitions = macro_definitions
 65        self._path = path
 66        self._jinja_macro_registry = jinja_macro_registry or JinjaMacroRegistry()
 67        self._python_env = python_env or {}
 68        self._only_latest = only_latest
 69
 70    def render(
 71        self,
 72        start: t.Optional[TimeLike] = None,
 73        end: t.Optional[TimeLike] = None,
 74        latest: t.Optional[TimeLike] = None,
 75        **kwargs: t.Any,
 76    ) -> t.Optional[exp.Expression]:
 77        """Renders a expression, expanding macros with provided kwargs
 78
 79        Args:
 80            start: The start datetime to render. Defaults to epoch start.
 81            end: The end datetime to render. Defaults to epoch start.
 82            latest: The latest datetime to use for non-incremental models. Defaults to epoch start.
 83            kwargs: Additional kwargs to pass to the renderer.
 84
 85        Returns:
 86            The rendered expression.
 87        """
 88        expression = self._expression
 89
 90        render_kwargs = {
 91            **date_dict(*_dates(start, end, latest), only_latest=self._only_latest),
 92            **kwargs,
 93        }
 94
 95        env = prepare_env(self._python_env)
 96        jinja_env = self._jinja_macro_registry.build_environment(**{**render_kwargs, **env})
 97
 98        if isinstance(expression, d.Jinja):
 99            try:
100                rendered_expression = jinja_env.from_string(expression.name).render()
101                if not rendered_expression:
102                    return None
103
104                parsed_expression = parse_one(rendered_expression, read=self._dialect)
105                if not parsed_expression:
106                    raise ConfigError(f"Failed to parse a expression {expression}")
107                expression = parsed_expression
108            except Exception as ex:
109                raise ConfigError(f"Invalid expression. {ex} at '{self._path}'") from ex
110
111        macro_evaluator = MacroEvaluator(
112            self._dialect,
113            python_env=self._python_env,
114            jinja_env=jinja_env,
115        )
116        macro_evaluator.locals.update(render_kwargs)
117
118        for definition in self._macro_definitions:
119            try:
120                macro_evaluator.evaluate(definition)
121            except MacroEvalError as ex:
122                raise_config_error(f"Failed to evaluate macro '{definition}'. {ex}", self._path)
123
124        try:
125            expression = macro_evaluator.transform(expression)  # type: ignore
126        except MacroEvalError as ex:
127            raise_config_error(f"Failed to resolve macro for expression. {ex}", self._path)
128
129        return expression
ExpressionRenderer( expression: sqlglot.expressions.Expression, dialect: str, macro_definitions: List[sqlmesh.core.dialect.MacroDef], path: pathlib.Path = PosixPath('.'), jinja_macro_registry: Optional[sqlmesh.utils.jinja.JinjaMacroRegistry] = None, python_env: Optional[Dict[str, sqlmesh.utils.metaprogramming.Executable]] = None, only_latest: bool = False)
52    def __init__(
53        self,
54        expression: exp.Expression,
55        dialect: str,
56        macro_definitions: t.List[d.MacroDef],
57        path: Path = Path(),
58        jinja_macro_registry: t.Optional[JinjaMacroRegistry] = None,
59        python_env: t.Optional[t.Dict[str, Executable]] = None,
60        only_latest: bool = False,
61    ):
62        self._expression = expression
63        self._dialect = dialect
64        self._macro_definitions = macro_definitions
65        self._path = path
66        self._jinja_macro_registry = jinja_macro_registry or JinjaMacroRegistry()
67        self._python_env = python_env or {}
68        self._only_latest = only_latest
def render( self, start: Union[datetime.date, datetime.datetime, str, int, float, NoneType] = None, end: Union[datetime.date, datetime.datetime, str, int, float, NoneType] = None, latest: Union[datetime.date, datetime.datetime, str, int, float, NoneType] = None, **kwargs: Any) -> Optional[sqlglot.expressions.Expression]:
 70    def render(
 71        self,
 72        start: t.Optional[TimeLike] = None,
 73        end: t.Optional[TimeLike] = None,
 74        latest: t.Optional[TimeLike] = None,
 75        **kwargs: t.Any,
 76    ) -> t.Optional[exp.Expression]:
 77        """Renders a expression, expanding macros with provided kwargs
 78
 79        Args:
 80            start: The start datetime to render. Defaults to epoch start.
 81            end: The end datetime to render. Defaults to epoch start.
 82            latest: The latest datetime to use for non-incremental models. Defaults to epoch start.
 83            kwargs: Additional kwargs to pass to the renderer.
 84
 85        Returns:
 86            The rendered expression.
 87        """
 88        expression = self._expression
 89
 90        render_kwargs = {
 91            **date_dict(*_dates(start, end, latest), only_latest=self._only_latest),
 92            **kwargs,
 93        }
 94
 95        env = prepare_env(self._python_env)
 96        jinja_env = self._jinja_macro_registry.build_environment(**{**render_kwargs, **env})
 97
 98        if isinstance(expression, d.Jinja):
 99            try:
100                rendered_expression = jinja_env.from_string(expression.name).render()
101                if not rendered_expression:
102                    return None
103
104                parsed_expression = parse_one(rendered_expression, read=self._dialect)
105                if not parsed_expression:
106                    raise ConfigError(f"Failed to parse a expression {expression}")
107                expression = parsed_expression
108            except Exception as ex:
109                raise ConfigError(f"Invalid expression. {ex} at '{self._path}'") from ex
110
111        macro_evaluator = MacroEvaluator(
112            self._dialect,
113            python_env=self._python_env,
114            jinja_env=jinja_env,
115        )
116        macro_evaluator.locals.update(render_kwargs)
117
118        for definition in self._macro_definitions:
119            try:
120                macro_evaluator.evaluate(definition)
121            except MacroEvalError as ex:
122                raise_config_error(f"Failed to evaluate macro '{definition}'. {ex}", self._path)
123
124        try:
125            expression = macro_evaluator.transform(expression)  # type: ignore
126        except MacroEvalError as ex:
127            raise_config_error(f"Failed to resolve macro for expression. {ex}", self._path)
128
129        return expression

Renders an expression, expanding macros with provided kwargs

Arguments:
  • start: The start datetime to render. Defaults to epoch start.
  • end: The end datetime to render. Defaults to epoch start.
  • latest: The latest datetime to use for non-incremental models. Defaults to epoch start.
  • kwargs: Additional kwargs to pass to the renderer.
Returns:

The rendered expression.

class QueryRenderer(ExpressionRenderer):
132class QueryRenderer(ExpressionRenderer):
133    def __init__(
134        self,
135        query: exp.Expression,
136        dialect: str,
137        macro_definitions: t.List[d.MacroDef],
138        path: Path = Path(),
139        jinja_macro_registry: t.Optional[JinjaMacroRegistry] = None,
140        python_env: t.Optional[t.Dict[str, Executable]] = None,
141        time_column: t.Optional[TimeColumn] = None,
142        time_converter: t.Optional[t.Callable[[TimeLike], exp.Expression]] = None,
143        only_latest: bool = False,
144    ):
145        super().__init__(
146            expression=query,
147            dialect=dialect,
148            macro_definitions=macro_definitions,
149            path=path,
150            jinja_macro_registry=jinja_macro_registry,
151            python_env=python_env,
152            only_latest=only_latest,
153        )
154
155        self._time_column = time_column
156        self._time_converter = time_converter or (lambda v: exp.convert(v))
157
158        self._query_cache: t.Dict[t.Tuple[datetime, datetime, datetime], exp.Subqueryable] = {}
159        self._schema: t.Optional[MappingSchema] = None
160
161    def render(
162        self,
163        start: t.Optional[TimeLike] = None,
164        end: t.Optional[TimeLike] = None,
165        latest: t.Optional[TimeLike] = None,
166        add_incremental_filter: bool = False,
167        snapshots: t.Optional[t.Dict[str, Snapshot]] = None,
168        expand: t.Iterable[str] = tuple(),
169        is_dev: bool = False,
170        **kwargs: t.Any,
171    ) -> exp.Subqueryable:
172        """Renders a query, expanding macros with provided kwargs, and optionally expanding referenced models.
173
174        Args:
175            query: The query to render.
176            start: The start datetime to render. Defaults to epoch start.
177            end: The end datetime to render. Defaults to epoch start.
178            latest: The latest datetime to use for non-incremental queries. Defaults to epoch start.
179            add_incremental_filter: Add an incremental filter to the query if the model is incremental.
180            snapshots: All upstream snapshots (by model name) to use for expansion and mapping of physical locations.
181            expand: Expand referenced models as subqueries. This is used to bypass backfills when running queries
182                that depend on materialized tables.  Model definitions are inlined and can thus be run end to
183                end on the fly.
184            query_key: A query key used to look up a rendered query in the cache.
185            is_dev: Indicates whether the rendering happens in the development mode and temporary
186                tables / table clones should be used where applicable.
187            kwargs: Additional kwargs to pass to the renderer.
188
189        Returns:
190            The rendered expression.
191        """
192        from sqlmesh.core.snapshot import to_table_mapping
193
194        dates = _dates(start, end, latest)
195        cache_key = dates
196
197        snapshots = snapshots or {}
198        mapping = to_table_mapping(snapshots.values(), is_dev)
199        # if a snapshot is provided but not mapped, we need to expand it or the query
200        # won't be valid
201        expand = set(expand) | {name for name in snapshots if name not in mapping}
202
203        query = self._expression
204
205        if cache_key not in self._query_cache:
206            query = super().render(start=start, end=end, latest=latest, **kwargs)  # type: ignore
207            if not query:
208                raise ConfigError(f"Failed to render query {query}")
209
210            self._query_cache[cache_key] = t.cast(exp.Subqueryable, query)
211
212            try:
213                self._query_cache[cache_key] = optimize(
214                    self._query_cache[cache_key],
215                    schema=self._schema,
216                    rules=RENDER_OPTIMIZER_RULES,
217                    remove_unused_selections=False,
218                )
219            except (SchemaError, OptimizeError):
220                pass
221            except SqlglotError as ex:
222                raise_config_error(f"Invalid model query. {ex}", self._path)
223
224        query = self._query_cache[cache_key]
225
226        if expand:
227
228            def _expand(node: exp.Expression) -> exp.Expression:
229                if isinstance(node, exp.Table) and snapshots:
230                    name = exp.table_name(node)
231                    model = snapshots[name].model if name in snapshots else None
232                    if name in expand and model and not model.is_seed:
233                        return model.render_query(
234                            start=start,
235                            end=end,
236                            latest=latest,
237                            snapshots=snapshots,
238                            expand=expand,
239                            is_dev=is_dev,
240                            **kwargs,
241                        ).subquery(
242                            alias=node.alias or model.view_name,
243                            copy=False,
244                        )
245                return node
246
247            query = query.transform(_expand)
248
249        # Ensure there is no data leakage in incremental mode by filtering out all
250        # events that have data outside the time window of interest.
251        if add_incremental_filter:
252            # expansion copies the query for us. if it doesn't occur, make sure to copy.
253            if not expand:
254                query = query.copy()
255            for node, _, _ in query.walk(prune=lambda n, *_: isinstance(n, exp.Select)):
256                if isinstance(node, exp.Select):
257                    self.filter_time_column(node, *dates[0:2])
258
259        if mapping:
260            return exp.replace_tables(query, mapping)
261
262        if not isinstance(query, exp.Subqueryable):
263            raise_config_error(f"Query needs to be a SELECT or a UNION {query}.", self._path)
264
265        return t.cast(exp.Subqueryable, query)
266
267    @property
268    def contains_star_query(self) -> bool:
269        """Returns True if the model's query contains a star projection."""
270        return any(isinstance(expression, exp.Star) for expression in self.render().expressions)
271
272    def update_schema(self, schema: MappingSchema) -> None:
273        self._schema = schema
274
275        if self.contains_star_query:
276            # We need to re-render in order to expand the star projection
277            self._query_cache.clear()
278            self.render()
279
280    def filter_time_column(self, query: exp.Select, start: TimeLike, end: TimeLike) -> None:
281        """Filters a query on the time column to ensure no data leakage when running in incremental mode."""
282        if not self._time_column:
283            return
284
285        low = self._time_converter(start)
286        high = self._time_converter(end)
287
288        time_column_identifier = exp.to_identifier(self._time_column.column)
289        if time_column_identifier is None:
290            raise_config_error(
291                f"Time column '{self._time_column.column}' must be a valid identifier.",
292                self._path,
293            )
294            raise
295
296        time_column_projection = next(
297            (
298                select
299                for select in query.selects
300                if select.alias_or_name == self._time_column.column
301            ),
302            time_column_identifier,
303        )
304
305        if isinstance(time_column_projection, exp.Alias):
306            time_column_projection = time_column_projection.this
307
308        between = exp.Between(this=time_column_projection.copy(), low=low, high=high)
309
310        if not query.args.get("group"):
311            query.where(between, copy=False)
312        else:
313            query.having(between, copy=False)
314
315        simplify(query)
QueryRenderer( query: sqlglot.expressions.Expression, dialect: str, macro_definitions: List[sqlmesh.core.dialect.MacroDef], path: pathlib.Path = PosixPath('.'), jinja_macro_registry: Optional[sqlmesh.utils.jinja.JinjaMacroRegistry] = None, python_env: Optional[Dict[str, sqlmesh.utils.metaprogramming.Executable]] = None, time_column: Optional[sqlmesh.core.model.kind.TimeColumn] = None, time_converter: Optional[Callable[[Union[datetime.date, datetime.datetime, str, int, float]], sqlglot.expressions.Expression]] = None, only_latest: bool = False)
133    def __init__(
134        self,
135        query: exp.Expression,
136        dialect: str,
137        macro_definitions: t.List[d.MacroDef],
138        path: Path = Path(),
139        jinja_macro_registry: t.Optional[JinjaMacroRegistry] = None,
140        python_env: t.Optional[t.Dict[str, Executable]] = None,
141        time_column: t.Optional[TimeColumn] = None,
142        time_converter: t.Optional[t.Callable[[TimeLike], exp.Expression]] = None,
143        only_latest: bool = False,
144    ):
145        super().__init__(
146            expression=query,
147            dialect=dialect,
148            macro_definitions=macro_definitions,
149            path=path,
150            jinja_macro_registry=jinja_macro_registry,
151            python_env=python_env,
152            only_latest=only_latest,
153        )
154
155        self._time_column = time_column
156        self._time_converter = time_converter or (lambda v: exp.convert(v))
157
158        self._query_cache: t.Dict[t.Tuple[datetime, datetime, datetime], exp.Subqueryable] = {}
159        self._schema: t.Optional[MappingSchema] = None
def render( self, start: Union[datetime.date, datetime.datetime, str, int, float, NoneType] = None, end: Union[datetime.date, datetime.datetime, str, int, float, NoneType] = None, latest: Union[datetime.date, datetime.datetime, str, int, float, NoneType] = None, add_incremental_filter: bool = False, snapshots: Optional[Dict[str, sqlmesh.core.snapshot.definition.Snapshot]] = None, expand: Iterable[str] = (), is_dev: bool = False, **kwargs: Any) -> sqlglot.expressions.Subqueryable:
161    def render(
162        self,
163        start: t.Optional[TimeLike] = None,
164        end: t.Optional[TimeLike] = None,
165        latest: t.Optional[TimeLike] = None,
166        add_incremental_filter: bool = False,
167        snapshots: t.Optional[t.Dict[str, Snapshot]] = None,
168        expand: t.Iterable[str] = tuple(),
169        is_dev: bool = False,
170        **kwargs: t.Any,
171    ) -> exp.Subqueryable:
172        """Renders a query, expanding macros with provided kwargs, and optionally expanding referenced models.
173
174        Args:
175            query: The query to render.
176            start: The start datetime to render. Defaults to epoch start.
177            end: The end datetime to render. Defaults to epoch start.
178            latest: The latest datetime to use for non-incremental queries. Defaults to epoch start.
179            add_incremental_filter: Add an incremental filter to the query if the model is incremental.
180            snapshots: All upstream snapshots (by model name) to use for expansion and mapping of physical locations.
181            expand: Expand referenced models as subqueries. This is used to bypass backfills when running queries
182                that depend on materialized tables.  Model definitions are inlined and can thus be run end to
183                end on the fly.
184            query_key: A query key used to look up a rendered query in the cache.
185            is_dev: Indicates whether the rendering happens in the development mode and temporary
186                tables / table clones should be used where applicable.
187            kwargs: Additional kwargs to pass to the renderer.
188
189        Returns:
190            The rendered expression.
191        """
192        from sqlmesh.core.snapshot import to_table_mapping
193
194        dates = _dates(start, end, latest)
195        cache_key = dates
196
197        snapshots = snapshots or {}
198        mapping = to_table_mapping(snapshots.values(), is_dev)
199        # if a snapshot is provided but not mapped, we need to expand it or the query
200        # won't be valid
201        expand = set(expand) | {name for name in snapshots if name not in mapping}
202
203        query = self._expression
204
205        if cache_key not in self._query_cache:
206            query = super().render(start=start, end=end, latest=latest, **kwargs)  # type: ignore
207            if not query:
208                raise ConfigError(f"Failed to render query {query}")
209
210            self._query_cache[cache_key] = t.cast(exp.Subqueryable, query)
211
212            try:
213                self._query_cache[cache_key] = optimize(
214                    self._query_cache[cache_key],
215                    schema=self._schema,
216                    rules=RENDER_OPTIMIZER_RULES,
217                    remove_unused_selections=False,
218                )
219            except (SchemaError, OptimizeError):
220                pass
221            except SqlglotError as ex:
222                raise_config_error(f"Invalid model query. {ex}", self._path)
223
224        query = self._query_cache[cache_key]
225
226        if expand:
227
228            def _expand(node: exp.Expression) -> exp.Expression:
229                if isinstance(node, exp.Table) and snapshots:
230                    name = exp.table_name(node)
231                    model = snapshots[name].model if name in snapshots else None
232                    if name in expand and model and not model.is_seed:
233                        return model.render_query(
234                            start=start,
235                            end=end,
236                            latest=latest,
237                            snapshots=snapshots,
238                            expand=expand,
239                            is_dev=is_dev,
240                            **kwargs,
241                        ).subquery(
242                            alias=node.alias or model.view_name,
243                            copy=False,
244                        )
245                return node
246
247            query = query.transform(_expand)
248
249        # Ensure there is no data leakage in incremental mode by filtering out all
250        # events that have data outside the time window of interest.
251        if add_incremental_filter:
252            # expansion copies the query for us. if it doesn't occur, make sure to copy.
253            if not expand:
254                query = query.copy()
255            for node, _, _ in query.walk(prune=lambda n, *_: isinstance(n, exp.Select)):
256                if isinstance(node, exp.Select):
257                    self.filter_time_column(node, *dates[0:2])
258
259        if mapping:
260            return exp.replace_tables(query, mapping)
261
262        if not isinstance(query, exp.Subqueryable):
263            raise_config_error(f"Query needs to be a SELECT or a UNION {query}.", self._path)
264
265        return t.cast(exp.Subqueryable, query)

Renders a query, expanding macros with provided kwargs, and optionally expanding referenced models.

Arguments:
  • (The query itself is the expression this renderer was constructed with; it is not passed as an argument.)
  • start: The start datetime to render. Defaults to epoch start.
  • end: The end datetime to render. Defaults to epoch start.
  • latest: The latest datetime to use for non-incremental queries. Defaults to epoch start.
  • add_incremental_filter: Add an incremental filter to the query if the model is incremental.
  • snapshots: All upstream snapshots (by model name) to use for expansion and mapping of physical locations.
  • expand: Expand referenced models as subqueries. This is used to bypass backfills when running queries that depend on materialized tables. Model definitions are inlined and can thus be run end to end on the fly.
  • Note: rendered queries are cached internally, keyed by the (start, end, latest) dates; there is no query_key parameter.
  • is_dev: Indicates whether the rendering happens in the development mode and temporary tables / table clones should be used where applicable.
  • kwargs: Additional kwargs to pass to the renderer.
Returns:

The rendered expression.

contains_star_query: bool

Returns True if the model's query contains a star projection.

def update_schema(self, schema: sqlglot.schema.MappingSchema) -> None:
272    def update_schema(self, schema: MappingSchema) -> None:
273        self._schema = schema
274
275        if self.contains_star_query:
276            # We need to re-render in order to expand the star projection
277            self._query_cache.clear()
278            self.render()
def filter_time_column( self, query: sqlglot.expressions.Select, start: Union[datetime.date, datetime.datetime, str, int, float], end: Union[datetime.date, datetime.datetime, str, int, float]) -> None:
280    def filter_time_column(self, query: exp.Select, start: TimeLike, end: TimeLike) -> None:
281        """Filters a query on the time column to ensure no data leakage when running in incremental mode."""
282        if not self._time_column:
283            return
284
285        low = self._time_converter(start)
286        high = self._time_converter(end)
287
288        time_column_identifier = exp.to_identifier(self._time_column.column)
289        if time_column_identifier is None:
290            raise_config_error(
291                f"Time column '{self._time_column.column}' must be a valid identifier.",
292                self._path,
293            )
294            raise
295
296        time_column_projection = next(
297            (
298                select
299                for select in query.selects
300                if select.alias_or_name == self._time_column.column
301            ),
302            time_column_identifier,
303        )
304
305        if isinstance(time_column_projection, exp.Alias):
306            time_column_projection = time_column_projection.this
307
308        between = exp.Between(this=time_column_projection.copy(), low=low, high=high)
309
310        if not query.args.get("group"):
311            query.where(between, copy=False)
312        else:
313            query.having(between, copy=False)
314
315        simplify(query)

Filters a query on the time column to ensure no data leakage when running in incremental mode.