Edit on GitHub

sqlmesh.dbt.model

  1from __future__ import annotations
  2
  3import re
  4import typing as t
  5
  6from pydantic import validator
  7from sqlglot.helper import ensure_list
  8
  9from sqlmesh.core import dialect as d
 10from sqlmesh.core.config.base import UpdateStrategy
 11from sqlmesh.core.model import (
 12    IncrementalByTimeRangeKind,
 13    IncrementalByUniqueKeyKind,
 14    Model,
 15    ModelKind,
 16    ModelKindName,
 17    create_sql_model,
 18)
 19from sqlmesh.dbt.basemodel import BaseModelConfig, Materialization
 20from sqlmesh.dbt.common import DbtContext, SqlStr
 21from sqlmesh.dbt.target import TargetConfig
 22from sqlmesh.utils.errors import ConfigError
 23
 24INCREMENTAL_BY_TIME_STRATEGIES = set(["delete+insert", "insert_overwrite"])
 25INCREMENTAL_BY_UNIQUE_KEY_STRATEGIES = set(["merge"])
 26
 27
 28def collection_to_str(collection: t.Iterable) -> str:
 29    return ", ".join(f"'{item}'" for item in collection)
 30
 31
 32class ModelConfig(BaseModelConfig):
 33    """
 34    ModelConfig contains all config parameters available to DBT models
 35
 36    See https://docs.getdbt.com/reference/configs-and-properties for
 37    a more detailed description of each config parameter under the
 38    General propreties, General configs, and For models sections.
 39
 40    Args:
 41        sql: The model sql
 42        time_column: The name of the time column
 43        partitioned_by: List of columns to partition by. time_column will automatically be
 44            included, if specified.
 45        cron: A cron string specifying how often the model should be refreshed, leveraging the
 46            [croniter](https://github.com/kiorky/croniter) library.
 47        dialect: The SQL dialect that the model's query is written in. By default,
 48            this is assumed to be the dialect of the context.
 49        batch_size: The maximum number of intervals that can be run per backfill job. If this is None,
 50            then backfilling this model will do all of history in one job. If this is set, a model's backfill
 51            will be chunked such that each individual job will only contain jobs with max `batch_size` intervals.
 52        start: The earliest date that the model will be backfilled for
 53        cluster_by: Field(s) to use for clustering in data warehouses that support clustering
 54        incremental_strategy: Strategy used to build the incremental model
 55        materialized: How the model will be materialized in the database
 56        sql_header: SQL statement to inject above create table/view as
 57        unique_key: List of columns that define row uniqueness for the model
 58    """
 59
 60    # sqlmesh fields
 61    sql: SqlStr = SqlStr("")
 62    time_column: t.Optional[str] = None
 63    partitioned_by: t.Optional[t.Union[t.List[str], str]] = None
 64    cron: t.Optional[str] = None
 65    dialect: t.Optional[str] = None
 66    batch_size: t.Optional[int]
 67
 68    # DBT configuration fields
 69    start: t.Optional[str] = None
 70    cluster_by: t.Optional[t.List[str]] = None
 71    incremental_strategy: t.Optional[str] = None
 72    materialized: Materialization = Materialization.VIEW
 73    sql_header: t.Optional[str] = None
 74    unique_key: t.Optional[t.List[str]] = None
 75
 76    # redshift
 77    bind: t.Optional[bool] = None
 78
 79    @validator(
 80        "unique_key",
 81        "cluster_by",
 82        "partitioned_by",
 83        pre=True,
 84    )
 85    def _validate_list(cls, v: t.Union[str, t.List[str]]) -> t.List[str]:
 86        return ensure_list(v)
 87
 88    @validator("sql", pre=True)
 89    def _validate_sql(cls, v: t.Union[str, SqlStr]) -> SqlStr:
 90        return SqlStr(v)
 91
 92    @validator("materialized", pre=True)
 93    def _validate_materialization(cls, v: str) -> Materialization:
 94        return Materialization(v.lower())
 95
 96    _FIELD_UPDATE_STRATEGY: t.ClassVar[t.Dict[str, UpdateStrategy]] = {
 97        **BaseModelConfig._FIELD_UPDATE_STRATEGY,
 98        **{
 99            "sql": UpdateStrategy.IMMUTABLE,
100            "time_column": UpdateStrategy.IMMUTABLE,
101        },
102    }
103
104    @property
105    def model_dialect(self) -> t.Optional[str]:
106        return self.dialect or self.meta.get("dialect", None)
107
108    @property
109    def model_materialization(self) -> Materialization:
110        return self.materialized
111
112    def model_kind(self, target: TargetConfig) -> ModelKind:
113        """
114        Get the sqlmesh ModelKind
115        Returns:
116            The sqlmesh ModelKind
117        """
118        materialization = self.materialized
119        if materialization == Materialization.TABLE:
120            return ModelKind(name=ModelKindName.FULL)
121        if materialization == Materialization.VIEW:
122            return ModelKind(name=ModelKindName.VIEW)
123        if materialization == Materialization.INCREMENTAL:
124            if self.time_column:
125                strategy = self.incremental_strategy or target.default_incremental_strategy(
126                    IncrementalByTimeRangeKind
127                )
128                if strategy not in INCREMENTAL_BY_TIME_STRATEGIES:
129                    raise ConfigError(
130                        f"SQLMesh IncrementalByTime not compatible with '{strategy}'"
131                        f" incremental strategy. Supported strategies include {collection_to_str(INCREMENTAL_BY_TIME_STRATEGIES)}."
132                    )
133                return IncrementalByTimeRangeKind(time_column=self.time_column)
134            if self.unique_key:
135                strategy = self.incremental_strategy or target.default_incremental_strategy(
136                    IncrementalByUniqueKeyKind
137                )
138                if strategy not in INCREMENTAL_BY_UNIQUE_KEY_STRATEGIES:
139                    support_msg = (
140                        "does not currently support"
141                        if strategy is "append"
142                        else "not compatible with"
143                    )
144                    raise ConfigError(
145                        f"{self.model_name}: SQLMesh IncrementalByUniqueKey {support_msg} '{strategy}'"
146                        f" incremental strategy. Supported strategies include {collection_to_str(INCREMENTAL_BY_UNIQUE_KEY_STRATEGIES)}."
147                    )
148                return IncrementalByUniqueKeyKind(unique_key=self.unique_key)
149
150            raise ConfigError(
151                f"{self.model_name}: Incremental materialization requires either "
152                f"time_column ({collection_to_str(INCREMENTAL_BY_TIME_STRATEGIES)}) or "
153                f"unique_key ({collection_to_str(INCREMENTAL_BY_UNIQUE_KEY_STRATEGIES)}) configuration."
154            )
155        if materialization == Materialization.EPHEMERAL:
156            return ModelKind(name=ModelKindName.EMBEDDED)
157        raise ConfigError(f"{materialization.value} materialization not supported.")
158
159    @property
160    def sql_no_config(self) -> str:
161        matches = re.findall(r"{{\s*config\(", self.sql)
162        if matches:
163            config_macro_start = self.sql.index(matches[0])
164            cursor = config_macro_start
165            quote = None
166            while cursor < len(self.sql):
167                if self.sql[cursor] in ('"', "'"):
168                    if quote is None:
169                        quote = self.sql[cursor]
170                    elif quote == self.sql[cursor]:
171                        quote = None
172                if self.sql[cursor : cursor + 2] == "}}" and quote is None:
173                    return "".join([self.sql[:config_macro_start], self.sql[cursor + 2 :]])
174                cursor += 1
175        return self.sql
176
177    @property
178    def all_sql(self) -> SqlStr:
179        return SqlStr(";\n".join(self.pre_hook + [self.sql] + self.post_hook))
180
181    def to_sqlmesh(self, context: DbtContext) -> Model:
182        """Converts the dbt model into a SQLMesh model."""
183        model_context = self._context_for_dependencies(context, self._dependencies)
184        expressions = d.parse(self.sql_no_config)
185        if not expressions:
186            raise ConfigError(f"Model '{self.table_name}' must have a query.")
187
188        optional_kwargs: t.Dict[str, t.Any] = {}
189        if self.partitioned_by:
190            optional_kwargs["partitioned_by"] = self.partitioned_by
191        for field in ("cron", "batch_size"):
192            field_val = getattr(self, field, None) or self.meta.get(field, None)
193            if field_val:
194                optional_kwargs[field] = field_val
195
196        return create_sql_model(
197            self.model_name,
198            expressions[-1],
199            dialect=self.model_dialect or model_context.dialect,
200            kind=self.model_kind(context.target),
201            start=self.start,
202            statements=expressions[0:-1],
203            **optional_kwargs,
204            **self.sqlmesh_model_kwargs(model_context),
205        )
def collection_to_str(collection: Iterable) -> str:
29def collection_to_str(collection: t.Iterable) -> str:
30    return ", ".join(f"'{item}'" for item in collection)
class ModelConfig(sqlmesh.dbt.basemodel.BaseModelConfig):
 33class ModelConfig(BaseModelConfig):
 34    """
 35    ModelConfig contains all config parameters available to DBT models
 36
 37    See https://docs.getdbt.com/reference/configs-and-properties for
 38    a more detailed description of each config parameter under the
 39    General properties, General configs, and For models sections.
 40
 41    Args:
 42        sql: The model sql
 43        time_column: The name of the time column
 44        partitioned_by: List of columns to partition by. time_column will automatically be
 45            included, if specified.
 46        cron: A cron string specifying how often the model should be refreshed, leveraging the
 47            [croniter](https://github.com/kiorky/croniter) library.
 48        dialect: The SQL dialect that the model's query is written in. By default,
 49            this is assumed to be the dialect of the context.
 50        batch_size: The maximum number of intervals that can be run per backfill job. If this is None,
 51            then backfilling this model will do all of history in one job. If this is set, a model's backfill
 52            will be chunked such that each individual job will only contain jobs with max `batch_size` intervals.
 53        start: The earliest date that the model will be backfilled for
 54        cluster_by: Field(s) to use for clustering in data warehouses that support clustering
 55        incremental_strategy: Strategy used to build the incremental model
 56        materialized: How the model will be materialized in the database
 57        sql_header: SQL statement to inject above create table/view as
 58        unique_key: List of columns that define row uniqueness for the model
 59    """
 60
 61    # sqlmesh fields
 62    sql: SqlStr = SqlStr("")
 63    time_column: t.Optional[str] = None
 64    partitioned_by: t.Optional[t.Union[t.List[str], str]] = None
 65    cron: t.Optional[str] = None
 66    dialect: t.Optional[str] = None
 67    batch_size: t.Optional[int]
 68
 69    # DBT configuration fields
 70    start: t.Optional[str] = None
 71    cluster_by: t.Optional[t.List[str]] = None
 72    incremental_strategy: t.Optional[str] = None
 73    materialized: Materialization = Materialization.VIEW
 74    sql_header: t.Optional[str] = None
 75    unique_key: t.Optional[t.List[str]] = None
 76
 77    # redshift
 78    bind: t.Optional[bool] = None
 79
 80    @validator(
 81        "unique_key",
 82        "cluster_by",
 83        "partitioned_by",
 84        pre=True,
 85    )
 86    def _validate_list(cls, v: t.Union[str, t.List[str]]) -> t.List[str]:
 87        return ensure_list(v)
 88
 89    @validator("sql", pre=True)
 90    def _validate_sql(cls, v: t.Union[str, SqlStr]) -> SqlStr:
 91        return SqlStr(v)
 92
 93    @validator("materialized", pre=True)
 94    def _validate_materialization(cls, v: str) -> Materialization:
 95        return Materialization(v.lower())
 96
 97    _FIELD_UPDATE_STRATEGY: t.ClassVar[t.Dict[str, UpdateStrategy]] = {
 98        **BaseModelConfig._FIELD_UPDATE_STRATEGY,
 99        **{
100            "sql": UpdateStrategy.IMMUTABLE,
101            "time_column": UpdateStrategy.IMMUTABLE,
102        },
103    }
104
105    @property
106    def model_dialect(self) -> t.Optional[str]:
107        return self.dialect or self.meta.get("dialect", None)
108
109    @property
110    def model_materialization(self) -> Materialization:
111        return self.materialized
112
113    def model_kind(self, target: TargetConfig) -> ModelKind:
114        """
115        Get the sqlmesh ModelKind
116        Returns:
117            The sqlmesh ModelKind
118        """
119        materialization = self.materialized
120        if materialization == Materialization.TABLE:
121            return ModelKind(name=ModelKindName.FULL)
122        if materialization == Materialization.VIEW:
123            return ModelKind(name=ModelKindName.VIEW)
124        if materialization == Materialization.INCREMENTAL:
125            if self.time_column:
126                strategy = self.incremental_strategy or target.default_incremental_strategy(
127                    IncrementalByTimeRangeKind
128                )
129                if strategy not in INCREMENTAL_BY_TIME_STRATEGIES:
130                    raise ConfigError(
131                        f"SQLMesh IncrementalByTime not compatible with '{strategy}'"
132                        f" incremental strategy. Supported strategies include {collection_to_str(INCREMENTAL_BY_TIME_STRATEGIES)}."
133                    )
134                return IncrementalByTimeRangeKind(time_column=self.time_column)
135            if self.unique_key:
136                strategy = self.incremental_strategy or target.default_incremental_strategy(
137                    IncrementalByUniqueKeyKind
138                )
139                if strategy not in INCREMENTAL_BY_UNIQUE_KEY_STRATEGIES:
140                    support_msg = (
141                        "does not currently support"
142                        if strategy == "append"
143                        else "not compatible with"
144                    )
145                    raise ConfigError(
146                        f"{self.model_name}: SQLMesh IncrementalByUniqueKey {support_msg} '{strategy}'"
147                        f" incremental strategy. Supported strategies include {collection_to_str(INCREMENTAL_BY_UNIQUE_KEY_STRATEGIES)}."
148                    )
149                return IncrementalByUniqueKeyKind(unique_key=self.unique_key)
150
151            raise ConfigError(
152                f"{self.model_name}: Incremental materialization requires either "
153                f"time_column ({collection_to_str(INCREMENTAL_BY_TIME_STRATEGIES)}) or "
154                f"unique_key ({collection_to_str(INCREMENTAL_BY_UNIQUE_KEY_STRATEGIES)}) configuration."
155            )
156        if materialization == Materialization.EPHEMERAL:
157            return ModelKind(name=ModelKindName.EMBEDDED)
158        raise ConfigError(f"{materialization.value} materialization not supported.")
159
160    @property
161    def sql_no_config(self) -> str:
162        matches = re.findall(r"{{\s*config\(", self.sql)
163        if matches:
164            config_macro_start = self.sql.index(matches[0])
165            cursor = config_macro_start
166            quote = None
167            while cursor < len(self.sql):
168                if self.sql[cursor] in ('"', "'"):
169                    if quote is None:
170                        quote = self.sql[cursor]
171                    elif quote == self.sql[cursor]:
172                        quote = None
173                if self.sql[cursor : cursor + 2] == "}}" and quote is None:
174                    return "".join([self.sql[:config_macro_start], self.sql[cursor + 2 :]])
175                cursor += 1
176        return self.sql
177
178    @property
179    def all_sql(self) -> SqlStr:
180        return SqlStr(";\n".join(self.pre_hook + [self.sql] + self.post_hook))
181
182    def to_sqlmesh(self, context: DbtContext) -> Model:
183        """Converts the dbt model into a SQLMesh model."""
184        model_context = self._context_for_dependencies(context, self._dependencies)
185        expressions = d.parse(self.sql_no_config)
186        if not expressions:
187            raise ConfigError(f"Model '{self.table_name}' must have a query.")
188
189        optional_kwargs: t.Dict[str, t.Any] = {}
190        if self.partitioned_by:
191            optional_kwargs["partitioned_by"] = self.partitioned_by
192        for field in ("cron", "batch_size"):
193            field_val = getattr(self, field, None) or self.meta.get(field, None)
194            if field_val:
195                optional_kwargs[field] = field_val
196
197        return create_sql_model(
198            self.model_name,
199            expressions[-1],
200            dialect=self.model_dialect or model_context.dialect,
201            kind=self.model_kind(context.target),
202            start=self.start,
203            statements=expressions[0:-1],
204            **optional_kwargs,
205            **self.sqlmesh_model_kwargs(model_context),
206        )

ModelConfig contains all config parameters available to DBT models

See https://docs.getdbt.com/reference/configs-and-properties for a more detailed description of each config parameter under the General properties, General configs, and For models sections.

Arguments:
  • sql: The model sql
  • time_column: The name of the time column
  • partitioned_by: List of columns to partition by. time_column will automatically be included, if specified.
  • cron: A cron string specifying how often the model should be refreshed, leveraging the croniter library.
  • dialect: The SQL dialect that the model's query is written in. By default, this is assumed to be the dialect of the context.
  • batch_size: The maximum number of intervals that can be run per backfill job. If this is None, then backfilling this model will do all of history in one job. If this is set, a model's backfill will be chunked such that each individual job will only contain jobs with max batch_size intervals.
  • start: The earliest date that the model will be backfilled for
  • cluster_by: Field(s) to use for clustering in data warehouses that support clustering
  • incremental_strategy: Strategy used to build the incremental model
  • materialized: How the model will be materialized in the database
  • sql_header: SQL statement to inject above create table/view as
  • unique_key: List of columns that define row uniqueness for the model
def model_kind( self, target: sqlmesh.dbt.target.TargetConfig) -> sqlmesh.core.model.kind.ModelKind:
113    def model_kind(self, target: TargetConfig) -> ModelKind:
114        """
115        Get the sqlmesh ModelKind
116        Returns:
117            The sqlmesh ModelKind
118        """
119        materialization = self.materialized
120        if materialization == Materialization.TABLE:
121            return ModelKind(name=ModelKindName.FULL)
122        if materialization == Materialization.VIEW:
123            return ModelKind(name=ModelKindName.VIEW)
124        if materialization == Materialization.INCREMENTAL:
125            if self.time_column:
126                strategy = self.incremental_strategy or target.default_incremental_strategy(
127                    IncrementalByTimeRangeKind
128                )
129                if strategy not in INCREMENTAL_BY_TIME_STRATEGIES:
130                    raise ConfigError(
131                        f"SQLMesh IncrementalByTime not compatible with '{strategy}'"
132                        f" incremental strategy. Supported strategies include {collection_to_str(INCREMENTAL_BY_TIME_STRATEGIES)}."
133                    )
134                return IncrementalByTimeRangeKind(time_column=self.time_column)
135            if self.unique_key:
136                strategy = self.incremental_strategy or target.default_incremental_strategy(
137                    IncrementalByUniqueKeyKind
138                )
139                if strategy not in INCREMENTAL_BY_UNIQUE_KEY_STRATEGIES:
140                    support_msg = (
141                        "does not currently support"
142                        if strategy == "append"
143                        else "not compatible with"
144                    )
145                    raise ConfigError(
146                        f"{self.model_name}: SQLMesh IncrementalByUniqueKey {support_msg} '{strategy}'"
147                        f" incremental strategy. Supported strategies include {collection_to_str(INCREMENTAL_BY_UNIQUE_KEY_STRATEGIES)}."
148                    )
149                return IncrementalByUniqueKeyKind(unique_key=self.unique_key)
150
151            raise ConfigError(
152                f"{self.model_name}: Incremental materialization requires either "
153                f"time_column ({collection_to_str(INCREMENTAL_BY_TIME_STRATEGIES)}) or "
154                f"unique_key ({collection_to_str(INCREMENTAL_BY_UNIQUE_KEY_STRATEGIES)}) configuration."
155            )
156        if materialization == Materialization.EPHEMERAL:
157            return ModelKind(name=ModelKindName.EMBEDDED)
158        raise ConfigError(f"{materialization.value} materialization not supported.")

Get the sqlmesh ModelKind

Returns:

The sqlmesh ModelKind

def to_sqlmesh( self, context: sqlmesh.dbt.common.DbtContext) -> Annotated[Union[sqlmesh.core.model.definition.SqlModel, sqlmesh.core.model.definition.SeedModel, sqlmesh.core.model.definition.PythonModel], FieldInfo(default=PydanticUndefined, discriminator='source_type', extra={})]:
182    def to_sqlmesh(self, context: DbtContext) -> Model:
183        """Converts the dbt model into a SQLMesh model."""
184        model_context = self._context_for_dependencies(context, self._dependencies)
185        expressions = d.parse(self.sql_no_config)
186        if not expressions:
187            raise ConfigError(f"Model '{self.table_name}' must have a query.")
188
189        optional_kwargs: t.Dict[str, t.Any] = {}
190        if self.partitioned_by:
191            optional_kwargs["partitioned_by"] = self.partitioned_by
192        for field in ("cron", "batch_size"):
193            field_val = getattr(self, field, None) or self.meta.get(field, None)
194            if field_val:
195                optional_kwargs[field] = field_val
196
197        return create_sql_model(
198            self.model_name,
199            expressions[-1],
200            dialect=self.model_dialect or model_context.dialect,
201            kind=self.model_kind(context.target),
202            start=self.start,
203            statements=expressions[0:-1],
204            **optional_kwargs,
205            **self.sqlmesh_model_kwargs(model_context),
206        )

Converts the dbt model into a SQLMesh model.