sqlmesh.dbt.model
1from __future__ import annotations 2 3import re 4import typing as t 5 6from pydantic import validator 7from sqlglot.helper import ensure_list 8 9from sqlmesh.core import dialect as d 10from sqlmesh.core.config.base import UpdateStrategy 11from sqlmesh.core.model import ( 12 IncrementalByTimeRangeKind, 13 IncrementalByUniqueKeyKind, 14 Model, 15 ModelKind, 16 ModelKindName, 17 create_sql_model, 18) 19from sqlmesh.dbt.basemodel import BaseModelConfig, Materialization 20from sqlmesh.dbt.common import DbtContext, SqlStr 21from sqlmesh.dbt.target import TargetConfig 22from sqlmesh.utils.errors import ConfigError 23 24INCREMENTAL_BY_TIME_STRATEGIES = set(["delete+insert", "insert_overwrite"]) 25INCREMENTAL_BY_UNIQUE_KEY_STRATEGIES = set(["merge"]) 26 27 28def collection_to_str(collection: t.Iterable) -> str: 29 return ", ".join(f"'{item}'" for item in collection) 30 31 32class ModelConfig(BaseModelConfig): 33 """ 34 ModelConfig contains all config parameters available to DBT models 35 36 See https://docs.getdbt.com/reference/configs-and-properties for 37 a more detailed description of each config parameter under the 38 General propreties, General configs, and For models sections. 39 40 Args: 41 sql: The model sql 42 time_column: The name of the time column 43 partitioned_by: List of columns to partition by. time_column will automatically be 44 included, if specified. 45 cron: A cron string specifying how often the model should be refreshed, leveraging the 46 [croniter](https://github.com/kiorky/croniter) library. 47 dialect: The SQL dialect that the model's query is written in. By default, 48 this is assumed to be the dialect of the context. 49 batch_size: The maximum number of intervals that can be run per backfill job. If this is None, 50 then backfilling this model will do all of history in one job. If this is set, a model's backfill 51 will be chunked such that each individual job will only contain jobs with max `batch_size` intervals. 
52 start: The earliest date that the model will be backfilled for 53 cluster_by: Field(s) to use for clustering in data warehouses that support clustering 54 incremental_strategy: Strategy used to build the incremental model 55 materialized: How the model will be materialized in the database 56 sql_header: SQL statement to inject above create table/view as 57 unique_key: List of columns that define row uniqueness for the model 58 """ 59 60 # sqlmesh fields 61 sql: SqlStr = SqlStr("") 62 time_column: t.Optional[str] = None 63 partitioned_by: t.Optional[t.Union[t.List[str], str]] = None 64 cron: t.Optional[str] = None 65 dialect: t.Optional[str] = None 66 batch_size: t.Optional[int] 67 68 # DBT configuration fields 69 start: t.Optional[str] = None 70 cluster_by: t.Optional[t.List[str]] = None 71 incremental_strategy: t.Optional[str] = None 72 materialized: Materialization = Materialization.VIEW 73 sql_header: t.Optional[str] = None 74 unique_key: t.Optional[t.List[str]] = None 75 76 # redshift 77 bind: t.Optional[bool] = None 78 79 @validator( 80 "unique_key", 81 "cluster_by", 82 "partitioned_by", 83 pre=True, 84 ) 85 def _validate_list(cls, v: t.Union[str, t.List[str]]) -> t.List[str]: 86 return ensure_list(v) 87 88 @validator("sql", pre=True) 89 def _validate_sql(cls, v: t.Union[str, SqlStr]) -> SqlStr: 90 return SqlStr(v) 91 92 @validator("materialized", pre=True) 93 def _validate_materialization(cls, v: str) -> Materialization: 94 return Materialization(v.lower()) 95 96 _FIELD_UPDATE_STRATEGY: t.ClassVar[t.Dict[str, UpdateStrategy]] = { 97 **BaseModelConfig._FIELD_UPDATE_STRATEGY, 98 **{ 99 "sql": UpdateStrategy.IMMUTABLE, 100 "time_column": UpdateStrategy.IMMUTABLE, 101 }, 102 } 103 104 @property 105 def model_dialect(self) -> t.Optional[str]: 106 return self.dialect or self.meta.get("dialect", None) 107 108 @property 109 def model_materialization(self) -> Materialization: 110 return self.materialized 111 112 def model_kind(self, target: TargetConfig) -> 
ModelKind: 113 """ 114 Get the sqlmesh ModelKind 115 Returns: 116 The sqlmesh ModelKind 117 """ 118 materialization = self.materialized 119 if materialization == Materialization.TABLE: 120 return ModelKind(name=ModelKindName.FULL) 121 if materialization == Materialization.VIEW: 122 return ModelKind(name=ModelKindName.VIEW) 123 if materialization == Materialization.INCREMENTAL: 124 if self.time_column: 125 strategy = self.incremental_strategy or target.default_incremental_strategy( 126 IncrementalByTimeRangeKind 127 ) 128 if strategy not in INCREMENTAL_BY_TIME_STRATEGIES: 129 raise ConfigError( 130 f"SQLMesh IncrementalByTime not compatible with '{strategy}'" 131 f" incremental strategy. Supported strategies include {collection_to_str(INCREMENTAL_BY_TIME_STRATEGIES)}." 132 ) 133 return IncrementalByTimeRangeKind(time_column=self.time_column) 134 if self.unique_key: 135 strategy = self.incremental_strategy or target.default_incremental_strategy( 136 IncrementalByUniqueKeyKind 137 ) 138 if strategy not in INCREMENTAL_BY_UNIQUE_KEY_STRATEGIES: 139 support_msg = ( 140 "does not currently support" 141 if strategy is "append" 142 else "not compatible with" 143 ) 144 raise ConfigError( 145 f"{self.model_name}: SQLMesh IncrementalByUniqueKey {support_msg} '{strategy}'" 146 f" incremental strategy. Supported strategies include {collection_to_str(INCREMENTAL_BY_UNIQUE_KEY_STRATEGIES)}." 147 ) 148 return IncrementalByUniqueKeyKind(unique_key=self.unique_key) 149 150 raise ConfigError( 151 f"{self.model_name}: Incremental materialization requires either " 152 f"time_column ({collection_to_str(INCREMENTAL_BY_TIME_STRATEGIES)}) or " 153 f"unique_key ({collection_to_str(INCREMENTAL_BY_UNIQUE_KEY_STRATEGIES)}) configuration." 
154 ) 155 if materialization == Materialization.EPHEMERAL: 156 return ModelKind(name=ModelKindName.EMBEDDED) 157 raise ConfigError(f"{materialization.value} materialization not supported.") 158 159 @property 160 def sql_no_config(self) -> str: 161 matches = re.findall(r"{{\s*config\(", self.sql) 162 if matches: 163 config_macro_start = self.sql.index(matches[0]) 164 cursor = config_macro_start 165 quote = None 166 while cursor < len(self.sql): 167 if self.sql[cursor] in ('"', "'"): 168 if quote is None: 169 quote = self.sql[cursor] 170 elif quote == self.sql[cursor]: 171 quote = None 172 if self.sql[cursor : cursor + 2] == "}}" and quote is None: 173 return "".join([self.sql[:config_macro_start], self.sql[cursor + 2 :]]) 174 cursor += 1 175 return self.sql 176 177 @property 178 def all_sql(self) -> SqlStr: 179 return SqlStr(";\n".join(self.pre_hook + [self.sql] + self.post_hook)) 180 181 def to_sqlmesh(self, context: DbtContext) -> Model: 182 """Converts the dbt model into a SQLMesh model.""" 183 model_context = self._context_for_dependencies(context, self._dependencies) 184 expressions = d.parse(self.sql_no_config) 185 if not expressions: 186 raise ConfigError(f"Model '{self.table_name}' must have a query.") 187 188 optional_kwargs: t.Dict[str, t.Any] = {} 189 if self.partitioned_by: 190 optional_kwargs["partitioned_by"] = self.partitioned_by 191 for field in ("cron", "batch_size"): 192 field_val = getattr(self, field, None) or self.meta.get(field, None) 193 if field_val: 194 optional_kwargs[field] = field_val 195 196 return create_sql_model( 197 self.model_name, 198 expressions[-1], 199 dialect=self.model_dialect or model_context.dialect, 200 kind=self.model_kind(context.target), 201 start=self.start, 202 statements=expressions[0:-1], 203 **optional_kwargs, 204 **self.sqlmesh_model_kwargs(model_context), 205 )
def
collection_to_str(collection: t.Iterable) -> str:
class ModelConfig(BaseModelConfig):
    """
    ModelConfig contains all config parameters available to DBT models

    See https://docs.getdbt.com/reference/configs-and-properties for
    a more detailed description of each config parameter under the
    General properties, General configs, and For models sections.

    Args:
        sql: The model sql
        time_column: The name of the time column
        partitioned_by: List of columns to partition by. time_column will automatically be
            included, if specified.
        cron: A cron string specifying how often the model should be refreshed, leveraging the
            [croniter](https://github.com/kiorky/croniter) library.
        dialect: The SQL dialect that the model's query is written in. By default,
            this is assumed to be the dialect of the context.
        batch_size: The maximum number of intervals that can be run per backfill job. If this is None,
            then backfilling this model will do all of history in one job. If this is set, a model's backfill
            will be chunked such that each individual job will only contain jobs with max `batch_size` intervals.
        start: The earliest date that the model will be backfilled for
        cluster_by: Field(s) to use for clustering in data warehouses that support clustering
        incremental_strategy: Strategy used to build the incremental model
        materialized: How the model will be materialized in the database
        sql_header: SQL statement to inject above create table/view as
        unique_key: List of columns that define row uniqueness for the model
    """

    # sqlmesh fields
    sql: SqlStr = SqlStr("")
    time_column: t.Optional[str] = None
    partitioned_by: t.Optional[t.Union[t.List[str], str]] = None
    cron: t.Optional[str] = None
    dialect: t.Optional[str] = None
    # Explicit None default (previously implicit via pydantic's Optional handling).
    batch_size: t.Optional[int] = None

    # DBT configuration fields
    start: t.Optional[str] = None
    cluster_by: t.Optional[t.List[str]] = None
    incremental_strategy: t.Optional[str] = None
    materialized: Materialization = Materialization.VIEW
    sql_header: t.Optional[str] = None
    unique_key: t.Optional[t.List[str]] = None

    # redshift
    bind: t.Optional[bool] = None

    @validator(
        "unique_key",
        "cluster_by",
        "partitioned_by",
        pre=True,
    )
    def _validate_list(cls, v: t.Union[str, t.List[str]]) -> t.List[str]:
        # dbt allows scalar-or-list for these fields; normalize to a list.
        return ensure_list(v)

    @validator("sql", pre=True)
    def _validate_sql(cls, v: t.Union[str, SqlStr]) -> SqlStr:
        return SqlStr(v)

    @validator("materialized", pre=True)
    def _validate_materialization(cls, v: str) -> Materialization:
        return Materialization(v.lower())

    _FIELD_UPDATE_STRATEGY: t.ClassVar[t.Dict[str, UpdateStrategy]] = {
        **BaseModelConfig._FIELD_UPDATE_STRATEGY,
        **{
            "sql": UpdateStrategy.IMMUTABLE,
            "time_column": UpdateStrategy.IMMUTABLE,
        },
    }

    @property
    def model_dialect(self) -> t.Optional[str]:
        return self.dialect or self.meta.get("dialect", None)

    @property
    def model_materialization(self) -> Materialization:
        return self.materialized

    def model_kind(self, target: TargetConfig) -> ModelKind:
        """
        Get the sqlmesh ModelKind

        Returns:
            The sqlmesh ModelKind

        Raises:
            ConfigError: If the materialization/strategy combination is not
                supported by SQLMesh.
        """
        materialization = self.materialized
        if materialization == Materialization.TABLE:
            return ModelKind(name=ModelKindName.FULL)
        if materialization == Materialization.VIEW:
            return ModelKind(name=ModelKindName.VIEW)
        if materialization == Materialization.INCREMENTAL:
            if self.time_column:
                strategy = self.incremental_strategy or target.default_incremental_strategy(
                    IncrementalByTimeRangeKind
                )
                if strategy not in INCREMENTAL_BY_TIME_STRATEGIES:
                    # Prefix with model_name for consistency with the other errors below.
                    raise ConfigError(
                        f"{self.model_name}: SQLMesh IncrementalByTime not compatible with '{strategy}'"
                        f" incremental strategy. Supported strategies include {collection_to_str(INCREMENTAL_BY_TIME_STRATEGIES)}."
                    )
                return IncrementalByTimeRangeKind(time_column=self.time_column)
            if self.unique_key:
                strategy = self.incremental_strategy or target.default_incremental_strategy(
                    IncrementalByUniqueKeyKind
                )
                if strategy not in INCREMENTAL_BY_UNIQUE_KEY_STRATEGIES:
                    # Bug fix: was `strategy is "append"` — identity comparison with a
                    # string literal is unreliable; use equality.
                    support_msg = (
                        "does not currently support"
                        if strategy == "append"
                        else "not compatible with"
                    )
                    raise ConfigError(
                        f"{self.model_name}: SQLMesh IncrementalByUniqueKey {support_msg} '{strategy}'"
                        f" incremental strategy. Supported strategies include {collection_to_str(INCREMENTAL_BY_UNIQUE_KEY_STRATEGIES)}."
                    )
                return IncrementalByUniqueKeyKind(unique_key=self.unique_key)

            raise ConfigError(
                f"{self.model_name}: Incremental materialization requires either "
                f"time_column ({collection_to_str(INCREMENTAL_BY_TIME_STRATEGIES)}) or "
                f"unique_key ({collection_to_str(INCREMENTAL_BY_UNIQUE_KEY_STRATEGIES)}) configuration."
            )
        if materialization == Materialization.EPHEMERAL:
            return ModelKind(name=ModelKindName.EMBEDDED)
        raise ConfigError(f"{materialization.value} materialization not supported.")

    @property
    def sql_no_config(self) -> str:
        """The model sql with the {{ config(...) }} macro stripped out.

        Scans forward from the config macro start, tracking quote state so a
        "}}" inside a string literal does not terminate the macro early.
        """
        matches = re.findall(r"{{\s*config\(", self.sql)
        if matches:
            config_macro_start = self.sql.index(matches[0])
            cursor = config_macro_start
            quote = None
            while cursor < len(self.sql):
                if self.sql[cursor] in ('"', "'"):
                    if quote is None:
                        quote = self.sql[cursor]
                    elif quote == self.sql[cursor]:
                        quote = None
                if self.sql[cursor : cursor + 2] == "}}" and quote is None:
                    return "".join([self.sql[:config_macro_start], self.sql[cursor + 2 :]])
                cursor += 1
        return self.sql

    @property
    def all_sql(self) -> SqlStr:
        """Pre-hooks, model sql, and post-hooks joined into one statement list."""
        return SqlStr(";\n".join(self.pre_hook + [self.sql] + self.post_hook))

    def to_sqlmesh(self, context: DbtContext) -> Model:
        """Converts the dbt model into a SQLMesh model."""
        model_context = self._context_for_dependencies(context, self._dependencies)
        expressions = d.parse(self.sql_no_config)
        if not expressions:
            raise ConfigError(f"Model '{self.table_name}' must have a query.")

        optional_kwargs: t.Dict[str, t.Any] = {}
        if self.partitioned_by:
            optional_kwargs["partitioned_by"] = self.partitioned_by
        # Field value takes precedence over a value set in the model's meta.
        for field in ("cron", "batch_size"):
            field_val = getattr(self, field, None) or self.meta.get(field, None)
            if field_val:
                optional_kwargs[field] = field_val

        return create_sql_model(
            self.model_name,
            expressions[-1],
            dialect=self.model_dialect or model_context.dialect,
            kind=self.model_kind(context.target),
            start=self.start,
            statements=expressions[:-1],
            **optional_kwargs,
            **self.sqlmesh_model_kwargs(model_context),
        )
ModelConfig contains all config parameters available to DBT models
See https://docs.getdbt.com/reference/configs-and-properties for a more detailed description of each config parameter under the General properties, General configs, and For models sections.
Arguments:
- sql: The model sql
- time_column: The name of the time column
- partitioned_by: List of columns to partition by. time_column will automatically be included, if specified.
- cron: A cron string specifying how often the model should be refreshed, leveraging the croniter library.
- dialect: The SQL dialect that the model's query is written in. By default, this is assumed to be the dialect of the context.
- batch_size: The maximum number of intervals that can be run per backfill job. If this is None,
then backfilling this model will do all of history in one job. If this is set, a model's backfill
will be chunked such that each individual job will only contain jobs with max
batch_size
intervals.
- start: The earliest date that the model will be backfilled for
- cluster_by: Field(s) to use for clustering in data warehouses that support clustering
- incremental_strategy: Strategy used to build the incremental model
- materialized: How the model will be materialized in the database
- sql_header: SQL statement to inject above create table/view as
- unique_key: List of columns that define row uniqueness for the model
def
model_kind( self, target: sqlmesh.dbt.target.TargetConfig) -> sqlmesh.core.model.kind.ModelKind:
113 def model_kind(self, target: TargetConfig) -> ModelKind: 114 """ 115 Get the sqlmesh ModelKind 116 Returns: 117 The sqlmesh ModelKind 118 """ 119 materialization = self.materialized 120 if materialization == Materialization.TABLE: 121 return ModelKind(name=ModelKindName.FULL) 122 if materialization == Materialization.VIEW: 123 return ModelKind(name=ModelKindName.VIEW) 124 if materialization == Materialization.INCREMENTAL: 125 if self.time_column: 126 strategy = self.incremental_strategy or target.default_incremental_strategy( 127 IncrementalByTimeRangeKind 128 ) 129 if strategy not in INCREMENTAL_BY_TIME_STRATEGIES: 130 raise ConfigError( 131 f"SQLMesh IncrementalByTime not compatible with '{strategy}'" 132 f" incremental strategy. Supported strategies include {collection_to_str(INCREMENTAL_BY_TIME_STRATEGIES)}." 133 ) 134 return IncrementalByTimeRangeKind(time_column=self.time_column) 135 if self.unique_key: 136 strategy = self.incremental_strategy or target.default_incremental_strategy( 137 IncrementalByUniqueKeyKind 138 ) 139 if strategy not in INCREMENTAL_BY_UNIQUE_KEY_STRATEGIES: 140 support_msg = ( 141 "does not currently support" 142 if strategy is "append" 143 else "not compatible with" 144 ) 145 raise ConfigError( 146 f"{self.model_name}: SQLMesh IncrementalByUniqueKey {support_msg} '{strategy}'" 147 f" incremental strategy. Supported strategies include {collection_to_str(INCREMENTAL_BY_UNIQUE_KEY_STRATEGIES)}." 148 ) 149 return IncrementalByUniqueKeyKind(unique_key=self.unique_key) 150 151 raise ConfigError( 152 f"{self.model_name}: Incremental materialization requires either " 153 f"time_column ({collection_to_str(INCREMENTAL_BY_TIME_STRATEGIES)}) or " 154 f"unique_key ({collection_to_str(INCREMENTAL_BY_UNIQUE_KEY_STRATEGIES)}) configuration." 155 ) 156 if materialization == Materialization.EPHEMERAL: 157 return ModelKind(name=ModelKindName.EMBEDDED) 158 raise ConfigError(f"{materialization.value} materialization not supported.")
Get the sqlmesh ModelKind
Returns:
The sqlmesh ModelKind
def
to_sqlmesh( self, context: sqlmesh.dbt.common.DbtContext) -> sqlmesh.core.model.Model:
182 def to_sqlmesh(self, context: DbtContext) -> Model: 183 """Converts the dbt model into a SQLMesh model.""" 184 model_context = self._context_for_dependencies(context, self._dependencies) 185 expressions = d.parse(self.sql_no_config) 186 if not expressions: 187 raise ConfigError(f"Model '{self.table_name}' must have a query.") 188 189 optional_kwargs: t.Dict[str, t.Any] = {} 190 if self.partitioned_by: 191 optional_kwargs["partitioned_by"] = self.partitioned_by 192 for field in ("cron", "batch_size"): 193 field_val = getattr(self, field, None) or self.meta.get(field, None) 194 if field_val: 195 optional_kwargs[field] = field_val 196 197 return create_sql_model( 198 self.model_name, 199 expressions[-1], 200 dialect=self.model_dialect or model_context.dialect, 201 kind=self.model_kind(context.target), 202 start=self.start, 203 statements=expressions[0:-1], 204 **optional_kwargs, 205 **self.sqlmesh_model_kwargs(model_context), 206 )
Converts the dbt model into a SQLMesh model.
Inherited Members
- pydantic.main.BaseModel
- BaseModel
- parse_obj
- parse_raw
- parse_file
- from_orm
- construct
- copy
- schema
- schema_json
- validate
- update_forward_refs