sqlmesh.core.model.kind
1from __future__ import annotations 2 3import typing as t 4from enum import Enum 5 6from pydantic import Field, validator 7from sqlglot import exp 8from sqlglot.time import format_time 9 10from sqlmesh.core import dialect as d 11from sqlmesh.utils.errors import ConfigError 12from sqlmesh.utils.pydantic import PydanticModel 13 14 15# TODO: switch to autoname when sqlglot is typed 16class ModelKindName(str, Enum): 17 """The kind of model, determining how this data is computed and stored in the warehouse.""" 18 19 INCREMENTAL_BY_TIME_RANGE = "INCREMENTAL_BY_TIME_RANGE" 20 INCREMENTAL_BY_UNIQUE_KEY = "INCREMENTAL_BY_UNIQUE_KEY" 21 FULL = "FULL" 22 VIEW = "VIEW" 23 EMBEDDED = "EMBEDDED" 24 SEED = "SEED" 25 # TODO: Add support for snapshots 26 # SNAPSHOT = "SNAPSHOT" 27 28 29class ModelKind(PydanticModel): 30 name: ModelKindName 31 32 @property 33 def is_incremental_by_time_range(self) -> bool: 34 return self.name == ModelKindName.INCREMENTAL_BY_TIME_RANGE 35 36 @property 37 def is_incremental_by_unique_key(self) -> bool: 38 return self.name == ModelKindName.INCREMENTAL_BY_UNIQUE_KEY 39 40 @property 41 def is_full(self) -> bool: 42 return self.name == ModelKindName.FULL 43 44 # @property 45 # def is_snapshot(self) -> bool: 46 # return self.name == ModelKindName.SNAPSHOT 47 48 @property 49 def is_view(self) -> bool: 50 return self.name == ModelKindName.VIEW 51 52 @property 53 def is_embedded(self) -> bool: 54 return self.name == ModelKindName.EMBEDDED 55 56 @property 57 def is_seed(self) -> bool: 58 return self.name == ModelKindName.SEED 59 60 @property 61 def is_materialized(self) -> bool: 62 return self.name not in (ModelKindName.VIEW, ModelKindName.EMBEDDED) 63 64 @property 65 def only_latest(self) -> bool: 66 """Whether or not this model only cares about latest date to render.""" 67 return self.name in (ModelKindName.VIEW, ModelKindName.FULL) 68 69 def to_expression(self, **kwargs: t.Any) -> d.ModelKind: 70 return d.ModelKind(this=self.name.value.upper(), **kwargs) 71 
72 73class TimeColumn(PydanticModel): 74 column: str 75 format: t.Optional[str] = None 76 77 @property 78 def expression(self) -> exp.Column | exp.Tuple: 79 """Convert this pydantic model into a time_column SQLGlot expression.""" 80 column = exp.to_column(self.column) 81 if not self.format: 82 return column 83 84 return exp.Tuple(expressions=[column, exp.Literal.string(self.format)]) 85 86 def to_expression(self, dialect: str) -> exp.Column | exp.Tuple: 87 """Convert this pydantic model into a time_column SQLGlot expression.""" 88 column = exp.to_column(self.column) 89 if not self.format: 90 return column 91 92 return exp.Tuple( 93 expressions=[ 94 column, 95 exp.Literal.string( 96 format_time( 97 self.format, 98 d.Dialect.get_or_raise(dialect).inverse_time_mapping, # type: ignore 99 ) 100 ), 101 ] 102 ) 103 104 105class IncrementalByTimeRangeKind(ModelKind): 106 name: ModelKindName = Field(ModelKindName.INCREMENTAL_BY_TIME_RANGE, const=True) 107 time_column: TimeColumn 108 109 @validator("time_column", pre=True) 110 def _parse_time_column(cls, v: t.Any) -> TimeColumn: 111 if isinstance(v, exp.Tuple): 112 kwargs = { 113 key: v.expressions[i].name 114 for i, key in enumerate(("column", "format")[: len(v.expressions)]) 115 } 116 return TimeColumn(**kwargs) 117 118 if isinstance(v, exp.Identifier): 119 return TimeColumn(column=v.name) 120 121 if isinstance(v, exp.Expression): 122 return TimeColumn(column=v.name) 123 124 if isinstance(v, str): 125 return TimeColumn(column=v) 126 return v 127 128 def to_expression(self, dialect: str = "", **kwargs: t.Any) -> d.ModelKind: 129 return super().to_expression( 130 expressions=[ 131 exp.Property(this="time_column", value=self.time_column.to_expression(dialect)) 132 ], 133 ) 134 135 136class IncrementalByUniqueKeyKind(ModelKind): 137 name: ModelKindName = Field(ModelKindName.INCREMENTAL_BY_UNIQUE_KEY, const=True) 138 unique_key: t.List[str] 139 140 @validator("unique_key", pre=True) 141 def _parse_unique_key(cls, v: t.Any) -> 
t.List[str]: 142 if isinstance(v, exp.Identifier): 143 return [v.this] 144 if isinstance(v, exp.Tuple): 145 return [e.this for e in v.expressions] 146 return [i.this if isinstance(i, exp.Identifier) else str(i) for i in v] 147 148 149class SeedKind(ModelKind): 150 name: ModelKindName = Field(ModelKindName.SEED, const=True) 151 path: str 152 batch_size: int = 1000 153 154 @validator("batch_size", pre=True) 155 def _parse_batch_size(cls, v: t.Any) -> int: 156 if isinstance(v, exp.Expression) and v.is_int: 157 v = int(v.name) 158 if not isinstance(v, int): 159 raise ValueError("Seed batch size must be an integer value") 160 if v <= 0: 161 raise ValueError("Seed batch size must be a positive integer") 162 return v 163 164 @validator("path", pre=True) 165 def _parse_path(cls, v: t.Any) -> str: 166 if isinstance(v, exp.Literal): 167 return v.this 168 return str(v) 169 170 def to_expression(self, **kwargs: t.Any) -> d.ModelKind: 171 """Convert the seed kind into a SQLGlot expression.""" 172 return super().to_expression( 173 expressions=[ 174 exp.Property(this=exp.Var(this="path"), value=exp.Literal.string(self.path)), 175 exp.Property( 176 this=exp.Var(this="batch_size"), 177 value=exp.Literal.number(self.batch_size), 178 ), 179 ], 180 ) 181 182 183def _model_kind_validator(v: t.Any) -> ModelKind: 184 if isinstance(v, ModelKind): 185 return v 186 187 if isinstance(v, d.ModelKind): 188 name = v.this 189 props = {prop.name: prop.args.get("value") for prop in v.expressions} 190 klass: t.Type[ModelKind] = ModelKind 191 if name == ModelKindName.INCREMENTAL_BY_TIME_RANGE: 192 klass = IncrementalByTimeRangeKind 193 elif name == ModelKindName.INCREMENTAL_BY_UNIQUE_KEY: 194 klass = IncrementalByUniqueKeyKind 195 elif name == ModelKindName.SEED: 196 klass = SeedKind 197 else: 198 props["name"] = ModelKindName(name) 199 return klass(**props) 200 201 if isinstance(v, dict): 202 if v.get("name") == ModelKindName.INCREMENTAL_BY_TIME_RANGE: 203 klass = IncrementalByTimeRangeKind 204 
elif v.get("name") == ModelKindName.INCREMENTAL_BY_UNIQUE_KEY: 205 klass = IncrementalByUniqueKeyKind 206 elif v.get("name") == ModelKindName.SEED: 207 klass = SeedKind 208 else: 209 klass = ModelKind 210 return klass(**v) 211 212 name = (v.name if isinstance(v, exp.Expression) else str(v)).upper() 213 214 try: 215 return ModelKind(name=ModelKindName(name)) 216 except ValueError: 217 raise ConfigError(f"Invalid model kind '{name}'") 218 219 220model_kind_validator = validator("kind", pre=True, allow_reuse=True)(_model_kind_validator)
class
ModelKindName(builtins.str, enum.Enum):
class ModelKindName(str, Enum):
    """Names of the supported model kinds.

    The kind of a model determines how its data is computed and stored in the
    warehouse. Every member is also a plain string equal to its own name.
    """

    INCREMENTAL_BY_TIME_RANGE = "INCREMENTAL_BY_TIME_RANGE"
    INCREMENTAL_BY_UNIQUE_KEY = "INCREMENTAL_BY_UNIQUE_KEY"
    FULL = "FULL"
    VIEW = "VIEW"
    EMBEDDED = "EMBEDDED"
    SEED = "SEED"
    # TODO: Add support for snapshots
    # SNAPSHOT = "SNAPSHOT"
The kind of model, determining how this data is computed and stored in the warehouse.
INCREMENTAL_BY_TIME_RANGE = <ModelKindName.INCREMENTAL_BY_TIME_RANGE: 'INCREMENTAL_BY_TIME_RANGE'>
INCREMENTAL_BY_UNIQUE_KEY = <ModelKindName.INCREMENTAL_BY_UNIQUE_KEY: 'INCREMENTAL_BY_UNIQUE_KEY'>
FULL = <ModelKindName.FULL: 'FULL'>
VIEW = <ModelKindName.VIEW: 'VIEW'>
EMBEDDED = <ModelKindName.EMBEDDED: 'EMBEDDED'>
SEED = <ModelKindName.SEED: 'SEED'>
Inherited Members
- enum.Enum
- name
- value
- builtins.str
- encode
- replace
- split
- rsplit
- join
- capitalize
- casefold
- title
- center
- count
- expandtabs
- find
- partition
- index
- ljust
- lower
- lstrip
- rfind
- rindex
- rjust
- rstrip
- rpartition
- splitlines
- strip
- swapcase
- translate
- upper
- startswith
- endswith
- removeprefix
- removesuffix
- isascii
- islower
- isupper
- istitle
- isspace
- isdecimal
- isdigit
- isnumeric
- isalpha
- isalnum
- isidentifier
- isprintable
- zfill
- format
- format_map
- maketrans
class ModelKind(PydanticModel):
    """Base description of a model kind, identified by its name."""

    name: ModelKindName

    @property
    def is_incremental_by_time_range(self) -> bool:
        """Whether the kind name is INCREMENTAL_BY_TIME_RANGE."""
        return self.name == ModelKindName.INCREMENTAL_BY_TIME_RANGE

    @property
    def is_incremental_by_unique_key(self) -> bool:
        """Whether the kind name is INCREMENTAL_BY_UNIQUE_KEY."""
        return self.name == ModelKindName.INCREMENTAL_BY_UNIQUE_KEY

    @property
    def is_full(self) -> bool:
        """Whether the kind name is FULL."""
        return self.name == ModelKindName.FULL

    # TODO: add an `is_snapshot` property once a SNAPSHOT kind exists.

    @property
    def is_view(self) -> bool:
        """Whether the kind name is VIEW."""
        return self.name == ModelKindName.VIEW

    @property
    def is_embedded(self) -> bool:
        """Whether the kind name is EMBEDDED."""
        return self.name == ModelKindName.EMBEDDED

    @property
    def is_seed(self) -> bool:
        """Whether the kind name is SEED."""
        return self.name == ModelKindName.SEED

    @property
    def is_materialized(self) -> bool:
        """Whether the kind is anything other than VIEW or EMBEDDED."""
        return not (self.is_view or self.is_embedded)

    @property
    def only_latest(self) -> bool:
        """Whether this model only cares about the latest date to render (VIEW or FULL)."""
        return self.is_view or self.is_full

    def to_expression(self, **kwargs: t.Any) -> d.ModelKind:
        """Build a `d.ModelKind` expression named after this kind.

        Any keyword arguments are passed straight to the `d.ModelKind` constructor.
        """
        kind_name = self.name.value.upper()
        return d.ModelKind(this=kind_name, **kwargs)
Inherited Members
- pydantic.main.BaseModel
- BaseModel
- parse_obj
- parse_raw
- parse_file
- from_orm
- construct
- copy
- schema
- schema_json
- validate
- update_forward_refs
class TimeColumn(PydanticModel):
    """A time column name paired with an optional format string."""

    column: str
    format: t.Optional[str] = None

    @property
    def expression(self) -> exp.Column | exp.Tuple:
        """Render this time column as a SQLGlot expression.

        Returns the bare column when no format is set, otherwise a tuple of
        the column and the format emitted unchanged as a string literal.
        """
        column_expr = exp.to_column(self.column)
        if self.format:
            return exp.Tuple(expressions=[column_expr, exp.Literal.string(self.format)])
        return column_expr

    def to_expression(self, dialect: str) -> exp.Column | exp.Tuple:
        """Render this time column as a SQLGlot expression for a dialect.

        Args:
            dialect: Name of the dialect whose inverse time mapping is used to
                rewrite the format before it is emitted.

        Returns:
            The bare column when no format is set, otherwise a tuple of the
            column and the rewritten format as a string literal.
        """
        column_expr = exp.to_column(self.column)
        if self.format:
            time_mapping = d.Dialect.get_or_raise(dialect).inverse_time_mapping  # type: ignore
            dialect_format = format_time(self.format, time_mapping)
            return exp.Tuple(expressions=[column_expr, exp.Literal.string(dialect_format)])
        return column_expr
expression: sqlglot.expressions.Column | sqlglot.expressions.Tuple
Convert this pydantic model into a time_column SQLGlot expression.
def
to_expression( self, dialect: str) -> sqlglot.expressions.Column | sqlglot.expressions.Tuple:
def to_expression(self, dialect: str) -> exp.Column | exp.Tuple:
    """Render this time column as a SQLGlot expression for a dialect.

    Args:
        dialect: Name of the dialect whose inverse time mapping is used to
            rewrite the format before it is emitted.

    Returns:
        The bare column when no format is set, otherwise a tuple of the
        column and the rewritten format as a string literal.
    """
    column_expr = exp.to_column(self.column)
    if self.format:
        time_mapping = d.Dialect.get_or_raise(dialect).inverse_time_mapping  # type: ignore
        dialect_format = format_time(self.format, time_mapping)
        return exp.Tuple(expressions=[column_expr, exp.Literal.string(dialect_format)])
    return column_expr
Convert this pydantic model into a time_column SQLGlot expression.
Inherited Members
- pydantic.main.BaseModel
- BaseModel
- parse_obj
- parse_raw
- parse_file
- from_orm
- construct
- copy
- schema
- schema_json
- validate
- update_forward_refs
class IncrementalByTimeRangeKind(ModelKind):
    """The INCREMENTAL_BY_TIME_RANGE kind, which additionally requires a time column."""

    name: ModelKindName = Field(ModelKindName.INCREMENTAL_BY_TIME_RANGE, const=True)
    time_column: TimeColumn

    @validator("time_column", pre=True)
    def _parse_time_column(cls, v: t.Any) -> TimeColumn:
        """Coerce a raw time column value into a `TimeColumn`.

        A tuple expression supplies the column and, if present, the format from
        its first two members. Any other expression or a plain string supplies
        only the column. Other values are returned untouched.
        """
        if isinstance(v, exp.Tuple):
            # zip stops after two keys, so surplus tuple members are ignored.
            fields = dict(zip(("column", "format"), (member.name for member in v.expressions)))
            return TimeColumn(**fields)

        # Identifiers are expressions too, so one check covers both cases.
        if isinstance(v, exp.Expression):
            return TimeColumn(column=v.name)

        if isinstance(v, str):
            return TimeColumn(column=v)

        return v

    def to_expression(self, dialect: str = "", **kwargs: t.Any) -> d.ModelKind:
        """Build the kind expression with a `time_column` property.

        Args:
            dialect: Dialect name handed to `TimeColumn.to_expression`.
            kwargs: Accepted for signature compatibility; not forwarded.
        """
        time_column_property = exp.Property(
            this="time_column",
            value=self.time_column.to_expression(dialect),
        )
        return super().to_expression(expressions=[time_column_property])
Inherited Members
- pydantic.main.BaseModel
- BaseModel
- parse_obj
- parse_raw
- parse_file
- from_orm
- construct
- copy
- schema
- schema_json
- validate
- update_forward_refs
class IncrementalByUniqueKeyKind(ModelKind):
    """The INCREMENTAL_BY_UNIQUE_KEY kind, which additionally requires a unique key."""

    name: ModelKindName = Field(ModelKindName.INCREMENTAL_BY_UNIQUE_KEY, const=True)
    unique_key: t.List[str]

    @validator("unique_key", pre=True)
    def _parse_unique_key(cls, v: t.Any) -> t.List[str]:
        """Normalize a raw unique key into a list.

        Args:
            v: A plain string, an identifier, a tuple expression, or an
                iterable whose items are identifiers or other values.

        Returns:
            A one-element list for a string or identifier, the `this` of each
            member for a tuple expression, and otherwise one entry per item
            (`this` for identifiers, `str(item)` for anything else).
        """
        if isinstance(v, str):
            # A bare string is a single key; iterating it would yield its characters.
            return [v]
        if isinstance(v, exp.Identifier):
            return [v.this]
        if isinstance(v, exp.Tuple):
            return [e.this for e in v.expressions]
        return [i.this if isinstance(i, exp.Identifier) else str(i) for i in v]
Inherited Members
- pydantic.main.BaseModel
- BaseModel
- parse_obj
- parse_raw
- parse_file
- from_orm
- construct
- copy
- schema
- schema_json
- validate
- update_forward_refs
class SeedKind(ModelKind):
    """The SEED kind, which additionally carries a path and a batch size."""

    name: ModelKindName = Field(ModelKindName.SEED, const=True)
    path: str
    batch_size: int = 1000

    @validator("batch_size", pre=True)
    def _parse_batch_size(cls, v: t.Any) -> int:
        """Coerce and check the batch size.

        An integer expression is converted through its name; every other value
        is checked as given.

        Raises:
            ValueError: If the value is not an integer or is not positive.
        """
        size = int(v.name) if isinstance(v, exp.Expression) and v.is_int else v
        if not isinstance(size, int):
            raise ValueError("Seed batch size must be an integer value")
        if size <= 0:
            raise ValueError("Seed batch size must be a positive integer")
        return size

    @validator("path", pre=True)
    def _parse_path(cls, v: t.Any) -> str:
        """Return the content of a literal expression, or `str(v)` for anything else."""
        return v.this if isinstance(v, exp.Literal) else str(v)

    def to_expression(self, **kwargs: t.Any) -> d.ModelKind:
        """Convert the seed kind into a SQLGlot expression.

        The result carries `path` and `batch_size` properties. Keyword
        arguments are accepted for signature compatibility but not forwarded.
        """
        path_property = exp.Property(
            this=exp.Var(this="path"),
            value=exp.Literal.string(self.path),
        )
        batch_size_property = exp.Property(
            this=exp.Var(this="batch_size"),
            value=exp.Literal.number(self.batch_size),
        )
        return super().to_expression(expressions=[path_property, batch_size_property])
def to_expression(self, **kwargs: t.Any) -> d.ModelKind:
    """Convert the seed kind into a SQLGlot expression.

    The result carries `path` and `batch_size` properties. Keyword
    arguments are accepted for signature compatibility but not forwarded.
    """
    path_property = exp.Property(
        this=exp.Var(this="path"),
        value=exp.Literal.string(self.path),
    )
    batch_size_property = exp.Property(
        this=exp.Var(this="batch_size"),
        value=exp.Literal.number(self.batch_size),
    )
    return super().to_expression(expressions=[path_property, batch_size_property])
Convert the seed kind into a SQLGlot expression.
Inherited Members
- pydantic.main.BaseModel
- BaseModel
- parse_obj
- parse_raw
- parse_file
- from_orm
- construct
- copy
- schema
- schema_json
- validate
- update_forward_refs
def _model_kind_validator(v: t.Any) -> ModelKind:
    """Coerce a raw `kind` value into a `ModelKind` instance.

    Args:
        v: An existing `ModelKind`, a parsed `d.ModelKind` expression, a dict
            of fields, or any other expression / value naming a kind.

    Returns:
        The dedicated subclass for kinds that carry extra properties,
        otherwise a plain `ModelKind`.

    Raises:
        ConfigError: If a bare name does not produce a valid `ModelKind`.
    """
    if isinstance(v, ModelKind):
        return v

    # Kinds that carry extra properties and therefore have a dedicated class.
    specialized: t.Tuple[t.Tuple[ModelKindName, t.Type[ModelKind]], ...] = (
        (ModelKindName.INCREMENTAL_BY_TIME_RANGE, IncrementalByTimeRangeKind),
        (ModelKindName.INCREMENTAL_BY_UNIQUE_KEY, IncrementalByUniqueKeyKind),
        (ModelKindName.SEED, SeedKind),
    )

    def _specialized_class(kind_name: t.Any) -> t.Optional[t.Type[ModelKind]]:
        # Equality scan rather than a dict lookup so any value can be compared.
        return next((found for known, found in specialized if kind_name == known), None)

    if isinstance(v, d.ModelKind):
        name = v.this
        props = {prop.name: prop.args.get("value") for prop in v.expressions}
        klass = _specialized_class(name)
        if klass is None:
            # Dedicated classes fix their own name; the base class needs it passed in.
            props["name"] = ModelKindName(name)
            klass = ModelKind
        return klass(**props)

    if isinstance(v, dict):
        klass = _specialized_class(v.get("name"))
        if klass is None:
            klass = ModelKind
        return klass(**v)

    name = (v.name if isinstance(v, exp.Expression) else str(v)).upper()

    try:
        return ModelKind(name=ModelKindName(name))
    except ValueError as err:
        # Chain explicitly so the underlying error is recorded as the cause.
        raise ConfigError(f"Invalid model kind '{name}'") from err