sqlmesh.core.model.seed
```python
from __future__ import annotations

import typing as t
from io import StringIO
from pathlib import Path

import numpy as np
import pandas as pd
from sqlglot import exp

from sqlmesh.utils.pydantic import PydanticModel

PANDAS_TYPE_MAPPINGS = {
    np.dtype("int8"): exp.DataType.build("tinyint"),
    np.dtype("int16"): exp.DataType.build("smallint"),
    np.dtype("int32"): exp.DataType.build("int"),
    np.dtype("int64"): exp.DataType.build("bigint"),
    np.dtype("float16"): exp.DataType.build("float"),
    np.dtype("float32"): exp.DataType.build("float"),
    np.dtype("float64"): exp.DataType.build("double"),
    np.dtype("O"): exp.DataType.build("varchar"),
    np.dtype("bool"): exp.DataType.build("boolean"),
}


class Seed(PydanticModel):
    """Represents content of a seed.

    Presently only CSV format is supported.
    """

    content: str
    _df: t.Optional[pd.DataFrame] = None

    @property
    def columns_to_types(self) -> t.Dict[str, exp.DataType]:
        result = {}
        for column_name, column_type in self._get_df().dtypes.items():
            exp_type = PANDAS_TYPE_MAPPINGS.get(column_type)
            if not exp_type:
                raise ValueError(f"Unsupported pandas type '{column_type}'")
            result[str(column_name)] = exp_type
        return result

    def read(self, batch_size: t.Optional[int] = None) -> t.Generator[pd.DataFrame, None, None]:
        df = self._get_df()

        batch_size = batch_size or df.size
        batch_start = 0
        while batch_start < df.shape[0]:
            yield df.iloc[batch_start : batch_start + batch_size, :]
            batch_start += batch_size

    def _get_df(self) -> pd.DataFrame:
        if self._df is None:
            self._df = pd.read_csv(
                StringIO(self.content),
                index_col=False,
                on_bad_lines="error",
            )
        return self._df


def create_seed(path: str | Path) -> Seed:
    with open(path, "r") as fd:
        return Seed(content=fd.read())
```
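The module's entry point is `create_seed`, which reads a CSV file from disk and wraps its raw text in a `Seed`. A minimal sketch of that flow, assuming a hypothetical `items.csv` file with an integer and a string column:

```python
from sqlmesh.core.model.seed import create_seed

# "items.csv" is a hypothetical file used only for illustration, e.g.:
#   id,name
#   1,apple
#   2,banana
seed = create_seed("items.csv")

# Column types are inferred by pandas and translated via PANDAS_TYPE_MAPPINGS.
for name, dtype in seed.columns_to_types.items():
    print(name, dtype.sql())  # id BIGINT / name VARCHAR
```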
```python
class Seed(PydanticModel):
    """Represents content of a seed.

    Presently only CSV format is supported.
    """

    content: str
    _df: t.Optional[pd.DataFrame] = None

    @property
    def columns_to_types(self) -> t.Dict[str, exp.DataType]:
        result = {}
        for column_name, column_type in self._get_df().dtypes.items():
            exp_type = PANDAS_TYPE_MAPPINGS.get(column_type)
            if not exp_type:
                raise ValueError(f"Unsupported pandas type '{column_type}'")
            result[str(column_name)] = exp_type
        return result

    def read(self, batch_size: t.Optional[int] = None) -> t.Generator[pd.DataFrame, None, None]:
        df = self._get_df()

        batch_size = batch_size or df.size
        batch_start = 0
        while batch_start < df.shape[0]:
            yield df.iloc[batch_start : batch_start + batch_size, :]
            batch_start += batch_size

    def _get_df(self) -> pd.DataFrame:
        if self._df is None:
            self._df = pd.read_csv(
                StringIO(self.content),
                index_col=False,
                on_bad_lines="error",
            )
        return self._df
```
Represents content of a seed.
Presently only CSV format is supported.
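Because a `Seed` is just a Pydantic model around the raw CSV text, it can also be constructed directly from an in-memory string; `columns_to_types` then reflects how the pandas-inferred dtypes (`int64`, `object`, `float64`, ...) map onto SQLGlot types via `PANDAS_TYPE_MAPPINGS`. A small sketch with illustrative data:

```python
from sqlmesh.core.model.seed import Seed

csv_content = "id,name,price\n1,apple,1.5\n2,banana,0.5\n"
seed = Seed(content=csv_content)

# int64 -> BIGINT, object -> VARCHAR, float64 -> DOUBLE
print({name: dtype.sql() for name, dtype in seed.columns_to_types.items()})
# {'id': 'BIGINT', 'name': 'VARCHAR', 'price': 'DOUBLE'}
```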
def read(self, batch_size: t.Optional[int] = None) -> t.Generator[pd.DataFrame, None, None]:
```python
    def read(self, batch_size: t.Optional[int] = None) -> t.Generator[pd.DataFrame, None, None]:
        df = self._get_df()

        batch_size = batch_size or df.size
        batch_start = 0
        while batch_start < df.shape[0]:
            yield df.iloc[batch_start : batch_start + batch_size, :]
            batch_start += batch_size
```
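`read` slices the parsed DataFrame into row batches. When `batch_size` is omitted it defaults to `df.size` (rows times columns), which is always at least the number of rows, so the whole seed is yielded as a single batch. A short sketch with illustrative content:

```python
from sqlmesh.core.model.seed import Seed

seed = Seed(content="id\n0\n1\n2\n3\n4\n")  # 5 rows of illustrative data

# Batches of at most 2 rows: 2, 2, 1
for batch in seed.read(batch_size=2):
    print(len(batch))

# Without batch_size, everything comes back in one batch.
assert len(list(seed.read())) == 1
```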
Inherited Members
- pydantic.main.BaseModel
  - BaseModel
  - parse_obj
  - parse_raw
  - parse_file
  - from_orm
  - construct
  - copy
  - schema
  - schema_json
  - validate
  - update_forward_refs