Edit on GitHub

sqlmesh.core.model.seed

 1from __future__ import annotations
 2
 3import typing as t
 4from io import StringIO
 5from pathlib import Path
 6
 7import numpy as np
 8import pandas as pd
 9from sqlglot import exp
10
11from sqlmesh.utils.pydantic import PydanticModel
12
# Lookup from a pandas/numpy column dtype to the SQL data type used for it.
# Dtypes that are absent here are rejected by Seed.columns_to_types.
PANDAS_TYPE_MAPPINGS = {
    np.dtype(dtype_name): exp.DataType.build(sql_type)
    for dtype_name, sql_type in (
        ("int8", "tinyint"),
        ("int16", "smallint"),
        ("int32", "int"),
        ("int64", "bigint"),
        ("float16", "float"),
        ("float32", "float"),
        ("float64", "double"),
        ("O", "varchar"),
        ("bool", "boolean"),
    )
}
24
25
class Seed(PydanticModel):
    """Represents content of a seed.

    Presently only CSV format is supported.

    The raw CSV text is stored in ``content``. It is parsed into a pandas
    DataFrame the first time it is needed and the parsed frame is reused
    by later calls.
    """

    # Raw CSV text of the seed.
    content: str
    # Cache for the parsed CSV; populated by _get_df() on first use.
    _df: t.Optional[pd.DataFrame] = None

    @property
    def columns_to_types(self) -> t.Dict[str, exp.DataType]:
        """Maps every column name of the seed to its SQL data type.

        The type of each column is looked up in PANDAS_TYPE_MAPPINGS using the
        dtype of the parsed DataFrame column.

        Returns:
            A dictionary from column name (as a string) to SQL data type.

        Raises:
            ValueError: If a column has a dtype with no entry in PANDAS_TYPE_MAPPINGS.
        """
        result: t.Dict[str, exp.DataType] = {}
        for column_name, column_type in self._get_df().dtypes.items():
            exp_type = PANDAS_TYPE_MAPPINGS.get(column_type)
            # Check for a missing mapping explicitly rather than relying on the
            # truthiness of the DataType expression object.
            if exp_type is None:
                raise ValueError(f"Unsupported pandas type '{column_type}'")
            result[str(column_name)] = exp_type
        return result

    def read(self, batch_size: t.Optional[int] = None) -> t.Generator[pd.DataFrame, None, None]:
        """Yields the rows of the seed as consecutive DataFrame slices.

        Args:
            batch_size: The maximum number of rows in each yielded slice. If it is
                omitted (or 0), all rows are yielded as a single slice.

        Yields:
            Row slices of the parsed DataFrame, in order. Nothing is yielded
            when the seed has no rows.

        Raises:
            ValueError: If batch_size is negative. Because this is a generator,
                the error surfaces when iteration starts.
        """
        # A negative step would move batch_start backwards forever.
        if batch_size is not None and batch_size < 0:
            raise ValueError(f"batch_size must be non-negative, got {batch_size}")

        df = self._get_df()
        row_count = df.shape[0]

        # Default to one batch holding every row.
        batch_size = batch_size or row_count
        batch_start = 0
        while batch_start < row_count:
            yield df.iloc[batch_start : batch_start + batch_size, :]
            batch_start += batch_size

    def _get_df(self) -> pd.DataFrame:
        """Returns the seed content as a DataFrame, parsing it on the first call only."""
        if self._df is None:
            self._df = pd.read_csv(
                StringIO(self.content),
                index_col=False,
                on_bad_lines="error",
            )
        return self._df
62
63
def create_seed(path: str | Path) -> Seed:
    """Creates a seed from the text content of a file.

    Args:
        path: The location of the seed file.

    Returns:
        A Seed whose content is the full text of the file.
    """
    # Decode as UTF-8 explicitly so the result does not depend on the
    # platform's locale encoding.
    with open(path, "r", encoding="utf-8") as fd:
        return Seed(content=fd.read())
class Seed(sqlmesh.utils.pydantic.PydanticModel):
class Seed(PydanticModel):
    """Represents content of a seed.

    Presently only CSV format is supported.

    The raw CSV text is stored in ``content``. It is parsed into a pandas
    DataFrame the first time it is needed and the parsed frame is reused
    by later calls.
    """

    # Raw CSV text of the seed.
    content: str
    # Cache for the parsed CSV; populated by _get_df() on first use.
    _df: t.Optional[pd.DataFrame] = None

    @property
    def columns_to_types(self) -> t.Dict[str, exp.DataType]:
        """Maps every column name of the seed to its SQL data type.

        The type of each column is looked up in PANDAS_TYPE_MAPPINGS using the
        dtype of the parsed DataFrame column.

        Returns:
            A dictionary from column name (as a string) to SQL data type.

        Raises:
            ValueError: If a column has a dtype with no entry in PANDAS_TYPE_MAPPINGS.
        """
        result: t.Dict[str, exp.DataType] = {}
        for column_name, column_type in self._get_df().dtypes.items():
            exp_type = PANDAS_TYPE_MAPPINGS.get(column_type)
            # Check for a missing mapping explicitly rather than relying on the
            # truthiness of the DataType expression object.
            if exp_type is None:
                raise ValueError(f"Unsupported pandas type '{column_type}'")
            result[str(column_name)] = exp_type
        return result

    def read(self, batch_size: t.Optional[int] = None) -> t.Generator[pd.DataFrame, None, None]:
        """Yields the rows of the seed as consecutive DataFrame slices.

        Args:
            batch_size: The maximum number of rows in each yielded slice. If it is
                omitted (or 0), all rows are yielded as a single slice.

        Yields:
            Row slices of the parsed DataFrame, in order. Nothing is yielded
            when the seed has no rows.

        Raises:
            ValueError: If batch_size is negative. Because this is a generator,
                the error surfaces when iteration starts.
        """
        # A negative step would move batch_start backwards forever.
        if batch_size is not None and batch_size < 0:
            raise ValueError(f"batch_size must be non-negative, got {batch_size}")

        df = self._get_df()
        row_count = df.shape[0]

        # Default to one batch holding every row.
        batch_size = batch_size or row_count
        batch_start = 0
        while batch_start < row_count:
            yield df.iloc[batch_start : batch_start + batch_size, :]
            batch_start += batch_size

    def _get_df(self) -> pd.DataFrame:
        """Returns the seed content as a DataFrame, parsing it on the first call only."""
        if self._df is None:
            self._df = pd.read_csv(
                StringIO(self.content),
                index_col=False,
                on_bad_lines="error",
            )
        return self._df

Represents content of a seed.

Presently only CSV format is supported.

def read( self, batch_size: Optional[int] = None) -> Generator[pandas.core.frame.DataFrame, NoneType, NoneType]:
46    def read(self, batch_size: t.Optional[int] = None) -> t.Generator[pd.DataFrame, None, None]:
47        df = self._get_df()
48
49        batch_size = batch_size or df.size
50        batch_start = 0
51        while batch_start < df.shape[0]:
52            yield df.iloc[batch_start : batch_start + batch_size, :]
53            batch_start += batch_size
Inherited Members
pydantic.main.BaseModel
BaseModel
parse_obj
parse_raw
parse_file
from_orm
construct
copy
schema
schema_json
validate
update_forward_refs
sqlmesh.utils.pydantic.PydanticModel
Config
dict
json
missing_required_fields
extra_fields
all_fields
required_fields
def create_seed(path: str | pathlib.Path) -> sqlmesh.core.model.seed.Seed:
def create_seed(path: str | Path) -> Seed:
    """Creates a seed from the text content of a file.

    Args:
        path: The location of the seed file.

    Returns:
        A Seed whose content is the full text of the file.
    """
    # Decode as UTF-8 explicitly so the result does not depend on the
    # platform's locale encoding.
    with open(path, "r", encoding="utf-8") as fd:
        return Seed(content=fd.read())