Edit on GitHub

sqlmesh.core.loader

  1from __future__ import annotations
  2
  3import abc
  4import importlib
  5import linecache
  6import os
  7import sys
  8import types
  9import typing as t
 10from dataclasses import dataclass
 11from pathlib import Path
 12
 13from sqlglot.errors import SqlglotError
 14from sqlglot.schema import MappingSchema
 15
 16from sqlmesh.core.audit import Audit
 17from sqlmesh.core.dialect import parse
 18from sqlmesh.core.hooks import HookRegistry, hook
 19from sqlmesh.core.macros import MacroRegistry, macro
 20from sqlmesh.core.model import Model, SeedModel, load_model
 21from sqlmesh.core.model import model as model_registry
 22from sqlmesh.utils import UniqueKeyDict
 23from sqlmesh.utils.dag import DAG
 24from sqlmesh.utils.errors import ConfigError
 25
 26if t.TYPE_CHECKING:
 27    from sqlmesh.core.context import Context
 28
 29
 30def update_model_schemas(dialect: str, dag: DAG[str], models: UniqueKeyDict[str, Model]) -> None:
 31    schema = MappingSchema(dialect=dialect)
 32    for name in dag.sorted():
 33        model = models.get(name)
 34
 35        # External models don't exist in the context, so we need to skip them
 36        if not model:
 37            continue
 38
 39        if model.contains_star_query and any(dep not in models for dep in model.depends_on):
 40            raise ConfigError(
 41                f"Can't expand SELECT * expression for model '{name}'. Projections for models that use external sources must be specified explicitly at '{model._path}'"
 42            )
 43
 44        model.update_schema(schema)
 45        schema.add_table(name, model.columns_to_types)
 46
 47
@dataclass
class LoadedProject:
    """Bundle of everything produced by a single ``Loader.load`` call."""

    # Macro registry returned by the loader's `_load_scripts`
    macros: MacroRegistry
    # Hook registry returned by the loader's `_load_scripts`
    hooks: HookRegistry
    # Loaded models keyed by model name
    models: UniqueKeyDict[str, Model]
    # Loaded audits keyed by audit name
    audits: UniqueKeyDict[str, Audit]
    # Graph of model names built from each model's `depends_on`
    dag: DAG[str]
 55
 56
 57class Loader(abc.ABC):
 58    """Abstract base class to load macros and models for a context"""
 59
 60    def __init__(self) -> None:
 61        self._path_mtimes: t.Dict[Path, float] = {}
 62        self._dag: DAG[str] = DAG()
 63
 64    def load(self, context: Context) -> LoadedProject:
 65        """
 66        Loads all hooks, macros, and models in the context's path
 67
 68        Args:
 69            context: The context to load macros and models for
 70        """
 71        # python files are cached by the system
 72        # need to manually clear here so we can reload macros
 73        linecache.clearcache()
 74
 75        self._context = context
 76        self._path_mtimes.clear()
 77        self._dag = DAG()
 78
 79        macros, hooks = self._load_scripts()
 80        models = self._load_models(macros, hooks)
 81        for model in models.values():
 82            self._add_model_to_dag(model)
 83        update_model_schemas(self._context.dialect, self._dag, models)
 84
 85        audits = self._load_audits()
 86
 87        project = LoadedProject(
 88            macros=macros, hooks=hooks, models=models, audits=audits, dag=self._dag
 89        )
 90        return project
 91
 92    def reload_needed(self) -> bool:
 93        """
 94        Checks for any modifications to the files the macros and models depend on
 95        since the last load.
 96
 97        Returns:
 98            True if a modification is found; False otherwise
 99        """
100        return any(
101            path.stat().st_mtime > initial_mtime
102            for path, initial_mtime in self._path_mtimes.items()
103        )
104
105    @abc.abstractmethod
106    def _load_scripts(self) -> t.Tuple[MacroRegistry, HookRegistry]:
107        """Loads all user defined hooks and macros."""
108
109    @abc.abstractmethod
110    def _load_models(self, macros: MacroRegistry, hooks: HookRegistry) -> UniqueKeyDict[str, Model]:
111        """Loads all models."""
112
113    @abc.abstractmethod
114    def _load_audits(self) -> UniqueKeyDict[str, Audit]:
115        """Loads all audits."""
116
117    def _add_model_to_dag(self, model: Model) -> None:
118        self._dag.graph[model.name] = set()
119        self._dag.add(model.name, model.depends_on)
120
121    def _track_file(self, path: Path) -> None:
122        """Project file to track for modifications"""
123        self._path_mtimes[path] = path.stat().st_mtime
124
125
class SqlMeshLoader(Loader):
    """Loads macros and models for a context using the SQLMesh file formats"""

    def _load_scripts(self) -> t.Tuple[MacroRegistry, HookRegistry]:
        """Loads all user defined hooks and macros.

        Imports every non-ignored ``.py`` file under the context's macro and
        hook directories and tracks each imported file for modifications.

        Returns:
            The macro registry and the hook registry as they stand after the imports
        """
        # Capture the registries before any user file is imported
        # NOTE(review): presumably get_registry() returns a copy -- confirm in the registry code
        standard_hooks = hook.get_registry()
        standard_macros = macro.get_registry()

        try:
            for path in tuple(self._glob_path(self._context.macro_directory_path, ".py")) + tuple(
                self._glob_path(self._context.hook_directory_path, ".py")
            ):
                if self._import_python_file(path.relative_to(self._context.path)):
                    self._track_file(path)

            hooks = hook.get_registry()
            macros = macro.get_registry()
        finally:
            # Put the captured registries back even when an import raised, so a
            # failed load does not leave user definitions behind for the next one
            hook.set_registry(standard_hooks)
            macro.set_registry(standard_macros)

        return macros, hooks

    def _load_models(self, macros: MacroRegistry, hooks: HookRegistry) -> UniqueKeyDict[str, Model]:
        """
        Loads all of the SQL and python models within the model directory
        into a single Dict
        """
        models = self._load_sql_models(macros, hooks)
        models.update(self._load_python_models())

        return models

    def _load_sql_models(
        self, macros: MacroRegistry, hooks: HookRegistry
    ) -> UniqueKeyDict[str, Model]:
        """Loads the sql models into a Dict

        Every ``.sql`` file is tracked for modifications, as is the seed file
        of every seed model.

        Raises:
            ConfigError: If a model definition can't be parsed
        """
        models: UniqueKeyDict = UniqueKeyDict("models")
        for path in self._glob_path(self._context.models_directory_path, ".sql"):
            self._track_file(path)

            # Only parsing needs the file; it is closed before the model is built
            with open(path, "r", encoding="utf-8") as file:
                try:
                    expressions = parse(file.read(), default_dialect=self._context.dialect)
                except SqlglotError as ex:
                    raise ConfigError(
                        f"Failed to parse a model definition at '{path}': {ex}"
                    ) from ex

            model = load_model(
                expressions,
                defaults=self._context.config.model_defaults.dict(),
                macros=macros,
                hooks=hooks,
                path=path.absolute(),
                module_path=self._context.path,
                dialect=self._context.dialect,
                time_column_format=self._context.config.time_column_format,
            )
            models[model.name] = model

            if isinstance(model, SeedModel):
                self._track_file(model.seed_path)

        return models

    def _load_python_models(self) -> UniqueKeyDict[str, Model]:
        """Loads the python models into a Dict

        The model registry is cleared first. After each file is imported, the
        names that newly appeared in the registry are attributed to that file.
        """
        models: UniqueKeyDict = UniqueKeyDict("models")
        registry = model_registry.registry()
        registry.clear()
        registered: t.Set[str] = set()

        for path in self._glob_path(self._context.models_directory_path, ".py"):
            self._track_file(path)
            self._import_python_file(path.relative_to(self._context.path))

            # Walk the registry itself rather than a set difference so that
            # models are created in a deterministic order
            new = [name for name in registry.keys() if name not in registered]
            registered.update(new)
            for name in new:
                model = registry[name].model(
                    path=path,
                    module_path=self._context.path,
                    defaults=self._context.config.model_defaults.dict(),
                    time_column_format=self._context.config.time_column_format,
                )
                models[model.name] = model

        return models

    def _load_audits(self) -> UniqueKeyDict[str, Audit]:
        """Loads all the model audits from the ``.sql`` files in the audits directory."""
        audits_by_name: UniqueKeyDict[str, Audit] = UniqueKeyDict("audits")
        for path in self._glob_path(self._context.audits_directory_path, ".sql"):
            self._track_file(path)
            with open(path, "r", encoding="utf-8") as file:
                expressions = parse(file.read(), default_dialect=self._context.dialect)

            audits = Audit.load_multiple(
                expressions=expressions,
                path=path,
                dialect=self._context.dialect,
            )
            for audit in audits:
                audits_by_name[audit.name] = audit
        return audits_by_name

    def _import_python_file(self, relative_path: Path) -> types.ModuleType:
        """Imports the python file at the given relative path as a module

        The module name is the path without its suffix, with path separators
        replaced by dots.

        Args:
            relative_path: Path of the file; callers pass it relative to the context's path

        Returns:
            The freshly imported module
        """
        module_name = str(relative_path.with_suffix("")).replace(os.path.sep, ".")
        # remove the entire module hierarchy in case they were already loaded
        parts = module_name.split(".")
        for depth in range(1, len(parts) + 1):
            sys.modules.pop(".".join(parts[:depth]), None)

        return importlib.import_module(module_name)

    def _glob_path(self, path: Path, file_extension: str) -> t.Generator[Path, None, None]:
        """
        Globs the provided path for the file extension but also removes any filepaths that match an ignore
        pattern either set in constants or provided in config

        Args:
            path: The filepath to glob
            file_extension: The extension to check for in that path (checks recursively in zero or more subdirectories)

        Returns:
            Matched paths that are not ignored
        """
        for filepath in path.glob(f"**/*{file_extension}"):
            if not any(
                filepath.match(ignore_pattern)
                for ignore_pattern in self._context.ignore_patterns
            ):
                yield filepath
def update_model_schemas( dialect: str, dag: sqlmesh.utils.dag.DAG[str], models: sqlmesh.utils.UniqueKeyDict[str, typing.Annotated[typing.Union[sqlmesh.core.model.definition.SqlModel, sqlmesh.core.model.definition.SeedModel, sqlmesh.core.model.definition.PythonModel], FieldInfo(default=PydanticUndefined, discriminator='source_type', extra={})]]) -> None:
def update_model_schemas(dialect: str, dag: DAG[str], models: UniqueKeyDict[str, Model]) -> None:
    """Updates the schema of every loaded model, following the order of the DAG.

    Each model found in ``models`` is updated against the tables collected so
    far, and its own ``columns_to_types`` is then added to that collection
    under the model's name.

    Args:
        dialect: The dialect the collected mapping schema is created with
        dag: The graph whose ``sorted()`` order decides the processing order
        models: The loaded models by name; DAG names missing from it are skipped

    Raises:
        ConfigError: If a model contains a star query and depends on a name
            that is not in ``models``
    """
    collected = MappingSchema(dialect=dialect)

    for model_name in dag.sorted():
        current = models.get(model_name)
        if not current:
            # External models don't exist in the context, so there is nothing to update
            continue

        # Dependencies are only inspected for models that contain a star query
        if current.contains_star_query:
            for upstream in current.depends_on:
                if upstream not in models:
                    raise ConfigError(
                        f"Can't expand SELECT * expression for model '{model_name}'. Projections for models that use external sources must be specified explicitly at '{current._path}'"
                    )

        current.update_schema(collected)
        collected.add_table(model_name, current.columns_to_types)
@dataclass
class LoadedProject:
@dataclass
class LoadedProject:
    """Bundle of everything produced by a single ``Loader.load`` call."""

    # Macro registry returned by the loader's `_load_scripts`
    macros: MacroRegistry
    # Hook registry returned by the loader's `_load_scripts`
    hooks: HookRegistry
    # Loaded models keyed by model name
    models: UniqueKeyDict[str, Model]
    # Loaded audits keyed by audit name
    audits: UniqueKeyDict[str, Audit]
    # Graph of model names built from each model's `depends_on`
    dag: DAG[str]
class Loader(abc.ABC):
 58class Loader(abc.ABC):
 59    """Abstract base class to load macros and models for a context"""
 60
 61    def __init__(self) -> None:
 62        self._path_mtimes: t.Dict[Path, float] = {}
 63        self._dag: DAG[str] = DAG()
 64
 65    def load(self, context: Context) -> LoadedProject:
 66        """
 67        Loads all hooks, macros, and models in the context's path
 68
 69        Args:
 70            context: The context to load macros and models for
 71        """
 72        # python files are cached by the system
 73        # need to manually clear here so we can reload macros
 74        linecache.clearcache()
 75
 76        self._context = context
 77        self._path_mtimes.clear()
 78        self._dag = DAG()
 79
 80        macros, hooks = self._load_scripts()
 81        models = self._load_models(macros, hooks)
 82        for model in models.values():
 83            self._add_model_to_dag(model)
 84        update_model_schemas(self._context.dialect, self._dag, models)
 85
 86        audits = self._load_audits()
 87
 88        project = LoadedProject(
 89            macros=macros, hooks=hooks, models=models, audits=audits, dag=self._dag
 90        )
 91        return project
 92
 93    def reload_needed(self) -> bool:
 94        """
 95        Checks for any modifications to the files the macros and models depend on
 96        since the last load.
 97
 98        Returns:
 99            True if a modification is found; False otherwise
100        """
101        return any(
102            path.stat().st_mtime > initial_mtime
103            for path, initial_mtime in self._path_mtimes.items()
104        )
105
106    @abc.abstractmethod
107    def _load_scripts(self) -> t.Tuple[MacroRegistry, HookRegistry]:
108        """Loads all user defined hooks and macros."""
109
110    @abc.abstractmethod
111    def _load_models(self, macros: MacroRegistry, hooks: HookRegistry) -> UniqueKeyDict[str, Model]:
112        """Loads all models."""
113
114    @abc.abstractmethod
115    def _load_audits(self) -> UniqueKeyDict[str, Audit]:
116        """Loads all audits."""
117
118    def _add_model_to_dag(self, model: Model) -> None:
119        self._dag.graph[model.name] = set()
120        self._dag.add(model.name, model.depends_on)
121
122    def _track_file(self, path: Path) -> None:
123        """Project file to track for modifications"""
124        self._path_mtimes[path] = path.stat().st_mtime

Abstract base class to load macros and models for a context

def load( self, context: sqlmesh.core.context.Context) -> sqlmesh.core.loader.LoadedProject:
65    def load(self, context: Context) -> LoadedProject:
66        """
67        Loads all hooks, macros, and models in the context's path
68
69        Args:
70            context: The context to load macros and models for
71        """
72        # python files are cached by the system
73        # need to manually clear here so we can reload macros
74        linecache.clearcache()
75
76        self._context = context
77        self._path_mtimes.clear()
78        self._dag = DAG()
79
80        macros, hooks = self._load_scripts()
81        models = self._load_models(macros, hooks)
82        for model in models.values():
83            self._add_model_to_dag(model)
84        update_model_schemas(self._context.dialect, self._dag, models)
85
86        audits = self._load_audits()
87
88        project = LoadedProject(
89            macros=macros, hooks=hooks, models=models, audits=audits, dag=self._dag
90        )
91        return project

Loads all hooks, macros, and models in the context's path

Arguments:
  • context: The context to load macros and models for
def reload_needed(self) -> bool:
 93    def reload_needed(self) -> bool:
 94        """
 95        Checks for any modifications to the files the macros and models depend on
 96        since the last load.
 97
 98        Returns:
 99            True if a modification is found; False otherwise
100        """
101        return any(
102            path.stat().st_mtime > initial_mtime
103            for path, initial_mtime in self._path_mtimes.items()
104        )

Checks for any modifications to the files the macros and models depend on since the last load.

Returns:

True if a modification is found; False otherwise

class SqlMeshLoader(Loader):
class SqlMeshLoader(Loader):
    """Loads macros and models for a context using the SQLMesh file formats"""

    def _load_scripts(self) -> t.Tuple[MacroRegistry, HookRegistry]:
        """Loads all user defined hooks and macros.

        Imports every non-ignored ``.py`` file under the context's macro and
        hook directories and tracks each imported file for modifications.

        Returns:
            The macro registry and the hook registry as they stand after the imports
        """
        # Capture the registries before any user file is imported
        # NOTE(review): presumably get_registry() returns a copy -- confirm in the registry code
        standard_hooks = hook.get_registry()
        standard_macros = macro.get_registry()

        try:
            for path in tuple(self._glob_path(self._context.macro_directory_path, ".py")) + tuple(
                self._glob_path(self._context.hook_directory_path, ".py")
            ):
                if self._import_python_file(path.relative_to(self._context.path)):
                    self._track_file(path)

            hooks = hook.get_registry()
            macros = macro.get_registry()
        finally:
            # Put the captured registries back even when an import raised, so a
            # failed load does not leave user definitions behind for the next one
            hook.set_registry(standard_hooks)
            macro.set_registry(standard_macros)

        return macros, hooks

    def _load_models(self, macros: MacroRegistry, hooks: HookRegistry) -> UniqueKeyDict[str, Model]:
        """
        Loads all of the SQL and python models within the model directory
        into a single Dict
        """
        models = self._load_sql_models(macros, hooks)
        models.update(self._load_python_models())

        return models

    def _load_sql_models(
        self, macros: MacroRegistry, hooks: HookRegistry
    ) -> UniqueKeyDict[str, Model]:
        """Loads the sql models into a Dict

        Every ``.sql`` file is tracked for modifications, as is the seed file
        of every seed model.

        Raises:
            ConfigError: If a model definition can't be parsed
        """
        models: UniqueKeyDict = UniqueKeyDict("models")
        for path in self._glob_path(self._context.models_directory_path, ".sql"):
            self._track_file(path)

            # Only parsing needs the file; it is closed before the model is built
            with open(path, "r", encoding="utf-8") as file:
                try:
                    expressions = parse(file.read(), default_dialect=self._context.dialect)
                except SqlglotError as ex:
                    raise ConfigError(
                        f"Failed to parse a model definition at '{path}': {ex}"
                    ) from ex

            model = load_model(
                expressions,
                defaults=self._context.config.model_defaults.dict(),
                macros=macros,
                hooks=hooks,
                path=path.absolute(),
                module_path=self._context.path,
                dialect=self._context.dialect,
                time_column_format=self._context.config.time_column_format,
            )
            models[model.name] = model

            if isinstance(model, SeedModel):
                self._track_file(model.seed_path)

        return models

    def _load_python_models(self) -> UniqueKeyDict[str, Model]:
        """Loads the python models into a Dict

        The model registry is cleared first. After each file is imported, the
        names that newly appeared in the registry are attributed to that file.
        """
        models: UniqueKeyDict = UniqueKeyDict("models")
        registry = model_registry.registry()
        registry.clear()
        registered: t.Set[str] = set()

        for path in self._glob_path(self._context.models_directory_path, ".py"):
            self._track_file(path)
            self._import_python_file(path.relative_to(self._context.path))

            # Walk the registry itself rather than a set difference so that
            # models are created in a deterministic order
            new = [name for name in registry.keys() if name not in registered]
            registered.update(new)
            for name in new:
                model = registry[name].model(
                    path=path,
                    module_path=self._context.path,
                    defaults=self._context.config.model_defaults.dict(),
                    time_column_format=self._context.config.time_column_format,
                )
                models[model.name] = model

        return models

    def _load_audits(self) -> UniqueKeyDict[str, Audit]:
        """Loads all the model audits from the ``.sql`` files in the audits directory."""
        audits_by_name: UniqueKeyDict[str, Audit] = UniqueKeyDict("audits")
        for path in self._glob_path(self._context.audits_directory_path, ".sql"):
            self._track_file(path)
            with open(path, "r", encoding="utf-8") as file:
                expressions = parse(file.read(), default_dialect=self._context.dialect)

            audits = Audit.load_multiple(
                expressions=expressions,
                path=path,
                dialect=self._context.dialect,
            )
            for audit in audits:
                audits_by_name[audit.name] = audit
        return audits_by_name

    def _import_python_file(self, relative_path: Path) -> types.ModuleType:
        """Imports the python file at the given relative path as a module

        The module name is the path without its suffix, with path separators
        replaced by dots.

        Args:
            relative_path: Path of the file; callers pass it relative to the context's path

        Returns:
            The freshly imported module
        """
        module_name = str(relative_path.with_suffix("")).replace(os.path.sep, ".")
        # remove the entire module hierarchy in case they were already loaded
        parts = module_name.split(".")
        for depth in range(1, len(parts) + 1):
            sys.modules.pop(".".join(parts[:depth]), None)

        return importlib.import_module(module_name)

    def _glob_path(self, path: Path, file_extension: str) -> t.Generator[Path, None, None]:
        """
        Globs the provided path for the file extension but also removes any filepaths that match an ignore
        pattern either set in constants or provided in config

        Args:
            path: The filepath to glob
            file_extension: The extension to check for in that path (checks recursively in zero or more subdirectories)

        Returns:
            Matched paths that are not ignored
        """
        for filepath in path.glob(f"**/*{file_extension}"):
            if not any(
                filepath.match(ignore_pattern)
                for ignore_pattern in self._context.ignore_patterns
            ):
                yield filepath

Loads macros and models for a context using the SQLMesh file formats

Inherited Members
Loader
Loader
load
reload_needed