sqlmesh.core.loader
1from __future__ import annotations 2 3import abc 4import importlib 5import linecache 6import os 7import sys 8import types 9import typing as t 10from dataclasses import dataclass 11from pathlib import Path 12 13from sqlglot.errors import SqlglotError 14from sqlglot.schema import MappingSchema 15 16from sqlmesh.core.audit import Audit 17from sqlmesh.core.dialect import parse 18from sqlmesh.core.hooks import HookRegistry, hook 19from sqlmesh.core.macros import MacroRegistry, macro 20from sqlmesh.core.model import Model, SeedModel, load_model 21from sqlmesh.core.model import model as model_registry 22from sqlmesh.utils import UniqueKeyDict 23from sqlmesh.utils.dag import DAG 24from sqlmesh.utils.errors import ConfigError 25 26if t.TYPE_CHECKING: 27 from sqlmesh.core.context import Context 28 29 30def update_model_schemas(dialect: str, dag: DAG[str], models: UniqueKeyDict[str, Model]) -> None: 31 schema = MappingSchema(dialect=dialect) 32 for name in dag.sorted(): 33 model = models.get(name) 34 35 # External models don't exist in the context, so we need to skip them 36 if not model: 37 continue 38 39 if model.contains_star_query and any(dep not in models for dep in model.depends_on): 40 raise ConfigError( 41 f"Can't expand SELECT * expression for model '{name}'. 
Projections for models that use external sources must be specified explicitly at '{model._path}'" 42 ) 43 44 model.update_schema(schema) 45 schema.add_table(name, model.columns_to_types) 46 47 48@dataclass 49class LoadedProject: 50 macros: MacroRegistry 51 hooks: HookRegistry 52 models: UniqueKeyDict[str, Model] 53 audits: UniqueKeyDict[str, Audit] 54 dag: DAG[str] 55 56 57class Loader(abc.ABC): 58 """Abstract base class to load macros and models for a context""" 59 60 def __init__(self) -> None: 61 self._path_mtimes: t.Dict[Path, float] = {} 62 self._dag: DAG[str] = DAG() 63 64 def load(self, context: Context) -> LoadedProject: 65 """ 66 Loads all hooks, macros, and models in the context's path 67 68 Args: 69 context: The context to load macros and models for 70 """ 71 # python files are cached by the system 72 # need to manually clear here so we can reload macros 73 linecache.clearcache() 74 75 self._context = context 76 self._path_mtimes.clear() 77 self._dag = DAG() 78 79 macros, hooks = self._load_scripts() 80 models = self._load_models(macros, hooks) 81 for model in models.values(): 82 self._add_model_to_dag(model) 83 update_model_schemas(self._context.dialect, self._dag, models) 84 85 audits = self._load_audits() 86 87 project = LoadedProject( 88 macros=macros, hooks=hooks, models=models, audits=audits, dag=self._dag 89 ) 90 return project 91 92 def reload_needed(self) -> bool: 93 """ 94 Checks for any modifications to the files the macros and models depend on 95 since the last load. 
96 97 Returns: 98 True if a modification is found; False otherwise 99 """ 100 return any( 101 path.stat().st_mtime > initial_mtime 102 for path, initial_mtime in self._path_mtimes.items() 103 ) 104 105 @abc.abstractmethod 106 def _load_scripts(self) -> t.Tuple[MacroRegistry, HookRegistry]: 107 """Loads all user defined hooks and macros.""" 108 109 @abc.abstractmethod 110 def _load_models(self, macros: MacroRegistry, hooks: HookRegistry) -> UniqueKeyDict[str, Model]: 111 """Loads all models.""" 112 113 @abc.abstractmethod 114 def _load_audits(self) -> UniqueKeyDict[str, Audit]: 115 """Loads all audits.""" 116 117 def _add_model_to_dag(self, model: Model) -> None: 118 self._dag.graph[model.name] = set() 119 self._dag.add(model.name, model.depends_on) 120 121 def _track_file(self, path: Path) -> None: 122 """Project file to track for modifications""" 123 self._path_mtimes[path] = path.stat().st_mtime 124 125 126class SqlMeshLoader(Loader): 127 """Loads macros and models for a context using the SQLMesh file formats""" 128 129 def _load_scripts(self) -> t.Tuple[MacroRegistry, HookRegistry]: 130 """Loads all user defined hooks and macros.""" 131 # Store a copy of the macro registry 132 standard_hooks = hook.get_registry() 133 standard_macros = macro.get_registry() 134 135 for path in tuple(self._glob_path(self._context.macro_directory_path, ".py")) + tuple( 136 self._glob_path(self._context.hook_directory_path, ".py") 137 ): 138 if self._import_python_file(path.relative_to(self._context.path)): 139 self._track_file(path) 140 141 hooks = hook.get_registry() 142 macros = macro.get_registry() 143 144 hook.set_registry(standard_hooks) 145 macro.set_registry(standard_macros) 146 147 return macros, hooks 148 149 def _load_models(self, macros: MacroRegistry, hooks: HookRegistry) -> UniqueKeyDict[str, Model]: 150 """ 151 Loads all of the models within the model directory with their associated 152 audits into a Dict and creates the dag 153 """ 154 models = 
self._load_sql_models(macros, hooks) 155 models.update(self._load_python_models()) 156 157 return models 158 159 def _load_sql_models( 160 self, macros: MacroRegistry, hooks: HookRegistry 161 ) -> UniqueKeyDict[str, Model]: 162 """Loads the sql models into a Dict""" 163 models: UniqueKeyDict = UniqueKeyDict("models") 164 for path in self._glob_path(self._context.models_directory_path, ".sql"): 165 self._track_file(path) 166 with open(path, "r", encoding="utf-8") as file: 167 try: 168 expressions = parse(file.read(), default_dialect=self._context.dialect) 169 except SqlglotError as ex: 170 raise ConfigError(f"Failed to parse a model definition at '{path}': {ex}") 171 model = load_model( 172 expressions, 173 defaults=self._context.config.model_defaults.dict(), 174 macros=macros, 175 hooks=hooks, 176 path=Path(path).absolute(), 177 module_path=self._context.path, 178 dialect=self._context.dialect, 179 time_column_format=self._context.config.time_column_format, 180 ) 181 models[model.name] = model 182 183 if isinstance(model, SeedModel): 184 seed_path = model.seed_path 185 self._track_file(seed_path) 186 187 return models 188 189 def _load_python_models(self) -> UniqueKeyDict[str, Model]: 190 """Loads the python models into a Dict""" 191 models: UniqueKeyDict = UniqueKeyDict("models") 192 registry = model_registry.registry() 193 registry.clear() 194 registered: t.Set[str] = set() 195 196 for path in self._glob_path(self._context.models_directory_path, ".py"): 197 self._track_file(path) 198 self._import_python_file(path.relative_to(self._context.path)) 199 new = registry.keys() - registered 200 registered |= new 201 for name in new: 202 model = registry[name].model( 203 path=path, 204 module_path=self._context.path, 205 defaults=self._context.config.model_defaults.dict(), 206 time_column_format=self._context.config.time_column_format, 207 ) 208 models[model.name] = model 209 210 return models 211 212 def _load_audits(self) -> UniqueKeyDict[str, Audit]: 213 """Loads all 
the model audits.""" 214 audits_by_name: UniqueKeyDict[str, Audit] = UniqueKeyDict("audits") 215 for path in self._glob_path(self._context.audits_directory_path, ".sql"): 216 self._track_file(path) 217 with open(path, "r", encoding="utf-8") as file: 218 expressions = parse(file.read(), default_dialect=self._context.dialect) 219 audits = Audit.load_multiple( 220 expressions=expressions, 221 path=path, 222 dialect=self._context.dialect, 223 ) 224 for audit in audits: 225 audits_by_name[audit.name] = audit 226 return audits_by_name 227 228 def _import_python_file(self, relative_path: Path) -> types.ModuleType: 229 module_name = str(relative_path.with_suffix("")).replace(os.path.sep, ".") 230 # remove the entire module hierarchy in case they were already loaded 231 parts = module_name.split(".") 232 for i in range(len(parts)): 233 sys.modules.pop(".".join(parts[0 : i + 1]), None) 234 235 return importlib.import_module(module_name) 236 237 def _glob_path(self, path: Path, file_extension: str) -> t.Generator[Path, None, None]: 238 """ 239 Globs the provided path for the file extension but also removes any filepaths that match an ignore 240 pattern either set in constants or provided in config 241 242 Args: 243 path: The filepath to glob 244 file_extension: The extension to check for in that path (checks recursively in zero or more subdirectories) 245 246 Returns: 247 Matched paths that are not ignored 248 """ 249 for filepath in path.glob(f"**/*{file_extension}"): 250 for ignore_pattern in self._context.ignore_patterns: 251 if filepath.match(ignore_pattern): 252 break 253 else: 254 yield filepath
def
update_model_schemas( dialect: str, dag: sqlmesh.utils.dag.DAG[str], models: sqlmesh.utils.UniqueKeyDict[str, typing.Annotated[typing.Union[sqlmesh.core.model.definition.SqlModel, sqlmesh.core.model.definition.SeedModel, sqlmesh.core.model.definition.PythonModel], FieldInfo(default=PydanticUndefined, discriminator='source_type', extra={})]]) -> None:
def update_model_schemas(dialect: str, dag: DAG[str], models: UniqueKeyDict[str, Model]) -> None:
    """Updates the schema of every loaded model, following the order of the DAG.

    Each model found in ``models`` has ``update_schema`` called with the tables
    accumulated so far, after which its ``columns_to_types`` are added to those
    tables under the model's name.

    Args:
        dialect: The dialect given to the ``MappingSchema`` that accumulates the tables.
        dag: The graph whose sorted order decides the order models are processed in.
        models: The loaded models keyed by name.

    Raises:
        ConfigError: If a model contains a star query and depends on a name that
            is not present in ``models``.
    """
    known_tables = MappingSchema(dialect=dialect)

    for name in dag.sorted():
        model = models.get(name)
        if not model:
            # External models don't exist in the context, so there is nothing to update
            continue

        if model.contains_star_query and not all(dep in models for dep in model.depends_on):
            message = f"Can't expand SELECT * expression for model '{name}'. Projections for models that use external sources must be specified explicitly at '{model._path}'"
            raise ConfigError(message)

        model.update_schema(known_tables)
        known_tables.add_table(name, model.columns_to_types)
@dataclass
class
LoadedProject:
@dataclass
class LoadedProject:
    """Everything a ``Loader.load`` call produces for a project."""

    # Registry of macros available after the user scripts were loaded
    macros: MacroRegistry
    # Registry of hooks available after the user scripts were loaded
    hooks: HookRegistry
    # Loaded models keyed by model name
    models: UniqueKeyDict[str, Model]
    # Loaded audits keyed by audit name
    audits: UniqueKeyDict[str, Audit]
    # Graph of the loaded models and the names they depend on
    dag: DAG[str]
LoadedProject( macros: sqlmesh.utils.UniqueKeyDict[str, typing.Union[sqlmesh.utils.metaprogramming.Executable, sqlmesh.core.macros.macro]], hooks: sqlmesh.utils.UniqueKeyDict[str, sqlmesh.core.hooks.hook], models: sqlmesh.utils.UniqueKeyDict[str, typing.Annotated[typing.Union[sqlmesh.core.model.definition.SqlModel, sqlmesh.core.model.definition.SeedModel, sqlmesh.core.model.definition.PythonModel], FieldInfo(default=PydanticUndefined, discriminator='source_type', extra={})]], audits: sqlmesh.utils.UniqueKeyDict[str, sqlmesh.core.audit.definition.Audit], dag: sqlmesh.utils.dag.DAG[str])
class
Loader(abc.ABC):
class Loader(abc.ABC):
    """Abstract base class to load macros and models for a context"""

    def __init__(self) -> None:
        # Modification time of every tracked project file, recorded when it was loaded
        self._path_mtimes: t.Dict[Path, float] = {}
        self._dag: DAG[str] = DAG()

    def load(self, context: Context) -> LoadedProject:
        """
        Loads all hooks, macros, and models in the context's path

        Args:
            context: The context to load macros and models for

        Returns:
            The loaded macros, hooks, models and audits together with the model DAG
        """
        # python files are cached by the system
        # need to manually clear here so we can reload macros
        linecache.clearcache()

        self._context = context
        self._path_mtimes.clear()
        self._dag = DAG()

        macros, hooks = self._load_scripts()
        models = self._load_models(macros, hooks)
        for model in models.values():
            self._add_model_to_dag(model)
        update_model_schemas(self._context.dialect, self._dag, models)

        audits = self._load_audits()

        project = LoadedProject(
            macros=macros, hooks=hooks, models=models, audits=audits, dag=self._dag
        )
        return project

    def reload_needed(self) -> bool:
        """
        Checks for any modifications to the files the macros and models depend on
        since the last load.

        Returns:
            True if a modification is found; False otherwise. A tracked file that
            no longer exists counts as a modification.
        """
        for path, initial_mtime in self._path_mtimes.items():
            try:
                if path.stat().st_mtime > initial_mtime:
                    return True
            except FileNotFoundError:
                # The file was removed after it was loaded, so the project has changed
                return True
        return False

    @abc.abstractmethod
    def _load_scripts(self) -> t.Tuple[MacroRegistry, HookRegistry]:
        """Loads all user defined hooks and macros."""

    @abc.abstractmethod
    def _load_models(self, macros: MacroRegistry, hooks: HookRegistry) -> UniqueKeyDict[str, Model]:
        """Loads all models."""

    @abc.abstractmethod
    def _load_audits(self) -> UniqueKeyDict[str, Audit]:
        """Loads all audits."""

    def _add_model_to_dag(self, model: Model) -> None:
        """Registers the model and its dependencies in the DAG."""
        # NOTE(review): the node is reset to an empty set before add(); presumably
        # so that edges from an earlier registration don't linger -- confirm against DAG.add
        self._dag.graph[model.name] = set()
        self._dag.add(model.name, model.depends_on)

    def _track_file(self, path: Path) -> None:
        """Project file to track for modifications"""
        self._path_mtimes[path] = path.stat().st_mtime
Abstract base class to load macros and models for a context
def load(self, context: Context) -> LoadedProject:
    """
    Loads the hooks, macros, models and audits found in the context's path.

    Args:
        context: The context to load macros and models for

    Returns:
        The loaded registries, models and audits together with the model DAG
    """
    # Python source files are cached by the system, so the cache has to be
    # dropped by hand for macros to be reloaded
    linecache.clearcache()

    # Start from a clean slate: new context, no tracked files, empty graph
    self._context = context
    self._path_mtimes.clear()
    self._dag = DAG()

    macros, hooks = self._load_scripts()
    models = self._load_models(macros, hooks)

    # The graph has to be complete before the schemas can be propagated through it
    for loaded_model in models.values():
        self._add_model_to_dag(loaded_model)
    update_model_schemas(self._context.dialect, self._dag, models)

    audits = self._load_audits()

    return LoadedProject(
        macros=macros,
        hooks=hooks,
        models=models,
        audits=audits,
        dag=self._dag,
    )
Loads all hooks, macros, and models in the context's path
Arguments:
- context: The context to load macros and models for
def
reload_needed(self) -> bool:
def reload_needed(self) -> bool:
    """
    Checks for any modifications to the files the macros and models depend on
    since the last load.

    Returns:
        True if a modification is found; False otherwise. A tracked file that
        no longer exists counts as a modification.
    """
    for path, initial_mtime in self._path_mtimes.items():
        try:
            if path.stat().st_mtime > initial_mtime:
                return True
        except FileNotFoundError:
            # The file was removed after it was loaded, so the project has changed
            return True
    return False
Checks for any modifications to the files the macros and models depend on since the last load.
Returns:
True if a modification is found; False otherwise
class SqlMeshLoader(Loader):
    """Loads macros and models for a context using the SQLMesh file formats"""

    def _load_scripts(self) -> t.Tuple[MacroRegistry, HookRegistry]:
        """Loads all user defined hooks and macros.

        Returns:
            The macro registry and the hook registry as they are after every
            script in the macro and hook directories has been imported.
        """
        # Keep the registries as they were before the user scripts are imported
        standard_hooks = hook.get_registry()
        standard_macros = macro.get_registry()

        script_paths = tuple(self._glob_path(self._context.macro_directory_path, ".py")) + tuple(
            self._glob_path(self._context.hook_directory_path, ".py")
        )

        try:
            for path in script_paths:
                # Track before importing, as _load_python_models does, so that a
                # script whose import fails is still watched for modifications
                self._track_file(path)
                self._import_python_file(path.relative_to(self._context.path))

            hooks = hook.get_registry()
            macros = macro.get_registry()
        finally:
            # Put the original registries back even when an import raised
            hook.set_registry(standard_hooks)
            macro.set_registry(standard_macros)

        return macros, hooks

    def _load_models(self, macros: MacroRegistry, hooks: HookRegistry) -> UniqueKeyDict[str, Model]:
        """
        Loads all of the models within the model directory with their associated
        audits into a Dict and creates the dag
        """
        models = self._load_sql_models(macros, hooks)
        models.update(self._load_python_models())

        return models

    def _load_sql_models(
        self, macros: MacroRegistry, hooks: HookRegistry
    ) -> UniqueKeyDict[str, Model]:
        """Loads the sql models into a Dict

        Raises:
            ConfigError: If a model definition can't be parsed.
        """
        models: UniqueKeyDict = UniqueKeyDict("models")
        for path in self._glob_path(self._context.models_directory_path, ".sql"):
            self._track_file(path)
            # Only keep the file open for as long as it takes to read it
            with open(path, "r", encoding="utf-8") as file:
                definition = file.read()

            try:
                expressions = parse(definition, default_dialect=self._context.dialect)
            except SqlglotError as ex:
                raise ConfigError(f"Failed to parse a model definition at '{path}': {ex}") from ex

            model = load_model(
                expressions,
                defaults=self._context.config.model_defaults.dict(),
                macros=macros,
                hooks=hooks,
                path=Path(path).absolute(),
                module_path=self._context.path,
                dialect=self._context.dialect,
                time_column_format=self._context.config.time_column_format,
            )
            models[model.name] = model

            if isinstance(model, SeedModel):
                # The seed file is a project file too, so watch it for modifications
                self._track_file(model.seed_path)

        return models

    def _load_python_models(self) -> UniqueKeyDict[str, Model]:
        """Loads the python models into a Dict"""
        models: UniqueKeyDict = UniqueKeyDict("models")
        registry = model_registry.registry()
        registry.clear()
        registered: t.Set[str] = set()

        for path in self._glob_path(self._context.models_directory_path, ".py"):
            self._track_file(path)
            self._import_python_file(path.relative_to(self._context.path))
            # Names that appeared in the registry during this import belong to this file
            new = registry.keys() - registered
            registered |= new
            for name in new:
                model = registry[name].model(
                    path=path,
                    module_path=self._context.path,
                    defaults=self._context.config.model_defaults.dict(),
                    time_column_format=self._context.config.time_column_format,
                )
                models[model.name] = model

        return models

    def _load_audits(self) -> UniqueKeyDict[str, Audit]:
        """Loads all the model audits."""
        audits_by_name: UniqueKeyDict[str, Audit] = UniqueKeyDict("audits")
        for path in self._glob_path(self._context.audits_directory_path, ".sql"):
            self._track_file(path)
            # Only keep the file open for as long as it takes to read it
            with open(path, "r", encoding="utf-8") as file:
                definition = file.read()

            expressions = parse(definition, default_dialect=self._context.dialect)
            audits = Audit.load_multiple(
                expressions=expressions,
                path=path,
                dialect=self._context.dialect,
            )
            for audit in audits:
                audits_by_name[audit.name] = audit
        return audits_by_name

    def _import_python_file(self, relative_path: Path) -> types.ModuleType:
        """Imports the file as a module, discarding any previously imported version.

        Args:
            relative_path: The path of the python file; its parts, without the
                suffix, form the dotted module name.

        Returns:
            The freshly imported module
        """
        module_name = str(relative_path.with_suffix("")).replace(os.path.sep, ".")
        # remove the entire module hierarchy in case they were already loaded
        parts = module_name.split(".")
        for depth in range(1, len(parts) + 1):
            sys.modules.pop(".".join(parts[:depth]), None)

        # Source files may have been created after the interpreter started;
        # the importlib documentation asks for the finder caches to be invalidated then
        importlib.invalidate_caches()
        return importlib.import_module(module_name)

    def _glob_path(self, path: Path, file_extension: str) -> t.Generator[Path, None, None]:
        """
        Globs the provided path for the file extension but also removes any filepaths that match an ignore
        pattern either set in constants or provided in config

        Args:
            path: The filepath to glob
            file_extension: The extension to check for in that path (checks recursively in zero or more subdirectories)

        Returns:
            Matched paths that are not ignored
        """
        for filepath in path.glob(f"**/*{file_extension}"):
            if not any(
                filepath.match(ignore_pattern) for ignore_pattern in self._context.ignore_patterns
            ):
                yield filepath
Loads macros and models for a context using the SQLMesh file formats