EngineAdapter
Engine adapters are how SQLMesh connects and interacts with various data stores. They allow SQLMesh to generalize its functionality to different engines that have Python Database API 2.0-compliant connections. Rather than executing queries directly against your data stores, SQLMesh components such as the SnapshotEvaluator delegate them to engine adapters so these components can be engine-agnostic.
1""" 2# EngineAdapter 3 4Engine adapters are how SQLMesh connects and interacts with various data stores. They allow SQLMesh to 5generalize its functionality to different engines that have Python Database API 2.0-compliant 6connections. Rather than executing queries directly against your data stores, SQLMesh components such as 7the SnapshotEvaluator delegate them to engine adapters so these components can be engine-agnostic. 8""" 9from __future__ import annotations 10 11import contextlib 12import itertools 13import logging 14import typing as t 15 16import pandas as pd 17from sqlglot import Dialect, exp, parse_one 18from sqlglot.errors import ErrorLevel 19 20from sqlmesh.core.dialect import pandas_to_sql 21from sqlmesh.core.engine_adapter._typing import ( 22 DF_TYPES, 23 QUERY_TYPES, 24 SOURCE_ALIAS, 25 TARGET_ALIAS, 26 PySparkDataFrame, 27 PySparkSession, 28 Query, 29) 30from sqlmesh.core.engine_adapter.shared import DataObject, TransactionType 31from sqlmesh.core.model.kind import TimeColumn 32from sqlmesh.utils import double_escape, optional_import 33from sqlmesh.utils.connection_pool import create_connection_pool 34from sqlmesh.utils.date import TimeLike, make_inclusive 35from sqlmesh.utils.errors import SQLMeshError 36 37if t.TYPE_CHECKING: 38 from sqlmesh.core._typing import TableName 39 from sqlmesh.core.engine_adapter._typing import DF, QueryOrDF 40 from sqlmesh.core.model.meta import IntervalUnit 41 42logger = logging.getLogger(__name__) 43 44 45class EngineAdapter: 46 """Base class wrapping a Database API compliant connection. 47 48 The EngineAdapter is an easily-subclassable interface that interacts 49 with the underlying engine and data store. 50 51 Args: 52 connection_factory: a callable which produces a new Database API-compliant 53 connection on every call. 54 dialect: The dialect with which this adapter is associated. 55 multithreaded: Indicates whether this adapter will be used by more than one thread. 56 """ 57 58 DIALECT = "" 59 DEFAULT_BATCH_SIZE = 10000 60 DEFAULT_SQL_GEN_KWARGS: t.Dict[str, str | bool | int] = {} 61 ESCAPE_JSON = False 62 63 def __init__( 64 self, 65 connection_factory: t.Callable[[], t.Any], 66 dialect: str = "", 67 sql_gen_kwargs: t.Optional[t.Dict[str, Dialect | bool | str]] = None, 68 multithreaded: bool = False, 69 ): 70 self.dialect = dialect.lower() or self.DIALECT 71 self._connection_pool = create_connection_pool(connection_factory, multithreaded) 72 self.sql_gen_kwargs = sql_gen_kwargs or {} 73 74 @property 75 def cursor(self) -> t.Any: 76 return self._connection_pool.get_cursor() 77 78 @property 79 def spark(self) -> t.Optional[PySparkSession]: 80 return None 81 82 def recycle(self) -> t.Any: 83 """Closes all open connections and releases all allocated resources associated with any thread 84 except the calling one.""" 85 self._connection_pool.close_all(exclude_calling_thread=True) 86 87 def close(self) -> t.Any: 88 """Closes all open connections and releases all allocated resources.""" 89 self._connection_pool.close_all() 90 91 def replace_query( 92 self, 93 table_name: TableName, 94 query_or_df: QueryOrDF, 95 columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, 96 ) -> None: 97 """Replaces an existing table with a query. 98 99 For partition based engines (hive, spark), insert override is used. For other systems, create or replace is used. 100 101 Args: 102 table_name: The name of the table (eg. prod.table) 103 query_or_df: The SQL query to run or a dataframe. 104 columns_to_types: Only used if a dataframe is provided. 
A mapping between the column name and its data type. 105 Expected to be ordered to match the order of values in the dataframe. 106 """ 107 table = exp.to_table(table_name) 108 if isinstance(query_or_df, pd.DataFrame): 109 if not columns_to_types: 110 raise ValueError("columns_to_types must be provided for dataframes") 111 expression = next( 112 self._pandas_to_sql( 113 query_or_df, 114 alias=table.alias_or_name, 115 columns_to_types=columns_to_types, 116 ) 117 ) 118 create = exp.Create( 119 this=table, 120 kind="TABLE", 121 replace=True, 122 expression=expression, 123 ) 124 else: 125 create = exp.Create( 126 this=table, 127 kind="TABLE", 128 replace=True, 129 expression=query_or_df, 130 ) 131 self.execute(create) 132 133 def create_index( 134 self, 135 table_name: TableName, 136 index_name: str, 137 columns: t.Tuple[str, ...], 138 exists: bool = True, 139 ) -> None: 140 """Creates a new index for the given table. 141 142 Args: 143 table_name: The name of the target table. 144 index_name: The name of the index. 145 columns: The list of columns that constitute the index. 146 exists: Indicates whether to include the IF NOT EXISTS check. 147 """ 148 149 def create_table( 150 self, 151 table_name: TableName, 152 query_or_columns_to_types: Query | t.Dict[str, exp.DataType], 153 primary_key: t.Optional[t.Tuple[str, ...]] = None, 154 exists: bool = True, 155 **kwargs: t.Any, 156 ) -> None: 157 """Create a table using a DDL statement or a CTAS. 158 159 If a query is passed in instead of column type map, CREATE TABLE AS will be used. 160 161 Args: 162 table_name: The name of the table to create. Can be fully qualified or just table name. 163 query_or_columns_to_types: A query or mapping between the column name and its data type. 164 primary_key: Determines the table primary key. 165 exists: Indicates whether to include the IF NOT EXISTS check. 166 kwargs: Optional create table properties. 167 """ 168 if isinstance(query_or_columns_to_types, dict): 169 expression = self._create_table_from_columns( 170 table_name, query_or_columns_to_types, primary_key, exists, **kwargs 171 ) 172 else: 173 expression = self._create_table_from_query( 174 table_name, query_or_columns_to_types, exists, **kwargs 175 ) 176 if expression is not None: 177 self.execute(expression) 178 179 def create_state_table( 180 self, 181 table_name: str, 182 columns_to_types: t.Dict[str, exp.DataType], 183 primary_key: t.Optional[t.Tuple[str, ...]] = None, 184 ) -> None: 185 """Create a table to store SQLMesh internal state. 186 187 Args: 188 table_name: The name of the table to create. Can be fully qualified or just table name. 189 columns_to_types: A mapping between the column name and its data type. 190 primary_key: Determines the table primary key. 191 """ 192 self.create_table( 193 table_name, 194 columns_to_types, 195 primary_key=primary_key, 196 ) 197 198 def _create_table_from_columns( 199 self, 200 table_name: TableName, 201 columns_to_types: t.Dict[str, exp.DataType], 202 primary_key: t.Optional[t.Tuple[str, ...]] = None, 203 exists: bool = True, 204 **kwargs: t.Any, 205 ) -> t.Optional[exp.Create]: 206 """ 207 Create a table using a DDL statement. 208 209 Args: 210 table_name: The name of the table to create. Can be fully qualified or just table name. 211 columns_to_types: Mapping between the column name and its data type. 212 exists: Indicates whether to include the IF NOT EXISTS check. 213 kwargs: Optional create table properties. 
214 """ 215 properties = self._create_table_properties(**kwargs) 216 schema: t.Optional[exp.Schema | exp.Table] = exp.to_table(table_name) 217 schema = exp.Schema( 218 this=schema, 219 expressions=[ 220 exp.ColumnDef(this=exp.to_identifier(column), kind=kind) 221 for column, kind in columns_to_types.items() 222 ], 223 ) 224 return exp.Create( 225 this=schema, 226 kind="TABLE", 227 exists=exists, 228 properties=properties, 229 expression=None, 230 ) 231 232 def _create_table_from_query( 233 self, 234 table_name: TableName, 235 query: Query, 236 exists: bool = True, 237 **kwargs: t.Any, 238 ) -> t.Optional[exp.Create]: 239 """ 240 Create a table using a DDL statement. 241 242 Args: 243 table_name: The name of the table to create. Can be fully qualified or just table name. 244 query: The query to use for creating the table 245 exists: Indicates whether to include the IF NOT EXISTS check. 246 kwargs: Optional create table properties. 247 """ 248 properties = self._create_table_properties(**kwargs) 249 schema: t.Optional[exp.Schema | exp.Table] = exp.to_table(table_name) 250 return exp.Create( 251 this=schema, 252 kind="TABLE", 253 exists=exists, 254 properties=properties, 255 expression=query, 256 ) 257 258 def create_table_like( 259 self, 260 target_table_name: TableName, 261 source_table_name: TableName, 262 exists: bool = True, 263 ) -> None: 264 """ 265 Create a table like another table or view. 266 """ 267 target_table = exp.to_table(target_table_name) 268 source_table = exp.to_table(source_table_name) 269 create_expression = exp.Create( 270 this=target_table, 271 kind="TABLE", 272 exists=exists, 273 properties=exp.Properties( 274 expressions=[ 275 exp.LikeProperty(this=source_table), 276 ] 277 ), 278 ) 279 self.execute(create_expression) 280 281 def drop_table(self, table_name: str, exists: bool = True) -> None: 282 """Drops a table. 283 284 Args: 285 table_name: The name of the table to drop. 286 exists: If exists, defaults to True. 287 """ 288 drop_expression = exp.Drop(this=table_name, kind="TABLE", exists=exists) 289 self.execute(drop_expression) 290 291 def alter_table( 292 self, 293 table_name: TableName, 294 added_columns: t.Dict[str, str], 295 dropped_columns: t.Sequence[str], 296 ) -> None: 297 with self.transaction(TransactionType.DDL): 298 alter_table = exp.AlterTable(this=exp.to_table(table_name)) 299 300 for column_name in dropped_columns: 301 drop_column = exp.Drop(this=exp.column(column_name), kind="COLUMN") 302 alter_table.set("actions", [drop_column]) 303 304 self.execute(alter_table) 305 306 for column_name, column_type in added_columns.items(): 307 add_column = exp.ColumnDef( 308 this=exp.to_identifier(column_name), 309 kind=parse_one(column_type, into=exp.DataType), # type: ignore 310 ) 311 alter_table.set("actions", [add_column]) 312 313 self.execute(alter_table) 314 315 def create_view( 316 self, 317 view_name: TableName, 318 query_or_df: QueryOrDF, 319 columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, 320 replace: bool = True, 321 **create_kwargs: t.Any, 322 ) -> None: 323 """Create a view with a query or dataframe. 324 325 If a dataframe is passed in, it will be converted into a literal values statement. 326 This should only be done if the dataframe is very small! 327 328 Args: 329 view_name: The view name. 330 query_or_df: A query or dataframe. 331 columns_to_types: Columns to use in the view statement. 332 replace: Whether or not to replace an existing view defaults to True. 
333 create_kwargs: Additional kwargs to pass into the Create expression 334 """ 335 schema: t.Optional[exp.Table | exp.Schema] = exp.to_table(view_name) 336 337 if isinstance(query_or_df, DF_TYPES): 338 if PySparkDataFrame is not None and isinstance(query_or_df, PySparkDataFrame): 339 query_or_df = query_or_df.toPandas() 340 341 if not isinstance(query_or_df, pd.DataFrame): 342 raise SQLMeshError("Can only create views with pandas dataframes.") 343 344 if not columns_to_types: 345 raise SQLMeshError( 346 "Creating a view with a dataframe requires passing in columns_to_types." 347 ) 348 schema = exp.Schema( 349 this=schema, 350 expressions=[exp.column(column) for column in columns_to_types], 351 ) 352 query_or_df = next(self._pandas_to_sql(query_or_df, columns_to_types=columns_to_types)) 353 354 self.execute( 355 exp.Create( 356 this=schema, 357 kind="VIEW", 358 replace=replace, 359 expression=query_or_df, 360 **create_kwargs, 361 ) 362 ) 363 364 def create_schema(self, schema_name: str, ignore_if_exists: bool = True) -> None: 365 """Create a schema from a name or qualified table name.""" 366 self.execute( 367 exp.Create( 368 this=exp.to_identifier(schema_name.split(".")[0]), 369 kind="SCHEMA", 370 exists=ignore_if_exists, 371 ) 372 ) 373 374 def drop_schema( 375 self, schema_name: str, ignore_if_not_exists: bool = True, cascade: bool = False 376 ) -> None: 377 """Drop a schema from a name or qualified table name.""" 378 self.execute( 379 exp.Drop( 380 this=exp.to_identifier(schema_name.split(".")[0]), 381 kind="SCHEMA", 382 exists=ignore_if_not_exists, 383 cascade=cascade, 384 ) 385 ) 386 387 def drop_view(self, view_name: TableName, ignore_if_not_exists: bool = True) -> None: 388 """Drop a view.""" 389 self.execute( 390 exp.Drop(this=exp.to_table(view_name), exists=ignore_if_not_exists, kind="VIEW") 391 ) 392 393 def columns(self, table_name: TableName) -> t.Dict[str, str]: 394 """Fetches column names and types for the target table.""" 395 self.execute(exp.Describe(this=exp.to_table(table_name), kind="TABLE")) 396 describe_output = self.cursor.fetchall() 397 return { 398 t[0]: t[1] 399 for t in itertools.takewhile( 400 lambda t: not t[0].startswith("#"), 401 describe_output, 402 ) 403 } 404 405 def table_exists(self, table_name: TableName) -> bool: 406 try: 407 self.execute(exp.Describe(this=exp.to_table(table_name), kind="TABLE")) 408 return True 409 except Exception: 410 return False 411 412 def delete_from(self, table_name: TableName, where: t.Union[str, exp.Expression]) -> None: 413 self.execute(exp.delete(table_name, where)) 414 415 @classmethod 416 def _insert_into_expression( 417 cls, 418 table_name: TableName, 419 columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, 420 ) -> t.Optional[exp.Table] | exp.Schema: 421 if not columns_to_types: 422 return exp.to_table(table_name) 423 return exp.Schema( 424 this=exp.to_table(table_name), 425 expressions=[exp.column(c) for c in columns_to_types], 426 ) 427 428 def insert_append( 429 self, 430 table_name: TableName, 431 query_or_df: QueryOrDF, 432 columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, 433 contains_json: bool = False, 434 ) -> None: 435 if isinstance(query_or_df, QUERY_TYPES): 436 query = t.cast(Query, query_or_df) 437 if contains_json: 438 query = self._escape_json(query) 439 return self._insert_append_query(table_name, query, columns_to_types) 440 if isinstance(query_or_df, pd.DataFrame): 441 return self._insert_append_pandas_df(table_name, query_or_df, columns_to_types) 442 raise 
SQLMeshError(f"Unsupported type for insert_append: {type(query_or_df)}") 443 444 @t.overload 445 @classmethod 446 def _escape_json(cls, value: Query) -> Query: 447 ... 448 449 @t.overload 450 @classmethod 451 def _escape_json(cls, value: str) -> str: 452 ... 453 454 @classmethod 455 def _escape_json(cls, value: Query | str) -> Query | str: 456 """ 457 Some engines need to add an extra escape to literals that contain JSON values. By default we don't do this 458 though 459 """ 460 if cls.ESCAPE_JSON: 461 if isinstance(value, str): 462 return double_escape(value) 463 return value.transform( 464 lambda e: exp.Literal.string(double_escape(e.name)) 465 if isinstance(e, exp.Literal) and e.args["is_string"] 466 else e 467 ) 468 return value 469 470 def _insert_append_query( 471 self, 472 table_name: TableName, 473 query: Query, 474 columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, 475 ) -> None: 476 self.execute( 477 exp.Insert( 478 this=self._insert_into_expression(table_name, columns_to_types), 479 expression=query, 480 overwrite=False, 481 ) 482 ) 483 484 def _insert_append_pandas_df( 485 self, 486 table_name: TableName, 487 df: pd.DataFrame, 488 columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, 489 ) -> None: 490 connection = self._connection_pool.get() 491 table = exp.to_table(table_name) 492 into = self._insert_into_expression(table_name, columns_to_types) 493 494 sqlalchemy = optional_import("sqlalchemy") 495 # pandas to_sql doesn't support insert overwrite, it only supports deleting the table or appending 496 if sqlalchemy and isinstance(connection, sqlalchemy.engine.Connectable): 497 df.to_sql( 498 table.sql(dialect=self.dialect), 499 connection, 500 if_exists="append", 501 index=False, 502 chunksize=self.DEFAULT_BATCH_SIZE, 503 method="multi", 504 ) 505 else: 506 if not columns_to_types: 507 raise SQLMeshError( 508 "Column Mapping must be specified when using a Pandas DataFrame and not using SQLAlchemy" 509 ) 510 with self.transaction(): 511 for i, expression in enumerate( 512 self._pandas_to_sql(df, columns_to_types, self.DEFAULT_BATCH_SIZE) 513 ): 514 self.execute( 515 exp.Insert( 516 this=into, 517 expression=expression, 518 overwrite=False, 519 ) 520 ) 521 522 def insert_overwrite_by_time_partition( 523 self, 524 table_name: TableName, 525 query_or_df: QueryOrDF, 526 start: TimeLike, 527 end: TimeLike, 528 time_formatter: t.Callable[[TimeLike], exp.Expression], 529 time_column: TimeColumn | exp.Column | str, 530 columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, 531 ) -> None: 532 low, high = [time_formatter(dt) for dt in make_inclusive(start, end)] 533 if isinstance(time_column, TimeColumn): 534 time_column = time_column.column 535 where = exp.Between( 536 this=exp.to_column(time_column), 537 low=low, 538 high=high, 539 ) 540 return self._insert_overwrite_by_condition(table_name, query_or_df, where, columns_to_types) 541 542 @classmethod 543 def _pandas_to_sql( 544 cls, 545 df: pd.DataFrame, 546 columns_to_types: t.Dict[str, exp.DataType], 547 batch_size: int = 0, 548 alias: str = "t", 549 ) -> t.Generator[exp.Select, None, None]: 550 yield from pandas_to_sql(df, columns_to_types, batch_size, alias) 551 552 def _insert_overwrite_by_condition( 553 self, 554 table_name: TableName, 555 query_or_df: QueryOrDF, 556 where: t.Optional[exp.Condition] = None, 557 columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, 558 ) -> None: 559 if where is None: 560 raise SQLMeshError( 561 "Where condition is required when doing a delete/insert for 
insert/overwrite" 562 ) 563 with self.transaction(): 564 self.delete_from(table_name, where=where) 565 self.insert_append(table_name, query_or_df, columns_to_types=columns_to_types) 566 567 def update_table( 568 self, 569 table_name: TableName, 570 properties: t.Dict[str, t.Any], 571 where: t.Optional[str | exp.Condition] = None, 572 contains_json: bool = False, 573 ) -> None: 574 if contains_json and properties: 575 properties = { 576 k: self._escape_json(v) 577 if isinstance(v, (str, exp.Subqueryable, exp.DerivedTable)) 578 else v 579 for k, v in properties.items() 580 } 581 self.execute(exp.update(table_name, properties, where=where)) 582 583 def _merge( 584 self, 585 target_table: TableName, 586 source_table: QueryOrDF, 587 on: exp.Expression, 588 match_expressions: t.List[exp.When], 589 ) -> None: 590 this = exp.alias_(exp.to_table(target_table), alias=TARGET_ALIAS, table=True) 591 using = exp.Subquery(this=source_table, alias=SOURCE_ALIAS) 592 self.execute( 593 exp.Merge( 594 this=this, 595 using=using, 596 on=on, 597 expressions=match_expressions, 598 ) 599 ) 600 601 def merge( 602 self, 603 target_table: TableName, 604 source_table: QueryOrDF, 605 column_names: t.Iterable[str], 606 unique_key: t.Iterable[str], 607 ) -> None: 608 on = exp.and_( 609 *( 610 exp.EQ( 611 this=exp.column(part, TARGET_ALIAS), 612 expression=exp.column(part, SOURCE_ALIAS), 613 ) 614 for part in unique_key 615 ) 616 ) 617 when_matched = exp.When( 618 matched=True, 619 source=False, 620 then=exp.Update( 621 expressions=[ 622 exp.EQ( 623 this=exp.column(col, TARGET_ALIAS), expression=exp.column(col, SOURCE_ALIAS) 624 ) 625 for col in column_names 626 ], 627 ), 628 ) 629 when_not_matched = exp.When( 630 matched=False, 631 source=False, 632 then=exp.Insert( 633 this=exp.Tuple(expressions=[exp.column(col) for col in column_names]), 634 expression=exp.Tuple( 635 expressions=[exp.column(col, SOURCE_ALIAS) for col in column_names] 636 ), 637 ), 638 ) 639 return self._merge( 640 target_table=target_table, 641 source_table=source_table, 642 on=on, 643 match_expressions=[when_matched, when_not_matched], 644 ) 645 646 def rename_table( 647 self, 648 old_table_name: TableName, 649 new_table_name: TableName, 650 ) -> None: 651 self.execute(exp.rename_table(old_table_name, new_table_name)) 652 653 def fetchone( 654 self, 655 query: t.Union[exp.Expression, str], 656 ignore_unsupported_errors: bool = False, 657 ) -> t.Tuple: 658 self.execute(query, ignore_unsupported_errors=ignore_unsupported_errors) 659 return self.cursor.fetchone() 660 661 def fetchall( 662 self, 663 query: t.Union[exp.Expression, str], 664 ignore_unsupported_errors: bool = False, 665 ) -> t.List[t.Tuple]: 666 self.execute(query, ignore_unsupported_errors=ignore_unsupported_errors) 667 return self.cursor.fetchall() 668 669 def _fetch_native_df(self, query: t.Union[exp.Expression, str]) -> DF: 670 """Fetches a DataFrame that can be either Pandas or PySpark from the cursor""" 671 self.execute(query) 672 return self.cursor.fetchdf() 673 674 def fetchdf(self, query: t.Union[exp.Expression, str]) -> pd.DataFrame: 675 """Fetches a Pandas DataFrame from the cursor""" 676 df = self._fetch_native_df(query) 677 if not isinstance(df, pd.DataFrame): 678 raise NotImplementedError( 679 "The cursor's `fetch_native_df` method is not returning a pandas DataFrame. 
Need to update `fetchdf` so a Pandas DataFrame is returned" 680 ) 681 return df 682 683 def fetch_pyspark_df(self, query: t.Union[exp.Expression, str]) -> PySparkDataFrame: 684 """Fetches a PySpark DataFrame from the cursor""" 685 raise NotImplementedError(f"Engine does not support PySpark DataFrames: {type(self)}") 686 687 @contextlib.contextmanager 688 def transaction( 689 self, transaction_type: TransactionType = TransactionType.DML 690 ) -> t.Generator[None, None, None]: 691 """A transaction context manager.""" 692 if self._connection_pool.is_transaction_active or not self.supports_transactions( 693 transaction_type 694 ): 695 yield 696 return 697 self._connection_pool.begin() 698 try: 699 yield 700 except Exception as e: 701 self._connection_pool.rollback() 702 raise e 703 else: 704 self._connection_pool.commit() 705 706 def supports_transactions(self, transaction_type: TransactionType) -> bool: 707 """Whether or not the engine adapter supports transactions for the given transaction type.""" 708 return True 709 710 def execute( 711 self, 712 sql: t.Union[str, exp.Expression], 713 ignore_unsupported_errors: bool = False, 714 **kwargs: t.Any, 715 ) -> None: 716 """Execute a sql query.""" 717 to_sql_kwargs = ( 718 {"unsupported_level": ErrorLevel.IGNORE} if ignore_unsupported_errors else {} 719 ) 720 sql = self._to_sql(sql, **to_sql_kwargs) if isinstance(sql, exp.Expression) else sql 721 logger.debug(f"Executing SQL:\n{sql}") 722 self.cursor.execute(sql, **kwargs) 723 724 def _create_table_properties( 725 self, 726 storage_format: t.Optional[str] = None, 727 partitioned_by: t.Optional[t.List[str]] = None, 728 partition_interval_unit: t.Optional[IntervalUnit] = None, 729 ) -> t.Optional[exp.Properties]: 730 return None 731 732 def _to_sql(self, e: exp.Expression, **kwargs: t.Any) -> str: 733 """ 734 Converts an expression to a SQL string. Has a set of default kwargs to apply, and then default 735 kwargs defined for the given dialect, and then kwargs provided by the user when defining the engine 736 adapter, and then finally kwargs provided by the user when calling this method. 737 """ 738 sql_gen_kwargs = { 739 "dialect": self.dialect, 740 "pretty": False, 741 "comments": False, 742 "identify": True, 743 **self.DEFAULT_SQL_GEN_KWARGS, 744 **self.sql_gen_kwargs, 745 **kwargs, 746 } 747 return e.sql(**sql_gen_kwargs) # type: ignore 748 749 def _get_data_objects( 750 self, schema_name: str, catalog_name: t.Optional[str] = None 751 ) -> t.List[DataObject]: 752 """ 753 Returns all the data objects that exist in the given schema and optionally catalog. 
754 """ 755 756 raise NotImplementedError() 757 758 759class EngineAdapterWithIndexSupport(EngineAdapter): 760 def create_index( 761 self, 762 table_name: TableName, 763 index_name: str, 764 columns: t.Tuple[str, ...], 765 exists: bool = True, 766 ) -> None: 767 expression = exp.Create( 768 this=exp.Index( 769 this=exp.to_identifier(index_name), 770 table=exp.to_table(table_name), 771 columns=exp.Tuple( 772 expressions=[exp.to_column(c) for c in columns], 773 ), 774 ), 775 kind="INDEX", 776 exists=exists, 777 ) 778 self.execute(expression) 779 780 def _create_table_from_columns( 781 self, 782 table_name: TableName, 783 columns_to_types: t.Dict[str, exp.DataType], 784 primary_key: t.Optional[t.Tuple[str, ...]] = None, 785 exists: bool = True, 786 **kwargs: t.Any, 787 ) -> t.Optional[exp.Create]: 788 expression = super()._create_table_from_columns( 789 table_name, columns_to_types, primary_key, exists, **kwargs 790 ) 791 if expression is None or primary_key is None: 792 return expression 793 794 schema = expression.this 795 schema.append( 796 "expressions", 797 exp.Anonymous(this="PRIMARY KEY", expressions=[exp.to_column(k) for k in primary_key]), 798 ) 799 return expression
```python
class EngineAdapter:
```
Base class wrapping a Database API compliant connection.
The EngineAdapter is an easily-subclassable interface that interacts with the underlying engine and data store.
Arguments:
- connection_factory: A callable which produces a new Database API-compliant connection on every call.
- dialect: The dialect with which this adapter is associated.
- sql_gen_kwargs: Optional kwargs to apply when generating SQL from expressions, on top of the adapter's defaults.
- multithreaded: Indicates whether this adapter will be used by more than one thread.
```python
def __init__(
    self,
    connection_factory: t.Callable[[], t.Any],
    dialect: str = "",
    sql_gen_kwargs: t.Optional[t.Dict[str, Dialect | bool | str]] = None,
    multithreaded: bool = False,
): ...
```
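For example, here is a minimal construction sketch. The DuckDB backend, the in-memory connection, and the import path are illustrative assumptions rather than anything this page prescribes:

```python
import duckdb  # assumed engine, for illustration only

from sqlmesh.core.engine_adapter import EngineAdapter  # import path assumed

# The factory is invoked whenever the connection pool needs a fresh
# Database API connection; the adapter never holds one directly.
adapter = EngineAdapter(lambda: duckdb.connect(":memory:"), dialect="duckdb")
print(adapter.fetchall("SELECT 1"))  # [(1,)]
adapter.close()
```

The examples in the sections below reuse this `adapter` instance.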
```python
def recycle(self) -> t.Any: ...
```
Closes all open connections and releases all allocated resources associated with any thread except the calling one.
```python
def close(self) -> t.Any: ...
```
Closes all open connections and releases all allocated resources.
```python
def replace_query(
    self,
    table_name: TableName,
    query_or_df: QueryOrDF,
    columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None,
) -> None: ...
```
Replaces an existing table with a query.
For partition-based engines (hive, spark), insert overwrite is used. For other systems, create or replace is used.
Arguments:
- table_name: The name of the table (e.g. prod.table).
- query_or_df: The SQL query to run or a dataframe.
- columns_to_types: Only used if a dataframe is provided. A mapping between the column name and its data type. Expected to be ordered to match the order of values in the dataframe.
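A sketch of the dataframe path, reusing the `adapter` from above (the table and column names are made up):

```python
import pandas as pd
from sqlglot import exp

df = pd.DataFrame({"id": [1, 2], "name": ["a", "b"]})
columns_to_types = {
    "id": exp.DataType.build("int"),
    "name": exp.DataType.build("text"),
}
# Renders the dataframe as a literal SELECT and emits
# CREATE OR REPLACE TABLE db.example AS SELECT ...
adapter.replace_query("db.example", df, columns_to_types=columns_to_types)
```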
```python
def create_index(
    self,
    table_name: TableName,
    index_name: str,
    columns: t.Tuple[str, ...],
    exists: bool = True,
) -> None: ...
```
Creates a new index for the given table.
Arguments:
- table_name: The name of the target table.
- index_name: The name of the index.
- columns: The list of columns that constitute the index.
- exists: Indicates whether to include the IF NOT EXISTS check.
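Note that the base implementation is a no-op; `EngineAdapterWithIndexSupport` (included in the source listing above) overrides it to emit the actual DDL. A small sketch with made-up names:

```python
# On an index-capable adapter this emits:
# CREATE INDEX IF NOT EXISTS idx_id ON db.example (id)
adapter.create_index("db.example", "idx_id", ("id",))
```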
```python
def create_table(
    self,
    table_name: TableName,
    query_or_columns_to_types: Query | t.Dict[str, exp.DataType],
    primary_key: t.Optional[t.Tuple[str, ...]] = None,
    exists: bool = True,
    **kwargs: t.Any,
) -> None: ...
```
Create a table using a DDL statement or a CTAS.
If a query is passed in instead of a column type map, CREATE TABLE AS will be used.
Arguments:
- table_name: The name of the table to create. Can be fully qualified or just table name.
- query_or_columns_to_types: A query or mapping between the column name and its data type.
- primary_key: Determines the table primary key.
- exists: Indicates whether to include the IF NOT EXISTS check.
- kwargs: Optional create table properties.
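Both forms, sketched with illustrative names:

```python
from sqlglot import exp, parse_one

# DDL form: CREATE TABLE IF NOT EXISTS db.target (id INT, ds TEXT)
adapter.create_table(
    "db.target",
    {"id": exp.DataType.build("int"), "ds": exp.DataType.build("text")},
)

# CTAS form: CREATE TABLE IF NOT EXISTS db.target_copy AS SELECT id, ds FROM db.target
adapter.create_table("db.target_copy", parse_one("SELECT id, ds FROM db.target"))
```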
```python
def create_state_table(
    self,
    table_name: str,
    columns_to_types: t.Dict[str, exp.DataType],
    primary_key: t.Optional[t.Tuple[str, ...]] = None,
) -> None: ...
```
Create a table to store SQLMesh internal state.
Arguments:
- table_name: The name of the table to create. Can be fully qualified or just table name.
- columns_to_types: A mapping between the column name and its data type.
- primary_key: Determines the table primary key.
```python
def create_table_like(
    self,
    target_table_name: TableName,
    source_table_name: TableName,
    exists: bool = True,
) -> None: ...
```
Create a table like another table or view.
```python
def drop_table(self, table_name: str, exists: bool = True) -> None: ...
```
Drops a table.
Arguments:
- table_name: The name of the table to drop.
- exists: Indicates whether to include the IF EXISTS check. Defaults to True.
```python
def alter_table(
    self,
    table_name: TableName,
    added_columns: t.Dict[str, str],
    dropped_columns: t.Sequence[str],
) -> None: ...
```
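This method issues one ALTER TABLE statement per column change inside a DDL transaction: first a DROP COLUMN for each name in `dropped_columns`, then an ADD COLUMN for each entry in `added_columns`, whose types are parsed from their string form. A sketch with made-up names:

```python
# Two statements: ALTER TABLE db.target DROP COLUMN obsolete_col,
# then ALTER TABLE db.target ADD COLUMN new_col INT.
adapter.alter_table(
    "db.target",
    added_columns={"new_col": "int"},
    dropped_columns=["obsolete_col"],
)
```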
```python
def create_view(
    self,
    view_name: TableName,
    query_or_df: QueryOrDF,
    columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None,
    replace: bool = True,
    **create_kwargs: t.Any,
) -> None: ...
```
Create a view with a query or dataframe.
If a dataframe is passed in, it will be converted into a literal values statement. This should only be done if the dataframe is very small!
Arguments:
- view_name: The view name.
- query_or_df: A query or dataframe.
- columns_to_types: Columns to use in the view statement.
- replace: Whether or not to replace an existing view. Defaults to True.
- create_kwargs: Additional kwargs to pass into the Create expression.
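For instance, materializing a tiny lookup DataFrame as a view. A sketch reusing the `adapter` from above; the view name and columns are illustrative, and the DataFrame must be small enough to inline as literal values:

import pandas as pd
from sqlglot import exp

df = pd.DataFrame({"id": [1, 2], "name": ["a", "b"]})
adapter.create_view(
    "lookup_view",
    df,
    columns_to_types={
        "id": exp.DataType.build("int"),
        "name": exp.DataType.build("text"),
    },
)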
def create_schema(self, schema_name: str, ignore_if_exists: bool = True) -> None:
    """Create a schema from a name or qualified table name."""
    self.execute(
        exp.Create(
            this=exp.to_identifier(schema_name.split(".")[0]),
            kind="SCHEMA",
            exists=ignore_if_exists,
        )
    )
Create a schema from a name or qualified table name.
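Note that only the first dot-separated component of the name is used, so passing a qualified table name creates that table's schema. Sketch:

adapter.create_schema("analytics")         # CREATE SCHEMA IF NOT EXISTS analytics
adapter.create_schema("analytics.events")  # also creates just `analytics`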
def drop_schema(
    self, schema_name: str, ignore_if_not_exists: bool = True, cascade: bool = False
) -> None:
    """Drop a schema from a name or qualified table name."""
    self.execute(
        exp.Drop(
            this=exp.to_identifier(schema_name.split(".")[0]),
            kind="SCHEMA",
            exists=ignore_if_not_exists,
            cascade=cascade,
        )
    )
Drop a schema from a name or qualified table name.
def drop_view(self, view_name: TableName, ignore_if_not_exists: bool = True) -> None:
    """Drop a view."""
    self.execute(
        exp.Drop(this=exp.to_table(view_name), exists=ignore_if_not_exists, kind="VIEW")
    )
Drop a view.
def columns(self, table_name: TableName) -> t.Dict[str, str]:
    """Fetches column names and types for the target table."""
    self.execute(exp.Describe(this=exp.to_table(table_name), kind="TABLE"))
    describe_output = self.cursor.fetchall()
    return {
        t[0]: t[1]
        for t in itertools.takewhile(
            lambda t: not t[0].startswith("#"),
            describe_output,
        )
    }
Fetches column names and types for the target table.
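The result maps column names to engine-reported type strings; takewhile stops at the first row whose name starts with "#" (e.g. the partition-information section Hive appends to DESCRIBE output). Sketch:

adapter.execute("CREATE TABLE t (id INTEGER, ts TIMESTAMP)")
print(adapter.columns("t"))
# e.g. {'id': 'INTEGER', 'ts': 'TIMESTAMP'} -- exact type strings vary by engine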
def insert_append(
    self,
    table_name: TableName,
    query_or_df: QueryOrDF,
    columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None,
    contains_json: bool = False,
) -> None:
    if isinstance(query_or_df, QUERY_TYPES):
        query = t.cast(Query, query_or_df)
        if contains_json:
            query = self._escape_json(query)
        return self._insert_append_query(table_name, query, columns_to_types)
    if isinstance(query_or_df, pd.DataFrame):
        return self._insert_append_pandas_df(table_name, query_or_df, columns_to_types)
    raise SQLMeshError(f"Unsupported type for insert_append: {type(query_or_df)}")
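insert_append accepts either a query (anything in QUERY_TYPES) or a pandas DataFrame, and raises SQLMeshError for anything else. A sketch reusing `adapter`; the table names are hypothetical, and parsed expressions are used rather than raw strings to stay safely within QUERY_TYPES:

import pandas as pd
from sqlglot import exp, parse_one

adapter.insert_append("events", parse_one("SELECT * FROM staging_events"))
adapter.insert_append(
    "events",
    pd.DataFrame({"id": [1], "ds": ["2023-01-01"]}),
    columns_to_types={
        "id": exp.DataType.build("int"),
        "ds": exp.DataType.build("text"),
    },
)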
def insert_overwrite_by_time_partition(
    self,
    table_name: TableName,
    query_or_df: QueryOrDF,
    start: TimeLike,
    end: TimeLike,
    time_formatter: t.Callable[[TimeLike], exp.Expression],
    time_column: TimeColumn | exp.Column | str,
    columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None,
) -> None:
    low, high = [time_formatter(dt) for dt in make_inclusive(start, end)]
    if isinstance(time_column, TimeColumn):
        time_column = time_column.column
    where = exp.Between(
        this=exp.to_column(time_column),
        low=low,
        high=high,
    )
    return self._insert_overwrite_by_condition(table_name, query_or_df, where, columns_to_types)
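The method makes the [start, end] range inclusive, renders each bound with the caller-supplied time_formatter, and delegates to _insert_overwrite_by_condition with a BETWEEN predicate on the time column. A sketch with a hypothetical formatter that renders the bounds as date-string literals (table and column names are illustrative):

from sqlglot import exp, parse_one

adapter.insert_overwrite_by_time_partition(
    "events",
    parse_one("SELECT * FROM staging_events"),
    start="2023-01-01",
    end="2023-01-31",
    time_formatter=lambda dt: exp.Literal.string(dt.strftime("%Y-%m-%d")),
    time_column="ds",
)
# Overwrites only rows WHERE ds BETWEEN '2023-01-01' AND '2023-01-31'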
def update_table(
    self,
    table_name: TableName,
    properties: t.Dict[str, t.Any],
    where: t.Optional[str | exp.Condition] = None,
    contains_json: bool = False,
) -> None:
    if contains_json and properties:
        properties = {
            k: self._escape_json(v)
            if isinstance(v, (str, exp.Subqueryable, exp.DerivedTable))
            else v
            for k, v in properties.items()
        }
    self.execute(exp.update(table_name, properties, where=where))
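A sketch of a conditional update, reusing `adapter` (table and column names are illustrative):

adapter.update_table(
    "users",
    {"status": "inactive"},
    where="last_login < '2020-01-01'",
)
# UPDATE users SET status = 'inactive' WHERE last_login < '2020-01-01'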
def merge(
    self,
    target_table: TableName,
    source_table: QueryOrDF,
    column_names: t.Iterable[str],
    unique_key: t.Iterable[str],
) -> None:
    on = exp.and_(
        *(
            exp.EQ(
                this=exp.column(part, TARGET_ALIAS),
                expression=exp.column(part, SOURCE_ALIAS),
            )
            for part in unique_key
        )
    )
    when_matched = exp.When(
        matched=True,
        source=False,
        then=exp.Update(
            expressions=[
                exp.EQ(
                    this=exp.column(col, TARGET_ALIAS), expression=exp.column(col, SOURCE_ALIAS)
                )
                for col in column_names
            ],
        ),
    )
    when_not_matched = exp.When(
        matched=False,
        source=False,
        then=exp.Insert(
            this=exp.Tuple(expressions=[exp.column(col) for col in column_names]),
            expression=exp.Tuple(
                expressions=[exp.column(col, SOURCE_ALIAS) for col in column_names]
            ),
        ),
    )
    return self._merge(
        target_table=target_table,
        source_table=source_table,
        on=on,
        match_expressions=[when_matched, when_not_matched],
    )
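merge builds a MERGE statement that joins target and source on the unique key, updating matched rows and inserting unmatched ones (an upsert). A sketch, reusing `adapter`; the table names are hypothetical:

from sqlglot import parse_one

adapter.merge(
    target_table="users",
    source_table=parse_one("SELECT id, name, email FROM staging_users"),
    column_names=["id", "name", "email"],
    unique_key=["id"],
)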
def fetchdf(self, query: t.Union[exp.Expression, str]) -> pd.DataFrame:
    """Fetches a Pandas DataFrame from the cursor"""
    df = self._fetch_native_df(query)
    if not isinstance(df, pd.DataFrame):
        raise NotImplementedError(
            "The cursor's `fetch_native_df` method is not returning a pandas DataFrame. Need to update `fetchdf` so a Pandas DataFrame is returned"
        )
    return df
Fetches a Pandas DataFrame from the cursor
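Sketch, reusing `adapter`; fetchdf accepts either a raw SQL string or a sqlglot expression:

df = adapter.fetchdf("SELECT 1 AS one")
print(df)  # a pandas DataFrame with a single `one` column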
def fetch_pyspark_df(self, query: t.Union[exp.Expression, str]) -> PySparkDataFrame:
    """Fetches a PySpark DataFrame from the cursor"""
    raise NotImplementedError(f"Engine does not support PySpark DataFrames: {type(self)}")
Fetches a PySpark DataFrame from the cursor
@contextlib.contextmanager
def transaction(
    self, transaction_type: TransactionType = TransactionType.DML
) -> t.Generator[None, None, None]:
    """A transaction context manager."""
    if self._connection_pool.is_transaction_active or not self.supports_transactions(
        transaction_type
    ):
        yield
        return
    self._connection_pool.begin()
    try:
        yield
    except Exception as e:
        self._connection_pool.rollback()
        raise e
    else:
        self._connection_pool.commit()
A transaction context manager.
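Nested use is safe: if a transaction is already active, or the engine does not support the requested transaction type, the context manager simply yields without opening a new one. Sketch, reusing `adapter`:

from sqlmesh.core.engine_adapter.shared import TransactionType

with adapter.transaction():  # TransactionType.DML by default
    adapter.update_table("users", {"status": "active"}, where="id = 1")

with adapter.transaction(TransactionType.DDL):  # DDL rolls back on failure where supported
    adapter.create_schema("reports")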
def supports_transactions(self, transaction_type: TransactionType) -> bool:
    """Whether or not the engine adapter supports transactions for the given transaction type."""
    return True
Whether or not the engine adapter supports transactions for the given transaction type.
def execute(
    self,
    sql: t.Union[str, exp.Expression],
    ignore_unsupported_errors: bool = False,
    **kwargs: t.Any,
) -> None:
    """Execute a sql query."""
    to_sql_kwargs = (
        {"unsupported_level": ErrorLevel.IGNORE} if ignore_unsupported_errors else {}
    )
    sql = self._to_sql(sql, **to_sql_kwargs) if isinstance(sql, exp.Expression) else sql
    logger.debug(f"Executing SQL:\n{sql}")
    self.cursor.execute(sql, **kwargs)
Execute a sql query.
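Strings are passed through to the cursor verbatim, while sqlglot expressions are first rendered in the adapter's dialect; ignore_unsupported_errors tells sqlglot to ignore unsupported-syntax errors during SQL generation. Sketch:

from sqlglot import parse_one

adapter.execute("SELECT 1")             # raw SQL, sent as-is
adapter.execute(parse_one("SELECT 1"))  # rendered via the adapter's dialect first
adapter.execute(parse_one("SELECT 1"), ignore_unsupported_errors=True)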
class EngineAdapterWithIndexSupport(EngineAdapter):
    def create_index(
        self,
        table_name: TableName,
        index_name: str,
        columns: t.Tuple[str, ...],
        exists: bool = True,
    ) -> None:
        expression = exp.Create(
            this=exp.Index(
                this=exp.to_identifier(index_name),
                table=exp.to_table(table_name),
                columns=exp.Tuple(
                    expressions=[exp.to_column(c) for c in columns],
                ),
            ),
            kind="INDEX",
            exists=exists,
        )
        self.execute(expression)

    def _create_table_from_columns(
        self,
        table_name: TableName,
        columns_to_types: t.Dict[str, exp.DataType],
        primary_key: t.Optional[t.Tuple[str, ...]] = None,
        exists: bool = True,
        **kwargs: t.Any,
    ) -> t.Optional[exp.Create]:
        expression = super()._create_table_from_columns(
            table_name, columns_to_types, primary_key, exists, **kwargs
        )
        if expression is None or primary_key is None:
            return expression

        schema = expression.this
        schema.append(
            "expressions",
            exp.Anonymous(this="PRIMARY KEY", expressions=[exp.to_column(k) for k in primary_key]),
        )
        return expression
Base class wrapping a Database API compliant connection.
The EngineAdapter is an easily-subclassable interface that interacts with the underlying engine and data store.
Arguments:
- connection_factory: a callable which produces a new Database API-compliant connection on every call.
- dialect: The dialect with which this adapter is associated.
- multithreaded: Indicates whether this adapter will be used by more than one thread.
def create_index(
    self,
    table_name: TableName,
    index_name: str,
    columns: t.Tuple[str, ...],
    exists: bool = True,
) -> None:
    expression = exp.Create(
        this=exp.Index(
            this=exp.to_identifier(index_name),
            table=exp.to_table(table_name),
            columns=exp.Tuple(
                expressions=[exp.to_column(c) for c in columns],
            ),
        ),
        kind="INDEX",
        exists=exists,
    )
    self.execute(expression)
Creates a new index for the given table.
Arguments:
- table_name: The name of the target table.
- index_name: The name of the index.
- columns: The list of columns that constitute the index.
- exists: Indicates whether to include the IF NOT EXISTS check.
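A sketch, assuming `adapter` is an EngineAdapterWithIndexSupport backed by an engine with INDEX DDL; the table, column, and index names are illustrative:

adapter.create_index("users", "idx_users_email", ("email",))
# CREATE INDEX IF NOT EXISTS idx_users_email ON users (email)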
Inherited Members
- EngineAdapter
- recycle
- close
- replace_query
- create_table
- create_state_table
- create_table_like
- drop_table
- alter_table
- create_view
- create_schema
- drop_schema
- drop_view
- columns
- table_exists
- delete_from
- insert_append
- insert_overwrite_by_time_partition
- update_table
- merge
- rename_table
- fetchone
- fetchall
- fetchdf
- fetch_pyspark_df
- transaction
- supports_transactions
- execute