
sqlmesh.core.config.connection

from __future__ import annotations

import abc
import sys
import typing as t

from pydantic import Field

from sqlmesh.core import engine_adapter
from sqlmesh.core.config.base import BaseConfig
from sqlmesh.core.config.common import (
    concurrent_tasks_validator,
    http_headers_validator,
)
from sqlmesh.core.engine_adapter import EngineAdapter

if sys.version_info >= (3, 9):
    from typing import Annotated, Literal
else:
    from typing_extensions import Annotated, Literal


class _ConnectionConfig(abc.ABC, BaseConfig):
    concurrent_tasks: int

    @property
    @abc.abstractmethod
    def _connection_kwargs_keys(self) -> t.Set[str]:
        """keywords that should be passed into the connection"""

    @property
    @abc.abstractmethod
    def _engine_adapter(self) -> t.Type[EngineAdapter]:
        """The engine adapter for this connection"""

    @property
    @abc.abstractmethod
    def _connection_factory(self) -> t.Callable:
        """A function that is called to return a connection object for the given Engine Adapter"""

    @property
    def _static_connection_kwargs(self) -> t.Dict[str, t.Any]:
        """The static connection kwargs for this connection"""
        return {}

    def create_engine_adapter(self) -> EngineAdapter:
        """Returns a new instance of the Engine Adapter."""
        return self._engine_adapter(
            lambda: self._connection_factory(
                **{
                    **self._static_connection_kwargs,
                    **{k: v for k, v in self.dict().items() if k in self._connection_kwargs_keys},
                }
            ),
            multithreaded=self.concurrent_tasks > 1,
        )


class DuckDBConnectionConfig(_ConnectionConfig):
    """Configuration for the DuckDB connection.

    Args:
        database: The optional database name. If not specified, the in-memory database will be used.
        concurrent_tasks: The maximum number of tasks that can use this connection concurrently.
    """

    database: t.Optional[str]

    concurrent_tasks: Literal[1] = 1

    type_: Literal["duckdb"] = Field(alias="type", default="duckdb")

    @property
    def _connection_kwargs_keys(self) -> t.Set[str]:
        return {"database"}

    @property
    def _engine_adapter(self) -> t.Type[EngineAdapter]:
        return engine_adapter.DuckDBEngineAdapter

    @property
    def _connection_factory(self) -> t.Callable:
        import duckdb

        return duckdb.connect


class SnowflakeConnectionConfig(_ConnectionConfig):
    """Configuration for the Snowflake connection.

    Args:
        user: The Snowflake username.
        password: The Snowflake password.
        account: The Snowflake account name.
        warehouse: The optional warehouse name.
        database: The optional database name.
        role: The optional role name.
        concurrent_tasks: The maximum number of tasks that can use this connection concurrently.
    """

    user: str
    password: str
    account: str
    warehouse: t.Optional[str]
    database: t.Optional[str]
    role: t.Optional[str]

    concurrent_tasks: int = 4

    type_: Literal["snowflake"] = Field(alias="type", default="snowflake")

    _concurrent_tasks_validator = concurrent_tasks_validator

    @property
    def _connection_kwargs_keys(self) -> t.Set[str]:
        return {"user", "password", "account", "warehouse", "database", "role"}

    @property
    def _engine_adapter(self) -> t.Type[EngineAdapter]:
        return engine_adapter.SnowflakeEngineAdapter

    @property
    def _connection_factory(self) -> t.Callable:
        from snowflake import connector

        return connector.connect


class DatabricksSQLConnectionConfig(_ConnectionConfig):
    """
    Configuration for the Databricks API connection. This connection is used to access the Databricks
    when you don't have access to a SparkSession. Ex: Running Jupyter locally on your laptop to connect to a
    Databricks cluster

    Arg Source: https://github.com/databricks/databricks-sql-python/blob/main/src/databricks/sql/client.py#L39
    Args:
        server_hostname: Databricks instance host name.
        http_path: Http path either to a DBSQL endpoint (e.g. /sql/1.0/endpoints/1234567890abcdef)
                   or to a DBR interactive cluster (e.g. /sql/protocolv1/o/1234567890123456/1234-123456-slid123)
        access_token: Http Bearer access token, e.g. Databricks Personal Access Token.
        http_headers: An optional list of (k, v) pairs that will be set as Http headers on every request
        session_configuration: An optional dictionary of Spark session parameters. Defaults to None.
               Execute the SQL command `SET -v` to get a full list of available commands.
    """

    server_hostname: str
    http_path: str
    access_token: str
    http_headers: t.Optional[t.List[t.Tuple[str, str]]]
    session_configuration: t.Optional[t.Dict[str, t.Any]]

    concurrent_tasks: int = 4

    type_: Literal["databricks_sql"] = Field(alias="type", default="databricks_sql")

    _concurrent_tasks_validator = concurrent_tasks_validator
    _http_headers_validator = http_headers_validator

    @property
    def _connection_kwargs_keys(self) -> t.Set[str]:
        return {
            "server_hostname",
            "http_path",
            "access_token",
            "http_headers",
            "session_configuration",
        }

    @property
    def _engine_adapter(self) -> t.Type[EngineAdapter]:
        return engine_adapter.DatabricksSQLEngineAdapter

    @property
    def _connection_factory(self) -> t.Callable:
        from databricks import sql

        return sql.connect


class DatabricksSparkSessionConnectionConfig(_ConnectionConfig):
    """
    Configuration for the Databricks connection. This connection is used to access the Databricks
    when you have access to a SparkSession. Ex: Running in a Databricks notebook or cluster

    Args:
        spark_config: An optional dictionary of Spark session parameters. Defaults to None.
    """

    spark_config: t.Optional[t.Dict[str, str]] = None

    concurrent_tasks: Literal[1] = 1

    type_: Literal["databricks_spark_session"] = Field(
        alias="type", default="databricks_spark_session"
    )

    @property
    def _connection_kwargs_keys(self) -> t.Set[str]:
        return set()

    @property
    def _engine_adapter(self) -> t.Type[EngineAdapter]:
        return engine_adapter.DatabricksSparkSessionEngineAdapter

    @property
    def _connection_factory(self) -> t.Callable:
        from sqlmesh.engines.spark.db_api.spark_session import connection

        return connection

    @property
    def _static_connection_kwargs(self) -> t.Dict[str, t.Any]:
        from pyspark import SparkConf
        from pyspark.sql import SparkSession

        spark_config = SparkConf()
        if self.spark_config:
            for k, v in self.spark_config.items():
                spark_config.set(k, v)

        return dict(
            spark=SparkSession.builder.config(conf=spark_config).enableHiveSupport().getOrCreate()
        )


class DatabricksConnectionConfig(_ConnectionConfig):
    """
    Databricks connection that prefers to use SparkSession if available, otherwise it will use the Databricks API.

    Arg Source: https://github.com/databricks/databricks-sql-python/blob/main/src/databricks/sql/client.py#L39
    Args:
        server_hostname: Databricks instance host name.
        http_path: Http path either to a DBSQL endpoint (e.g. /sql/1.0/endpoints/1234567890abcdef)
                   or to a DBR interactive cluster (e.g. /sql/protocolv1/o/1234567890123456/1234-123456-slid123)
        access_token: Http Bearer access token, e.g. Databricks Personal Access Token.
        http_headers: An optional list of (k, v) pairs that will be set as Http headers on every request
        session_configuration: An optional dictionary of Spark session parameters. Defaults to None.
               Execute the SQL command `SET -v` to get a full list of available commands.
        spark_config: An optional dictionary of Spark session parameters. Defaults to None.
    """

    server_hostname: str
    http_path: str
    access_token: str
    http_headers: t.Optional[t.List[t.Tuple[str, str]]]
    session_configuration: t.Optional[t.Dict[str, t.Any]]
    spark_config: t.Optional[t.Dict[str, str]] = None

    concurrent_tasks: int = 4

    type_: Literal["databricks"] = Field(alias="type", default="databricks")

    _concurrent_tasks_validator = concurrent_tasks_validator
    _http_headers_validator = http_headers_validator

    _has_spark_session_access: bool

    class Config:
        allow_mutation = True

    @property
    def has_spark_session_access(self) -> bool:
        if not getattr(self, "_has_spark_session_access", None):
            try:
                from pyspark.sql import SparkSession

                spark = SparkSession.getActiveSession()
                if spark:
                    self._has_spark_session_access = True
                    self.concurrent_tasks = 1
                else:
                    self._has_spark_session_access = False
            except ImportError:
                self._has_spark_session_access = False
        return self._has_spark_session_access

    @property
    def _connection_kwargs_keys(self) -> t.Set[str]:
        if self.has_spark_session_access:
            return set()
        return {
            "server_hostname",
            "http_path",
            "access_token",
            "http_headers",
            "session_configuration",
        }

    @property
    def _engine_adapter(self) -> t.Type[EngineAdapter]:
        if self.has_spark_session_access:
            return engine_adapter.DatabricksSparkSessionEngineAdapter
        return engine_adapter.DatabricksSQLEngineAdapter

    @property
    def _connection_factory(self) -> t.Callable:
        if self.has_spark_session_access:
            from sqlmesh.engines.spark.db_api.spark_session import connection

            return connection
        from databricks import sql

        return sql.connect

    @property
    def _static_connection_kwargs(self) -> t.Dict[str, t.Any]:
        if self.has_spark_session_access:
            from pyspark import SparkConf
            from pyspark.sql import SparkSession

            spark_config = SparkConf()
            if self.spark_config:
                for k, v in self.spark_config.items():
                    spark_config.set(k, v)

            return dict(
                spark=SparkSession.builder.config(conf=spark_config)
                .enableHiveSupport()
                .getOrCreate()
            )
        return {}


class BigQueryConnectionConfig(_ConnectionConfig):
    """
    BigQuery Connection Configuration.

    TODO: Need to update to support all the different authentication options
    """

    concurrent_tasks: int = 4

    type_: Literal["bigquery"] = Field(alias="type", default="bigquery")

    @property
    def _connection_kwargs_keys(self) -> t.Set[str]:
        return set()

    @property
    def _engine_adapter(self) -> t.Type[EngineAdapter]:
        return engine_adapter.BigQueryEngineAdapter

    @property
    def _connection_factory(self) -> t.Callable:
        from google.cloud.bigquery.dbapi import connect

        return connect


class RedshiftConnectionConfig(_ConnectionConfig):
    """
    Redshift Connection Configuration.

    Arg Source: https://github.com/aws/amazon-redshift-python-driver/blob/master/redshift_connector/__init__.py#L146
    Note: A subset of properties were selected. Please open an issue/PR if you want to see more supported.

    Args:
        user: The username to use for authentication with the Amazon Redshift cluster.
        password: The password to use for authentication with the Amazon Redshift cluster.
        database: The name of the database instance to connect to.
        host: The hostname of the Amazon Redshift cluster.
        port: The port number of the Amazon Redshift cluster. Default value is 5439.
        source_address: No description provided
        unix_sock: No description provided
        ssl: Is SSL enabled. Default value is ``True``. SSL must be enabled when authenticating using IAM.
        sslmode: The security of the connection to the Amazon Redshift cluster. 'verify-ca' and 'verify-full' are supported.
        timeout: The number of seconds before the connection to the server will timeout. By default there is no timeout.
        tcp_keepalive: Is `TCP keepalive <https://en.wikipedia.org/wiki/Keepalive#TCP_keepalive>`_ used. The default value is ``True``.
        application_name: Sets the application name. The default value is None.
        preferred_role: The IAM role preferred for the current connection.
        principal_arn: The ARN of the IAM entity (user or role) for which you are generating a policy.
        credentials_provider: The class name of the IdP that will be used for authenticating with the Amazon Redshift cluster.
        region: The AWS region where the Amazon Redshift cluster is located.
        cluster_identifier: The cluster identifier of the Amazon Redshift cluster.
        iam: If IAM authentication is enabled. Default value is False. IAM must be True when authenticating using an IdP.
        is_serverless: Redshift end-point is serverless or provisional. Default value false.
        serverless_acct_id: The account ID of the serverless. Default value None
        serverless_work_group: The name of work group for serverless end point. Default value None.
    """

    user: t.Optional[str]
    password: t.Optional[str]
    database: t.Optional[str]
    host: t.Optional[str]
    port: t.Optional[int]
    source_address: t.Optional[str]
    unix_sock: t.Optional[str]
    ssl: t.Optional[bool]
    sslmode: t.Optional[str]
    timeout: t.Optional[int]
    tcp_keepalive: t.Optional[bool]
    application_name: t.Optional[str]
    preferred_role: t.Optional[str]
    principal_arn: t.Optional[str]
    credentials_provider: t.Optional[str]
    region: t.Optional[str]
    cluster_identifier: t.Optional[str]
    iam: t.Optional[bool]
    is_serverless: t.Optional[bool]
    serverless_acct_id: t.Optional[str]
    serverless_work_group: t.Optional[str]

    concurrent_tasks: int = 4

    type_: Literal["redshift"] = Field(alias="type", default="redshift")

    @property
    def _connection_kwargs_keys(self) -> t.Set[str]:
        return {
            "user",
            "password",
            "database",
            "host",
            "port",
            "source_address",
            "unix_sock",
            "ssl",
            "sslmode",
            "timeout",
            "tcp_keepalive",
            "application_name",
            "preferred_role",
            "principal_arn",
            "credentials_provider",
            "region",
            "cluster_identifier",
            "iam",
            "is_serverless",
            "serverless_acct_id",
            "serverless_work_group",
        }

    @property
    def _engine_adapter(self) -> t.Type[EngineAdapter]:
        return engine_adapter.RedshiftEngineAdapter

    @property
    def _connection_factory(self) -> t.Callable:
        from redshift_connector import connect

        return connect


ConnectionConfig = Annotated[
    t.Union[
        DuckDBConnectionConfig,
        SnowflakeConnectionConfig,
        DatabricksSQLConnectionConfig,
        DatabricksSparkSessionConnectionConfig,
        DatabricksConnectionConfig,
        BigQueryConnectionConfig,
        RedshiftConnectionConfig,
    ],
    Field(discriminator="type_"),
]
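
The module ends by defining ConnectionConfig as a discriminated union keyed on the `type` field. A minimal usage sketch (not part of the module; values are placeholders), assuming pydantic v1, whose parse_obj_as resolves the discriminator through the field's `type` alias:

    from pydantic import parse_obj_as

    from sqlmesh.core.config.connection import ConnectionConfig

    # "type" selects the concrete config class via the discriminator on type_.
    raw = {"type": "duckdb", "database": "local.duckdb"}  # placeholder database path
    config = parse_obj_as(ConnectionConfig, raw)          # -> DuckDBConnectionConfig
    adapter = config.create_engine_adapter()              # wraps duckdb.connect in a connection factory
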
class DuckDBConnectionConfig(_ConnectionConfig):

Configuration for the DuckDB connection.

Arguments:
  • database: The optional database name. If not specified, the in-memory database will be used.
  • concurrent_tasks: The maximum number of tasks that can use this connection concurrently.
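
A minimal construction sketch (the file name is a placeholder); omitting database keeps DuckDB in memory:

    from sqlmesh.core.config.connection import DuckDBConnectionConfig

    in_memory = DuckDBConnectionConfig()                       # no database -> in-memory DuckDB
    on_disk = DuckDBConnectionConfig(database="local.duckdb")  # placeholder file path
    adapter = on_disk.create_engine_adapter()                  # multithreaded=False since concurrent_tasks is fixed at 1
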
class SnowflakeConnectionConfig(_ConnectionConfig):

Configuration for the Snowflake connection.

Arguments:
  • user: The Snowflake username.
  • password: The Snowflake password.
  • account: The Snowflake account name.
  • warehouse: The optional warehouse name.
  • database: The optional database name.
  • role: The optional role name.
  • concurrent_tasks: The maximum number of tasks that can use this connection concurrently.
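
A minimal construction sketch; all credential values are placeholders, and a concurrent_tasks value above 1 makes the resulting adapter multithreaded:

    from sqlmesh.core.config.connection import SnowflakeConnectionConfig

    config = SnowflakeConnectionConfig(
        user="my_user",               # placeholder credentials
        password="my_password",
        account="my_org-my_account",  # placeholder account identifier
        warehouse="COMPUTE_WH",
        database="ANALYTICS",
        role="SQLMESH",
        concurrent_tasks=8,
    )
    adapter = config.create_engine_adapter()  # snowflake.connector.connect is the connection factory
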
class DatabricksSQLConnectionConfig(_ConnectionConfig):

Configuration for the Databricks API connection. This connection is used to access Databricks when you don't have access to a SparkSession, e.g. running Jupyter locally on your laptop and connecting to a Databricks cluster.

Arg Source: https://github.com/databricks/databricks-sql-python/blob/main/src/databricks/sql/client.py#L39

Arguments:
  • server_hostname: Databricks instance host name.
  • http_path: HTTP path to either a DBSQL endpoint (e.g. /sql/1.0/endpoints/1234567890abcdef) or a DBR interactive cluster (e.g. /sql/protocolv1/o/1234567890123456/1234-123456-slid123).
  • access_token: HTTP Bearer access token, e.g. a Databricks personal access token.
  • http_headers: An optional list of (k, v) pairs that will be set as HTTP headers on every request.
  • session_configuration: An optional dictionary of Spark session parameters. Defaults to None. Execute the SQL command SET -v to get a full list of available parameters.
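
A minimal construction sketch for connecting from outside Databricks; the host, path, and token are placeholders:

    from sqlmesh.core.config.connection import DatabricksSQLConnectionConfig

    config = DatabricksSQLConnectionConfig(
        server_hostname="adb-1234567890123456.7.azuredatabricks.net",  # placeholder workspace host
        http_path="/sql/1.0/endpoints/1234567890abcdef",               # placeholder DBSQL endpoint
        access_token="dapi...",                                        # placeholder personal access token
        http_headers=[("User-Agent", "sqlmesh")],                      # optional extra HTTP headers
        session_configuration={"spark.sql.session.timeZone": "UTC"},   # optional Spark session parameters
    )
    adapter = config.create_engine_adapter()  # databricks.sql.connect is the connection factory
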
class DatabricksSparkSessionConnectionConfig(_ConnectionConfig):

Configuration for the Databricks connection. This connection is used to access Databricks when you have access to a SparkSession, e.g. when running in a Databricks notebook or on a Databricks cluster.

Arguments:
  • spark_config: An optional dictionary of Spark session parameters. Defaults to None.
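
A minimal sketch for use inside Databricks, where pyspark is already available; the Spark setting shown is just an example value:

    from sqlmesh.core.config.connection import DatabricksSparkSessionConnectionConfig

    config = DatabricksSparkSessionConnectionConfig(
        spark_config={"spark.sql.shuffle.partitions": "8"}  # applied to SparkConf before getOrCreate()
    )
    adapter = config.create_engine_adapter()  # reuses or creates the SparkSession; concurrent_tasks is fixed at 1
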
class DatabricksConnectionConfig(_ConnectionConfig):

Databricks connection that prefers to use a SparkSession if one is available; otherwise it falls back to the Databricks API.

Arg Source: https://github.com/databricks/databricks-sql-python/blob/main/src/databricks/sql/client.py#L39

Arguments:
  • server_hostname: Databricks instance host name.
  • http_path: HTTP path to either a DBSQL endpoint (e.g. /sql/1.0/endpoints/1234567890abcdef) or a DBR interactive cluster (e.g. /sql/protocolv1/o/1234567890123456/1234-123456-slid123).
  • access_token: HTTP Bearer access token, e.g. a Databricks personal access token.
  • http_headers: An optional list of (k, v) pairs that will be set as HTTP headers on every request.
  • session_configuration: An optional dictionary of Spark session parameters. Defaults to None. Execute the SQL command SET -v to get a full list of available parameters.
  • spark_config: An optional dictionary of Spark session parameters. Defaults to None.
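
A minimal sketch (placeholder values); the same config works inside and outside Databricks because has_spark_session_access checks for an active SparkSession:

    from sqlmesh.core.config.connection import DatabricksConnectionConfig

    config = DatabricksConnectionConfig(
        server_hostname="adb-1234567890123456.7.azuredatabricks.net",  # placeholder workspace host
        http_path="/sql/1.0/endpoints/1234567890abcdef",               # placeholder DBSQL endpoint
        access_token="dapi...",                                        # placeholder personal access token
    )
    # On a Databricks cluster an active SparkSession is detected, so the SparkSession
    # adapter is used and concurrent_tasks drops to 1; elsewhere the SQL connector
    # settings above are passed to databricks.sql.connect.
    adapter = config.create_engine_adapter()
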
class BigQueryConnectionConfig(_ConnectionConfig):

BigQuery Connection Configuration.

TODO: Need to update to support all the different authentication options
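
A minimal sketch; the config currently takes no connection arguments, so (as an assumption) authentication comes from the environment's application default credentials used by the BigQuery client:

    from sqlmesh.core.config.connection import BigQueryConnectionConfig

    config = BigQueryConnectionConfig()
    adapter = config.create_engine_adapter()  # google.cloud.bigquery.dbapi.connect() is called with no arguments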

class RedshiftConnectionConfig(_ConnectionConfig):

Redshift Connection Configuration.

Arg Source: https://github.com/aws/amazon-redshift-python-driver/blob/master/redshift_connector/__init__.py#L146
Note: Only a subset of the driver's properties is supported here. Please open an issue/PR if you want to see more supported.

Arguments:
  • user: The username to use for authentication with the Amazon Redshift cluster.
  • password: The password to use for authentication with the Amazon Redshift cluster.
  • database: The name of the database instance to connect to.
  • host: The hostname of the Amazon Redshift cluster.
  • port: The port number of the Amazon Redshift cluster. Default value is 5439.
  • source_address: No description provided
  • unix_sock: No description provided
  • ssl: Whether SSL is enabled. Default value is True. SSL must be enabled when authenticating using IAM.
  • sslmode: The security of the connection to the Amazon Redshift cluster. 'verify-ca' and 'verify-full' are supported.
  • timeout: The number of seconds before the connection to the server times out. By default there is no timeout.
  • tcp_keepalive: Whether TCP keepalive is used. The default value is True.
  • application_name: Sets the application name. The default value is None.
  • preferred_role: The IAM role preferred for the current connection.
  • principal_arn: The ARN of the IAM entity (user or role) for which you are generating a policy.
  • credentials_provider: The class name of the IdP that will be used for authenticating with the Amazon Redshift cluster.
  • region: The AWS region where the Amazon Redshift cluster is located.
  • cluster_identifier: The cluster identifier of the Amazon Redshift cluster.
  • iam: Whether IAM authentication is enabled. Default value is False. IAM must be True when authenticating using an IdP.
  • is_serverless: Whether the Redshift endpoint is serverless or provisioned. Default value is False.
  • serverless_acct_id: The account ID of the serverless endpoint. Default value is None.
  • serverless_work_group: The name of the workgroup for the serverless endpoint. Default value is None.
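
A minimal username/password sketch (all values are placeholders); IAM and serverless setups map onto the corresponding fields above:

    from sqlmesh.core.config.connection import RedshiftConnectionConfig

    config = RedshiftConnectionConfig(
        user="awsuser",                                             # placeholder credentials
        password="my_password",
        database="dev",
        host="my-cluster.abc123.us-east-1.redshift.amazonaws.com",  # placeholder cluster endpoint
        port=5439,
    )
    adapter = config.create_engine_adapter()  # redshift_connector.connect is the connection factory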