sqlmesh.core.config.connection
from __future__ import annotations

import abc
import sys
import typing as t

from pydantic import Field

from sqlmesh.core import engine_adapter
from sqlmesh.core.config.base import BaseConfig
from sqlmesh.core.config.common import (
    concurrent_tasks_validator,
    http_headers_validator,
)
from sqlmesh.core.engine_adapter import EngineAdapter

if sys.version_info >= (3, 9):
    from typing import Annotated, Literal
else:
    from typing_extensions import Annotated, Literal


class _ConnectionConfig(abc.ABC, BaseConfig):
    concurrent_tasks: int

    @property
    @abc.abstractmethod
    def _connection_kwargs_keys(self) -> t.Set[str]:
        """keywords that should be passed into the connection"""

    @property
    @abc.abstractmethod
    def _engine_adapter(self) -> t.Type[EngineAdapter]:
        """The engine adapter for this connection"""

    @property
    @abc.abstractmethod
    def _connection_factory(self) -> t.Callable:
        """A function that is called to return a connection object for the given Engine Adapter"""

    @property
    def _static_connection_kwargs(self) -> t.Dict[str, t.Any]:
        """The static connection kwargs for this connection"""
        return {}

    def create_engine_adapter(self) -> EngineAdapter:
        """Returns a new instance of the Engine Adapter."""
        return self._engine_adapter(
            lambda: self._connection_factory(
                **{
                    **self._static_connection_kwargs,
                    **{k: v for k, v in self.dict().items() if k in self._connection_kwargs_keys},
                }
            ),
            multithreaded=self.concurrent_tasks > 1,
        )


class DuckDBConnectionConfig(_ConnectionConfig):
    """Configuration for the DuckDB connection.

    Args:
        database: The optional database name. If not specified, the in-memory database will be used.
        concurrent_tasks: The maximum number of tasks that can use this connection concurrently.
    """

    database: t.Optional[str]

    concurrent_tasks: Literal[1] = 1

    type_: Literal["duckdb"] = Field(alias="type", default="duckdb")

    @property
    def _connection_kwargs_keys(self) -> t.Set[str]:
        return {"database"}

    @property
    def _engine_adapter(self) -> t.Type[EngineAdapter]:
        return engine_adapter.DuckDBEngineAdapter

    @property
    def _connection_factory(self) -> t.Callable:
        import duckdb

        return duckdb.connect


class SnowflakeConnectionConfig(_ConnectionConfig):
    """Configuration for the Snowflake connection.

    Args:
        user: The Snowflake username.
        password: The Snowflake password.
        account: The Snowflake account name.
        warehouse: The optional warehouse name.
        database: The optional database name.
        role: The optional role name.
        concurrent_tasks: The maximum number of tasks that can use this connection concurrently.
    """

    user: str
    password: str
    account: str
    warehouse: t.Optional[str]
    database: t.Optional[str]
    role: t.Optional[str]

    concurrent_tasks: int = 4

    type_: Literal["snowflake"] = Field(alias="type", default="snowflake")

    _concurrent_tasks_validator = concurrent_tasks_validator

    @property
    def _connection_kwargs_keys(self) -> t.Set[str]:
        return {"user", "password", "account", "warehouse", "database", "role"}

    @property
    def _engine_adapter(self) -> t.Type[EngineAdapter]:
        return engine_adapter.SnowflakeEngineAdapter

    @property
    def _connection_factory(self) -> t.Callable:
        from snowflake import connector

        return connector.connect


class DatabricksSQLConnectionConfig(_ConnectionConfig):
    """
    Configuration for the Databricks API connection. This connection is used to access the Databricks
    when you don't have access to a SparkSession. Ex: Running Jupyter locally on your laptop to connect to a
    Databricks cluster

    Arg Source: https://github.com/databricks/databricks-sql-python/blob/main/src/databricks/sql/client.py#L39
    Args:
        server_hostname: Databricks instance host name.
        http_path: Http path either to a DBSQL endpoint (e.g. /sql/1.0/endpoints/1234567890abcdef)
            or to a DBR interactive cluster (e.g. /sql/protocolv1/o/1234567890123456/1234-123456-slid123)
        access_token: Http Bearer access token, e.g. Databricks Personal Access Token.
        http_headers: An optional list of (k, v) pairs that will be set as Http headers on every request
        session_configuration: An optional dictionary of Spark session parameters. Defaults to None.
            Execute the SQL command `SET -v` to get a full list of available commands.
    """

    server_hostname: str
    http_path: str
    access_token: str
    http_headers: t.Optional[t.List[t.Tuple[str, str]]]
    session_configuration: t.Optional[t.Dict[str, t.Any]]

    concurrent_tasks: int = 4

    type_: Literal["databricks_sql"] = Field(alias="type", default="databricks_sql")

    _concurrent_tasks_validator = concurrent_tasks_validator
    _http_headers_validator = http_headers_validator

    @property
    def _connection_kwargs_keys(self) -> t.Set[str]:
        return {
            "server_hostname",
            "http_path",
            "access_token",
            "http_headers",
            "session_configuration",
        }

    @property
    def _engine_adapter(self) -> t.Type[EngineAdapter]:
        return engine_adapter.DatabricksSQLEngineAdapter

    @property
    def _connection_factory(self) -> t.Callable:
        from databricks import sql

        return sql.connect


class DatabricksSparkSessionConnectionConfig(_ConnectionConfig):
    """
    Configuration for the Databricks connection. This connection is used to access the Databricks
    when you have access to a SparkSession. Ex: Running in a Databricks notebook or cluster

    Args:
        spark_config: An optional dictionary of Spark session parameters. Defaults to None.
    """

    spark_config: t.Optional[t.Dict[str, str]] = None

    concurrent_tasks: Literal[1] = 1

    type_: Literal["databricks_spark_session"] = Field(
        alias="type", default="databricks_spark_session"
    )

    @property
    def _connection_kwargs_keys(self) -> t.Set[str]:
        return set()

    @property
    def _engine_adapter(self) -> t.Type[EngineAdapter]:
        return engine_adapter.DatabricksSparkSessionEngineAdapter

    @property
    def _connection_factory(self) -> t.Callable:
        from sqlmesh.engines.spark.db_api.spark_session import connection

        return connection

    @property
    def _static_connection_kwargs(self) -> t.Dict[str, t.Any]:
        from pyspark import SparkConf
        from pyspark.sql import SparkSession

        spark_config = SparkConf()
        if self.spark_config:
            for k, v in self.spark_config.items():
                spark_config.set(k, v)

        return dict(
            spark=SparkSession.builder.config(conf=spark_config).enableHiveSupport().getOrCreate()
        )


class DatabricksConnectionConfig(_ConnectionConfig):
    """
    Databricks connection that prefers to use SparkSession if available, otherwise it will use the Databricks API.

    Arg Source: https://github.com/databricks/databricks-sql-python/blob/main/src/databricks/sql/client.py#L39
    Args:
        server_hostname: Databricks instance host name.
        http_path: Http path either to a DBSQL endpoint (e.g. /sql/1.0/endpoints/1234567890abcdef)
            or to a DBR interactive cluster (e.g. /sql/protocolv1/o/1234567890123456/1234-123456-slid123)
        access_token: Http Bearer access token, e.g. Databricks Personal Access Token.
        http_headers: An optional list of (k, v) pairs that will be set as Http headers on every request
        session_configuration: An optional dictionary of Spark session parameters. Defaults to None.
            Execute the SQL command `SET -v` to get a full list of available commands.
        spark_config: An optional dictionary of Spark session parameters. Defaults to None.
    """

    server_hostname: str
    http_path: str
    access_token: str
    http_headers: t.Optional[t.List[t.Tuple[str, str]]]
    session_configuration: t.Optional[t.Dict[str, t.Any]]
    spark_config: t.Optional[t.Dict[str, str]] = None

    concurrent_tasks: int = 4

    type_: Literal["databricks"] = Field(alias="type", default="databricks")

    _concurrent_tasks_validator = concurrent_tasks_validator
    _http_headers_validator = http_headers_validator

    _has_spark_session_access: bool

    class Config:
        allow_mutation = True

    @property
    def has_spark_session_access(self) -> bool:
        if not getattr(self, "_has_spark_session_access", None):
            try:
                from pyspark.sql import SparkSession

                spark = SparkSession.getActiveSession()
                if spark:
                    self._has_spark_session_access = True
                    self.concurrent_tasks = 1
                else:
                    self._has_spark_session_access = False
            except ImportError:
                self._has_spark_session_access = False
        return self._has_spark_session_access

    @property
    def _connection_kwargs_keys(self) -> t.Set[str]:
        if self.has_spark_session_access:
            return set()
        return {
            "server_hostname",
            "http_path",
            "access_token",
            "http_headers",
            "session_configuration",
        }

    @property
    def _engine_adapter(self) -> t.Type[EngineAdapter]:
        if self.has_spark_session_access:
            return engine_adapter.DatabricksSparkSessionEngineAdapter
        return engine_adapter.DatabricksSQLEngineAdapter

    @property
    def _connection_factory(self) -> t.Callable:
        if self.has_spark_session_access:
            from sqlmesh.engines.spark.db_api.spark_session import connection

            return connection
        from databricks import sql

        return sql.connect

    @property
    def _static_connection_kwargs(self) -> t.Dict[str, t.Any]:
        if self.has_spark_session_access:
            from pyspark import SparkConf
            from pyspark.sql import SparkSession

            spark_config = SparkConf()
            if self.spark_config:
                for k, v in self.spark_config.items():
                    spark_config.set(k, v)

            return dict(
                spark=SparkSession.builder.config(conf=spark_config)
                .enableHiveSupport()
                .getOrCreate()
            )
        return {}


class BigQueryConnectionConfig(_ConnectionConfig):
    """
    BigQuery Connection Configuration.

    TODO: Need to update to support all the different authentication options
    """

    concurrent_tasks: int = 4

    type_: Literal["bigquery"] = Field(alias="type", default="bigquery")

    @property
    def _connection_kwargs_keys(self) -> t.Set[str]:
        return set()

    @property
    def _engine_adapter(self) -> t.Type[EngineAdapter]:
        return engine_adapter.BigQueryEngineAdapter

    @property
    def _connection_factory(self) -> t.Callable:
        from google.cloud.bigquery.dbapi import connect

        return connect


class RedshiftConnectionConfig(_ConnectionConfig):
    """
    Redshift Connection Configuration.

    Arg Source: https://github.com/aws/amazon-redshift-python-driver/blob/master/redshift_connector/__init__.py#L146
    Note: A subset of properties were selected. Please open an issue/PR if you want to see more supported.

    Args:
        user: The username to use for authentication with the Amazon Redshift cluster.
        password: The password to use for authentication with the Amazon Redshift cluster.
        database: The name of the database instance to connect to.
        host: The hostname of the Amazon Redshift cluster.
        port: The port number of the Amazon Redshift cluster. Default value is 5439.
        source_address: No description provided
        unix_sock: No description provided
        ssl: Is SSL enabled. Default value is ``True``. SSL must be enabled when authenticating using IAM.
        sslmode: The security of the connection to the Amazon Redshift cluster. 'verify-ca' and 'verify-full' are supported.
        timeout: The number of seconds before the connection to the server will timeout. By default there is no timeout.
        tcp_keepalive: Is `TCP keepalive <https://en.wikipedia.org/wiki/Keepalive#TCP_keepalive>`_ used. The default value is ``True``.
        application_name: Sets the application name. The default value is None.
        preferred_role: The IAM role preferred for the current connection.
        principal_arn: The ARN of the IAM entity (user or role) for which you are generating a policy.
        credentials_provider: The class name of the IdP that will be used for authenticating with the Amazon Redshift cluster.
        region: The AWS region where the Amazon Redshift cluster is located.
        cluster_identifier: The cluster identifier of the Amazon Redshift cluster.
        iam: If IAM authentication is enabled. Default value is False. IAM must be True when authenticating using an IdP.
        is_serverless: Redshift end-point is serverless or provisional. Default value false.
        serverless_acct_id: The account ID of the serverless. Default value None
        serverless_work_group: The name of work group for serverless end point. Default value None.
    """

    user: t.Optional[str]
    password: t.Optional[str]
    database: t.Optional[str]
    host: t.Optional[str]
    port: t.Optional[int]
    source_address: t.Optional[str]
    unix_sock: t.Optional[str]
    ssl: t.Optional[bool]
    sslmode: t.Optional[str]
    timeout: t.Optional[int]
    tcp_keepalive: t.Optional[bool]
    application_name: t.Optional[str]
    preferred_role: t.Optional[str]
    principal_arn: t.Optional[str]
    credentials_provider: t.Optional[str]
    region: t.Optional[str]
    cluster_identifier: t.Optional[str]
    iam: t.Optional[bool]
    is_serverless: t.Optional[bool]
    serverless_acct_id: t.Optional[str]
    serverless_work_group: t.Optional[str]

    concurrent_tasks: int = 4

    type_: Literal["redshift"] = Field(alias="type", default="redshift")

    @property
    def _connection_kwargs_keys(self) -> t.Set[str]:
        return {
            "user",
            "password",
            "database",
            "host",
            "port",
            "source_address",
            "unix_sock",
            "ssl",
            "sslmode",
            "timeout",
            "tcp_keepalive",
            "application_name",
            "preferred_role",
            "principal_arn",
            "credentials_provider",
            "region",
            "cluster_identifier",
            "iam",
            "is_serverless",
            "serverless_acct_id",
            "serverless_work_group",
        }

    @property
    def _engine_adapter(self) -> t.Type[EngineAdapter]:
        return engine_adapter.RedshiftEngineAdapter

    @property
    def _connection_factory(self) -> t.Callable:
        from redshift_connector import connect

        return connect


ConnectionConfig = Annotated[
    t.Union[
        DuckDBConnectionConfig,
        SnowflakeConnectionConfig,
        DatabricksSQLConnectionConfig,
        DatabricksSparkSessionConnectionConfig,
        DatabricksConnectionConfig,
        BigQueryConnectionConfig,
        RedshiftConnectionConfig,
    ],
    Field(discriminator="type_"),
]
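The ConnectionConfig union at the bottom of the module is discriminated on the type field, which is how a plain dictionary (for example, one loaded from a YAML or Python config) is routed to the right config class, and every concrete config builds its engine adapter the same way via _ConnectionConfig.create_engine_adapter. A hedged sketch of that routing, using a hypothetical wrapper model purely for illustration:

import pydantic

from sqlmesh.core.config.connection import ConnectionConfig, DuckDBConnectionConfig


class GatewayConfig(pydantic.BaseModel):  # hypothetical wrapper, not part of sqlmesh
    connection: ConnectionConfig


# "type" selects the union member; "local.db" is a placeholder database path.
parsed = GatewayConfig.parse_obj({"connection": {"type": "duckdb", "database": "local.db"}})
assert isinstance(parsed.connection, DuckDBConnectionConfig)

# The connection itself is opened lazily, when the adapter first needs it.
adapter = parsed.connection.create_engine_adapter()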
class DuckDBConnectionConfig(_ConnectionConfig):
Configuration for the DuckDB connection.
Arguments:
- database: The optional database name. If not specified, the in-memory database will be used.
- concurrent_tasks: The maximum number of tasks that can use this connection concurrently.
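For illustration, a minimal sketch of the DuckDB config (the file name is a placeholder): omitting database uses DuckDB's in-memory database, and concurrent_tasks is pinned to 1 by its Literal[1] type, so other values are rejected at validation time.

from pydantic import ValidationError

from sqlmesh.core.config.connection import DuckDBConnectionConfig

in_memory = DuckDBConnectionConfig()                       # database=None -> in-memory DuckDB
file_backed = DuckDBConnectionConfig(database="local.db")  # placeholder file path

try:
    DuckDBConnectionConfig(concurrent_tasks=2)  # rejected: concurrent_tasks is Literal[1]
except ValidationError as err:
    print(err)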
class SnowflakeConnectionConfig(_ConnectionConfig):
Configuration for the Snowflake connection.
Arguments:
- user: The Snowflake username.
- password: The Snowflake password.
- account: The Snowflake account name.
- warehouse: The optional warehouse name.
- database: The optional database name.
- role: The optional role name.
- concurrent_tasks: The maximum number of tasks that can use this connection concurrently.
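A minimal sketch of constructing the Snowflake config; the credential values are placeholders, and only the fields named in _connection_kwargs_keys are forwarded to snowflake.connector.connect.

from sqlmesh.core.config.connection import SnowflakeConnectionConfig

config = SnowflakeConnectionConfig(
    user="my_user",            # placeholder credentials
    password="my_password",
    account="my_account",
    warehouse="my_warehouse",  # optional
)
# concurrent_tasks defaults to 4, so the adapter is created in multithreaded mode.
adapter = config.create_engine_adapter()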
class DatabricksSQLConnectionConfig(_ConnectionConfig):
Configuration for the Databricks API connection. This connection is used to access Databricks when you don't have access to a SparkSession, e.g. running Jupyter locally on your laptop and connecting to a Databricks cluster.
Arg Source: https://github.com/databricks/databricks-sql-python/blob/main/src/databricks/sql/client.py#L39
Arguments:
- server_hostname: Databricks instance host name.
- http_path: Http path either to a DBSQL endpoint (e.g. /sql/1.0/endpoints/1234567890abcdef) or to a DBR interactive cluster (e.g. /sql/protocolv1/o/1234567890123456/1234-123456-slid123)
- access_token: Http Bearer access token, e.g. Databricks Personal Access Token.
- http_headers: An optional list of (k, v) pairs that will be set as Http headers on every request
- session_configuration: An optional dictionary of Spark session parameters. Defaults to None. Execute the SQL command `SET -v` to get a full list of available commands.
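A hedged sketch of the SQL-connector path (the hostname, HTTP path, and token are placeholders); http_headers and session_configuration are optional and passed straight through to databricks.sql.connect.

from sqlmesh.core.config.connection import DatabricksSQLConnectionConfig

config = DatabricksSQLConnectionConfig(
    server_hostname="dbc-xxxxxxxx-xxxx.cloud.databricks.com",      # placeholder host
    http_path="/sql/1.0/endpoints/1234567890abcdef",               # placeholder endpoint path
    access_token="dapiXXXXXXXXXXXXXXXX",                           # placeholder personal access token
    http_headers=[("User-Agent", "sqlmesh")],                      # optional
    session_configuration={"spark.sql.session.timeZone": "UTC"},   # optional, hypothetical setting
)
adapter = config.create_engine_adapter()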
class DatabricksSparkSessionConnectionConfig(_ConnectionConfig):
Configuration for the Databricks connection. This connection is used to access Databricks when you have access to a SparkSession, e.g. running in a Databricks notebook or on a Databricks cluster.
Arguments:
- spark_config: An optional dictionary of Spark session parameters. Defaults to None.
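A sketch of the SparkSession path, assuming pyspark is importable (e.g. inside a Databricks notebook); the spark_config entry is a hypothetical setting.

from sqlmesh.core.config.connection import DatabricksSparkSessionConnectionConfig

config = DatabricksSparkSessionConnectionConfig(
    spark_config={"spark.sql.shuffle.partitions": "8"}  # hypothetical Spark setting
)
# _static_connection_kwargs builds (or reuses) a SparkSession via getOrCreate() when the
# adapter first connects, so this only works where a Spark runtime is available.
adapter = config.create_engine_adapter()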
class DatabricksConnectionConfig(_ConnectionConfig):
Databricks connection that prefers to use a SparkSession if one is available; otherwise it falls back to the Databricks SQL API.
Arg Source: https://github.com/databricks/databricks-sql-python/blob/main/src/databricks/sql/client.py#L39
Arguments:
- server_hostname: Databricks instance host name.
- http_path: Http path either to a DBSQL endpoint (e.g. /sql/1.0/endpoints/1234567890abcdef) or to a DBR interactive cluster (e.g. /sql/protocolv1/o/1234567890123456/1234-123456-slid123)
- access_token: Http Bearer access token, e.g. Databricks Personal Access Token.
- http_headers: An optional list of (k, v) pairs that will be set as Http headers on every request
- session_configuration: An optional dictionary of Spark session parameters. Defaults to None. Execute the SQL command `SET -v` to get a full list of available commands.
- spark_config: An optional dictionary of Spark session parameters. Defaults to None.
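A sketch of how the combined config chooses its backend (all connection values are placeholders): has_spark_session_access is True only when an active SparkSession exists, in which case the Spark adapter is used and concurrent_tasks is forced to 1; otherwise the config falls back to the Databricks SQL connector.

from sqlmesh.core.config.connection import DatabricksConnectionConfig

config = DatabricksConnectionConfig(
    server_hostname="dbc-xxxxxxxx-xxxx.cloud.databricks.com",  # placeholder host
    http_path="/sql/1.0/endpoints/1234567890abcdef",           # placeholder endpoint path
    access_token="dapiXXXXXXXXXXXXXXXX",                       # placeholder personal access token
)

if config.has_spark_session_access:
    # Active SparkSession found: DatabricksSparkSessionEngineAdapter, concurrent_tasks forced to 1.
    pass
else:
    # No SparkSession: DatabricksSQLEngineAdapter over databricks.sql.connect.
    pass

adapter = config.create_engine_adapter()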
class BigQueryConnectionConfig(_ConnectionConfig):
BigQuery Connection Configuration.
TODO: Need to update to support all the different authentication options
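As the TODO notes, the config currently declares no connection kwargs, so google.cloud.bigquery.dbapi.connect is called with defaults and authentication is left to the BigQuery client's own credential resolution. A minimal sketch:

from sqlmesh.core.config.connection import BigQueryConnectionConfig

# No connection fields yet; credentials are resolved by the BigQuery client itself,
# e.g. via Application Default Credentials / GOOGLE_APPLICATION_CREDENTIALS.
config = BigQueryConnectionConfig()
adapter = config.create_engine_adapter()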
class RedshiftConnectionConfig(_ConnectionConfig):
Redshift Connection Configuration.
Arg Source: https://github.com/aws/amazon-redshift-python-driver/blob/master/redshift_connector/__init__.py#L146
Note: A subset of properties were selected. Please open an issue/PR if you want to see more supported.
Arguments:
- user: The username to use for authentication with the Amazon Redshift cluster.
- password: The password to use for authentication with the Amazon Redshift cluster.
- database: The name of the database instance to connect to.
- host: The hostname of the Amazon Redshift cluster.
- port: The port number of the Amazon Redshift cluster. Default value is 5439.
- source_address: No description provided
- unix_sock: No description provided
- ssl: Is SSL enabled. Default value is True. SSL must be enabled when authenticating using IAM.
- sslmode: The security of the connection to the Amazon Redshift cluster. 'verify-ca' and 'verify-full' are supported.
- timeout: The number of seconds before the connection to the server will timeout. By default there is no timeout.
- tcp_keepalive: Is TCP keepalive used. The default value is True.
- application_name: Sets the application name. The default value is None.
- preferred_role: The IAM role preferred for the current connection.
- principal_arn: The ARN of the IAM entity (user or role) for which you are generating a policy.
- credentials_provider: The class name of the IdP that will be used for authenticating with the Amazon Redshift cluster.
- region: The AWS region where the Amazon Redshift cluster is located.
- cluster_identifier: The cluster identifier of the Amazon Redshift cluster.
- iam: If IAM authentication is enabled. Default value is False. IAM must be True when authenticating using an IdP.
- is_serverless: Whether the Redshift endpoint is serverless or provisioned. Default value is False.
- serverless_acct_id: The account ID of the serverless endpoint. Default value is None.
- serverless_work_group: The name of the work group for the serverless endpoint. Default value is None.
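A minimal sketch of a password-based Redshift connection (the endpoint and credentials are placeholders); the same fields support IAM-based authentication via iam, credentials_provider, and the cluster/serverless options listed above.

from sqlmesh.core.config.connection import RedshiftConnectionConfig

config = RedshiftConnectionConfig(
    user="awsuser",                                                  # placeholder
    password="example-password",                                     # placeholder
    database="dev",
    host="examplecluster.abc123.us-east-1.redshift.amazonaws.com",   # placeholder endpoint
    port=5439,
)
# The declared fields are forwarded to redshift_connector.connect when the adapter first connects.
adapter = config.create_engine_adapter()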