Coverage for cc_modules/cc_sqlalchemy.py : 64%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/bin/env python
3"""
4camcops_server/cc_modules/cc_sqlalchemy.py
6===============================================================================
8 Copyright (C) 2012-2020 Rudolf Cardinal (rudolf@pobox.com).
10 This file is part of CamCOPS.
12 CamCOPS is free software: you can redistribute it and/or modify
13 it under the terms of the GNU General Public License as published by
14 the Free Software Foundation, either version 3 of the License, or
15 (at your option) any later version.
17 CamCOPS is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with CamCOPS. If not, see <https://www.gnu.org/licenses/>.
25===============================================================================
27**SQLAlchemy helper functions and constants.**
29We define our metadata ``Base`` here, and things like our index naming
30convention and MySQL table formats.
32A few random notes:
34- SQLAlchemy will automatically warn about clashing columns:
36 .. :code-block:: python
38 from sqlalchemy import Column, Integer
39 from sqlalchemy.ext.declarative import declarative_base
41 Base = declarative_base()
43 class Thing(Base):
44 __tablename__ = "thing"
45 a = Column("a", Integer, primary_key=True)
46 b = Column("b", Integer)
47 c = Column("b", Integer) # produces a warning:
49 .. code-block:: none
51 SAWarning: On class 'Thing', Column object 'b' named directly multiple
52 times, only one will be used: b, c. Consider using orm.synonym instead
54"""
56from abc import ABCMeta
57from io import StringIO
58import logging
59import sqlite3
61from cardinal_pythonlib.logs import BraceStyleAdapter
62from cardinal_pythonlib.sqlalchemy.dialect import SqlaDialectName
63from cardinal_pythonlib.sqlalchemy.dump import dump_ddl
64from cardinal_pythonlib.sqlalchemy.session import (
65 make_sqlite_url,
66 SQLITE_MEMORY_URL,
67)
68from pendulum import DateTime as Pendulum
70from sqlalchemy.engine import create_engine
71from sqlalchemy.engine.base import Engine
72from sqlalchemy.ext.declarative import declarative_base, DeclarativeMeta
73from sqlalchemy.sql.schema import MetaData
75from camcops_server.cc_modules.cc_cache import cache_region_static, fkg
77log = BraceStyleAdapter(logging.getLogger(__name__))
79# =============================================================================
80# Naming convention; metadata; Base
81# =============================================================================
82# https://alembic.readthedocs.org/en/latest/naming.html
83# http://docs.sqlalchemy.org/en/latest/core/constraints.html#configuring-constraint-naming-conventions # noqa
85MYSQL_MAX_IDENTIFIER_LENGTH = 64
86LONG_COLUMN_NAME_WARNING_LIMIT = 30
88NAMING_CONVENTION = {
89 # - Note that constraint names must be unique in the DATABASE, not the
90 # table;
91 # https://dev.mysql.com/doc/refman/5.6/en/create-table-foreign-keys.html
92 # - Index names only have to be unique for the table;
93 # https://stackoverflow.com/questions/30653452/do-index-names-have-to-be-unique-across-entire-database-in-mysql # noqa
95 # INDEX:
96 "ix": 'ix_%(column_0_label)s',
98 # UNIQUE CONSTRAINT:
99 "uq": "uq_%(table_name)s_%(column_0_name)s",
100 # "uq": "uq_%(column_0_name)s",
102 # CHECK CONSTRAINT:
103 # "ck": "ck_%(table_name)s_%(constraint_name)s", # too long for MySQL
104 # ... https://groups.google.com/forum/#!topic/sqlalchemy/SIT4D8S9dUg
105 # "ck": "ck_%(table_name)s_%(column_0_name)s",
106 # Problem 2018-09-14:
107 # - constraints must be unique across database
108 # - MySQL only accepts 64 characters for constraint name
109 # - using "%(column_0_name)" means that explicit constrant names are
110 # ignored
111 # - using "%(constraint_name)" means that all constraints have to be named
112 # explicitly (very tedious)
113 # - so truncate?
114 # https://docs.python.org/3/library/stdtypes.html#old-string-formatting
115 # https://www.python.org/dev/peps/pep-0237/
116 # - The main problem is BOOL columns, e.g.
117 # cpft_lps_discharge.management_specialling_behavioural_disturbance
118 # - Example:
119 # longthing = "abcdefghijklmnopqrstuvwxyz"
120 # d = {"thing": longthing}
121 # "hello %(thing).10s world" % d # LEFT TRUNCATE
122 # # ... gives 'hello abcdefghij world'
123 # "ck": "ck_%(table_name).30s_%(column_0_name).30s",
124 # 3 for "ck_" leaves 61; 30 for table, 1 for "_", 30 for column
125 # ... no...
126 # "obs_contamination_bodily_waste_*"
127 "ck": "ck_%(table_name)s_%(column_0_name)s", # unique but maybe too long
129 # FOREIGN KEY:
130 # "fk": "fk_%(table_name)s_%(column_0_name)s_%(referred_table_name)s", # too long for MySQL sometimes! # noqa
131 "fk": "fk_%(table_name)s_%(column_0_name)s",
132 # "fk": "fk_%(column_0_name)s",
134 # PRIMARY KEY:
135 "pk": "pk_%(table_name)s"
136}
137MASTER_META = MetaData(naming_convention=NAMING_CONVENTION)
139# The base of all our model classes:
140Base = declarative_base(metadata=MASTER_META)
142# Special options:
143Base.__table_args__ = {
144 # -------------------------------------------------------------------------
145 # MySQL special options
146 # -------------------------------------------------------------------------
147 # SQLAlchemy __table_args__:
148 # http://docs.sqlalchemy.org/en/latest/orm/extensions/declarative/table_config.html # noqa
149 # SQLAlchemy sends keyword arguments like 'mysql_keyword_name' to be
150 # rendered as KEYWORD_NAME in the CREATE TABLE statement:
151 # http://docs.sqlalchemy.org/en/latest/dialects/mysql.html
153 # Engine: InnoDB
154 'mysql_engine': 'InnoDB',
156 # Barracuda: COMPRESSED or DYNAMIC
157 # https://dev.mysql.com/doc/refman/5.7/en/innodb-row-format-dynamic.html
158 # https://xenforo.com/community/threads/anyone-running-their-innodb-tables-with-row_format-compressed.99606/ # noqa
159 # We shouldn't compress everything by default; performance hit.
160 'mysql_row_format': 'DYNAMIC',
162 # SEE server_troubleshooting.rst FOR BUG DISCUSSION
164 'mysql_charset': 'utf8mb4 COLLATE utf8mb4_unicode_ci',
166 # Character set
167 # REPLACED # 'mysql_charset': 'utf8mb4',
168 # https://dev.mysql.com/doc/refman/5.5/en/charset-unicode-utf8mb4.html
170 # Collation
171 # Which collation for MySQL? See
172 # - https://stackoverflow.com/questions/766809/whats-the-difference-between-utf8-general-ci-and-utf8-unicode-ci # noqa
173 # REPLACED # 'mysql_collate': 'utf8mb4_unicode_ci'
174 # Note that COLLATION rules are, from least to greatest precedence:
175 # Server collation
176 # Connection-specific collation
177 # Database collation
178 # Table collation
179 # Column collation
180 # Query collation (using CAST or CONVERT)
181 # - https://stackoverflow.com/questions/24356090/difference-between-database-table-column-collation # noqa
182 # Therefore, we can set the table collation for all our tables, and not
183 # worry about the column collation, e.g. Text(collation=...).
184 #
185 # To check a MySQL database, and connection/server settings:
186 # SHOW VARIABLES LIKE '%character%';
187 # SHOW VARIABLES LIKE '%collation%';
188 # To check tables:
189 # SHOW TABLE STATUS WHERE NAME LIKE 'my_tablename'\G
190 # ... note use of \G to produce long-form output!
191 # To check columns:
192 # SHOW FULL COLUMNS FROM my_tablename;
193 #
194 # ONE THING IN PARTICULAR TO BEWARE: utf8mb4_unicode_ci produces
195 # CASE-INSENSITIVE COMPARISON. For example:
196 # SELECT 'a' = 'A'; -- produces 1
197 # SELECT 'a' = 'B'; -- produces 0
198 # SELECT BINARY 'a' = BINARY 'A'; -- produces 0
199 # This is a PROBLEM FOR PASSWORD FIELDS IF WE INTEND TO DO DATABASE-LEVEL
200 # COMPARISONS WITH THEM. In that case we must ensure a different collation
201 # is set; specifically, use
202 #
203 # utf8mb4_bin
204 #
205 # and see also
206 # SHOW COLLATION WHERE `Collation` LIKE 'utf8mb4%';
207 # and
208 # https://dev.mysql.com/doc/refman/5.6/en/charset-binary-collations.html
209 #
210 # To check, run
211 # SHOW FULL COLUMNS FROM _security_users;
212}
214# MySQL things we can't set via SQLAlchemy, but would like to be set:
215# - max_allowed_packet: should be at least 32M
216# - innodb_strict_mode: should be 1, but less of a concern with SQLAlchemy
218# MySQL things we don't care about too much:
219# - innodb_file_per_table: desirable, but up to the user.
222class DeclarativeAndABCMeta(DeclarativeMeta, ABCMeta):
223 """
224 Metaclass for classes that want to inherit from Base and also ABC:
225 """
226 pass
229# =============================================================================
230# Convenience functions
231# =============================================================================
233def make_memory_sqlite_engine(echo: bool = False) -> Engine:
234 """
235 Create an SQLAlchemy :class:`Engine` for an in-memory SQLite database.
236 """
237 return create_engine(SQLITE_MEMORY_URL, echo=echo)
240def make_file_sqlite_engine(filename: str, echo: bool = False) -> Engine:
241 """
242 Create an SQLAlchemy :class:`Engine` for an on-disk SQLite database.
243 """
244 return create_engine(make_sqlite_url(filename), echo=echo)
247def sql_from_sqlite_database(connection: sqlite3.Connection) -> str:
248 """
249 Returns SQL to describe an SQLite database.
251 Args:
252 connection: connection to SQLite database via ``sqlite3`` module
254 Returns:
255 the SQL
257 """
258 with StringIO() as f:
259 # noinspection PyTypeChecker
260 for line in connection.iterdump():
261 f.write(line + "\n")
262 f.flush()
263 return f.getvalue()
266@cache_region_static.cache_on_arguments(function_key_generator=fkg)
267def get_all_ddl(dialect_name: str = SqlaDialectName.MYSQL) -> str:
268 """
269 Returns the DDL (data definition language; SQL ``CREATE TABLE`` commands)
270 for our SQLAlchemy metadata.
272 Args:
273 dialect_name: SQLAlchemy dialect name
274 """
275 metadata = Base.metadata # type: MetaData
276 with StringIO() as f:
277 dump_ddl(metadata, dialect_name=dialect_name, fileobj=f)
278 f.flush()
279 text = f.getvalue()
280 return text
283def log_all_ddl(dialect_name: str = SqlaDialectName.MYSQL) -> None:
284 """
285 Send the DDL for our SQLAlchemy metadata to the Python log.
287 Args:
288 dialect_name: SQLAlchemy dialect name
289 """
290 text = get_all_ddl(dialect_name)
291 log.info(text)
292 log.info("DDL length: {} characters", len(text))
295def assert_constraint_name_ok(table_name: str, column_name: str) -> None:
296 """
297 Checks that the automatically generated name of a constraint isn't too long
298 for specific databases.
300 Args:
301 table_name: table name
302 column_name: column name
304 Raises:
305 AssertionError, if something will break
306 """
307 d = {
308 "table_name": table_name,
309 "column_0_name": column_name,
310 }
311 anticipated_name = NAMING_CONVENTION["ck"] % d
312 if len(anticipated_name) > MYSQL_MAX_IDENTIFIER_LENGTH:
313 raise AssertionError(
314 f"Constraint name too long for table {table_name!r}, column "
315 f"{column_name!r}; will be {anticipated_name!r} "
316 f"of length {len(anticipated_name)}")
319# =============================================================================
320# Database engine hacks
321# =============================================================================
323def hack_pendulum_into_pymysql() -> None:
324 """
325 Hack in support for :class:`pendulum.DateTime` into the ``pymysql``
326 database interface.
328 See https://pendulum.eustace.io/docs/#limitations.
329 """
330 try:
331 # noinspection PyUnresolvedReferences
332 from pymysql.converters import encoders, escape_datetime
333 encoders[Pendulum] = escape_datetime
334 except ImportError:
335 pass
338hack_pendulum_into_pymysql()