Source code for camcops_server.cc_modules.cc_anon

#!/usr/bin/env python
# camcops_server/cc_modules/cc_sqlalchemy.py

"""
===============================================================================
    Copyright (C) 2012-2018 Rudolf Cardinal (rudolf@pobox.com).

    This file is part of CamCOPS.

    CamCOPS is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    CamCOPS is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with CamCOPS. If not, see <http://www.gnu.org/licenses/>.
===============================================================================
"""

from collections import OrderedDict
import re
from typing import Dict, List, Pattern, Tuple

from .cc_constants import TSV_PATIENT_FIELD_PREFIX
from . import cc_db


# =============================================================================
# For anonymisation
# =============================================================================

[docs]def get_literal_regex(x: str) -> Pattern: """Regex for anonymisation. Literal at word boundaries.""" # http://stackoverflow.com/questions/919056 wb = "\\b" # word boundary; escape the slash return re.compile(wb + re.escape(x) + wb, re.IGNORECASE)
# ============================================================================= # For anonymisation staging database # ============================================================================= def get_type_size_as_text_from_sqltype(sqltype: str) -> Tuple[str, str]: size = "" finaltype = sqltype m = re.match("(\w+)\((\w+)\)", sqltype) # e.g. VARCHAR(10) if m: finaltype = m.group(1) size = m.group(2) return finaltype, size
[docs]def get_cris_dd_row(taskname: str, tablename: str, fieldspec: "FIELDSPEC_TYPE") -> Dict: """Returns an OrderedDict with information for a CRIS Data Dictionary row, given a fieldspec.""" cc_db.add_sqltype_to_fieldspec_in_place(fieldspec) cctype = fieldspec["cctype"] sqltype = fieldspec["sqltype"] colname = fieldspec["name"] anon = fieldspec.get("anon", False) # optional comment = fieldspec.get("comment", "") # optional identifies_patient = fieldspec.get("identifies_patient", False) patientlen = len(TSV_PATIENT_FIELD_PREFIX) patientfield = (colname[:patientlen] == TSV_PATIENT_FIELD_PREFIX) finaltype, size = get_type_size_as_text_from_sqltype(sqltype) if "pv" in fieldspec: valid_values = ",".join([str(x) for x in fieldspec["pv"]]) else: valid_values = "" might_need_scrubbing = cctype in ["TEXT"] and not anon tlfa = "Y" if might_need_scrubbing else "" system_id = ( colname == "id" or colname == "patient_id" or colname == TSV_PATIENT_FIELD_PREFIX + "id" or (colname[:1] == "_" and colname[-3:] == "_pk") ) internal_field = ( colname[:1] == "_" and not patientfield ) or ( patientfield and colname[patientlen:patientlen + 1] == "_" ) if system_id or internal_field: security_status = "1" # drop (e.g. for pointless internal keys) elif identifies_patient and colname == TSV_PATIENT_FIELD_PREFIX + "dob": security_status = "3" # truncate (e.g. DOB, postcode) elif identifies_patient: security_status = "2" # use to scrub else: security_status = "4" # bring through if system_id: feft = "34" # patient ID; other internal keys elif cc_db.cctype_is_date(cctype): feft = "4" # dates else: feft = "1" # text, numbers return OrderedDict([ ("Tab", "CamCOPS"), ("Form name", taskname), ("CRIS tree label", colname), ("Source system table name", tablename), ("SQL column name", colname), ("Front end field type", feft), ("Valid values", valid_values), ("Result column name", colname), ("Family doc tab name", ""), ("Family doc form name", ""), ("Security status", security_status), ("Exclude", ""), ("End SQL Type", finaltype), ("Header field (Y/N)", ""), ("Header field name", ""), ("Header field active (Y/N)", ""), ("View name", ""), ("Exclude from family doc", ""), ("Tag list - fields anon", tlfa), ("Anon type", ""), # formerly "Additional info" ("Form start date", ""), ("Form end date", ""), ("Source", ""), ("Size", size), ("Header logic", ""), ("Patient/contact", ""), ("Comments", comment), ])
def get_cris_dd_rows_from_fieldspecs( taskname: str, tablename: str, fieldspecs: "FIELDSPECLIST_TYPE") -> List[Dict]: rows = [] for fs in fieldspecs: rows.append(get_cris_dd_row(taskname, tablename, fs)) return rows