Source code for linkml.utils.generator

import abc
import logging
from contextlib import redirect_stdout
from io import StringIO
from typing import List, Set, Union, TextIO, Optional, cast, Callable, Type, Dict
import re

import click
from click import Command, Argument, Option

from linkml_runtime.linkml_model.meta import SchemaDefinition, ClassDefinition, SlotDefinition, ClassDefinitionName, \
    TypeDefinition, Element, SlotDefinitionName, TypeDefinitionName, PrefixPrefixPrefix, ElementName, \
    SubsetDefinition, SubsetDefinitionName, EnumDefinition, EnumDefinitionName, Definition
from linkml_runtime.utils.formatutils import camelcase, underscore
from linkml.utils.mergeutils import alias_root
from linkml.utils.schemaloader import SchemaLoader
from linkml.utils.typereferences import References

# Name of the default logging level; used as the default of the --log_level CLI option
DEFAULT_LOG_LEVEL: str = "WARNING"
# Numeric form of the default logging level; used as the default log_level of Generator.__init__
DEFAULT_LOG_LEVEL_INT: int = logging.WARNING


class Generator(metaclass=abc.ABCMeta):
    """
    Base class for schema generators.

    The constructor obtains a resolved schema (from a SchemaLoader or from another Generator);
    the ``visit_*`` / ``end_*`` hooks are called while the schema is traversed.
    """
    generatorname: str = None                   # Set to os.path.basename(__file__)
    generatorversion: str = None                # Generator version identifier
    valid_formats: List[str] = []               # Allowed formats - first format is default
    directory_output: bool = False              # True means output is to a directory, False is to stdout
    base_dir: str = None                        # Base directory of schema
    visit_all_class_slots: bool = False         # False means only visit own slots, True means visit all slots
    visits_are_sorted: bool = True              # True means visit basic types in alphabetical order, False in entry order
    sort_class_slots: bool = False              # True means visit class slots in alphabetical order
    metamodel_name_map: Dict[str, str] = None   # Allows mapping of names of metamodel elements such as slot, etc

    def __init__(self,
                 schema: Union[str, TextIO, SchemaDefinition, "Generator"],
                 format: Optional[str] = None,
                 metadata: bool = True,
                 useuris: Optional[bool] = None,
                 importmap: Optional[str] = None,
                 log_level: int = DEFAULT_LOG_LEVEL_INT,
                 mergeimports: Optional[bool] = True,
                 source_file_date: Optional[str] = None,
                 source_file_size: Optional[int] = None,
                 logger: Optional[logging.Logger] = None,
                 **kwargs) -> None:
        """
        Constructor

        :param schema: metamodel compliant schema.  Can be URI, file name, actual schema, another generator, an
               open file or a pre-parsed schema.
        :param format: expected output format; defaults to the first entry of ``valid_formats``
        :param metadata: True means include date, generator, etc. information in source header if appropriate
        :param useuris: True means declared class slot uri's are used.  False means use model uris
        :param importmap: File name of import mapping file -- maps import name/uri to target
        :param log_level: Logging level (only applied when no ``logger`` is supplied)
        :param mergeimports: True means merge non-linkml sources into importing package.  False means separate
               packages.
        :param source_file_date: Modification date of input source file
        :param source_file_size: Source file size
        :param logger: pre-set logger
        :param kwargs: accepted and ignored by this base constructor
        :raises AssertionError: if ``format`` is not one of ``valid_formats``
        """
        if logger:
            self.logger = logger
        else:
            logging.basicConfig()
            self.logger = logging.getLogger(self.__class__.__name__)
            self.logger.setLevel(log_level)
        if format is None:
            format = self.valid_formats[0]
        assert format in self.valid_formats, f"Unrecognized format: {format}"
        self.format = format
        self.emit_metadata = metadata
        self.merge_imports = mergeimports
        self.source_file_date = source_file_date if metadata else None
        self.source_file_size = source_file_size if metadata else None

        if isinstance(schema, Generator):
            # Reuse the already-resolved state (and the logger) of the other generator
            source = schema
            self.logger = source.logger
        else:
            source = SchemaLoader(schema, self.base_dir, useuris=useuris, importmap=importmap, logger=self.logger,
                                  mergeimports=mergeimports, emit_metadata=metadata,
                                  source_file_date=self.source_file_date, source_file_size=self.source_file_size)
            source.resolve()

        # A Generator and a resolved SchemaLoader expose the same set of attributes
        self.schema = source.schema
        self.synopsis = source.synopsis
        self.loaded = source.loaded
        self.namespaces = source.namespaces
        self.base_dir = source.base_dir
        self.importmap = source.importmap
        self.source_file_date = source.source_file_date
        # Misspelled name that earlier code assigned instead of source_file_date; kept as an alias
        self.source_file_data = source.source_file_date
        self.source_file_size = source.source_file_size
        self.schema_location = source.schema_location
        self.schema_defaults = source.schema_defaults
def serialize(self, **kwargs) -> str:
    """
    Traverse the schema, calling the visitor hooks, and return whatever they wrote to stdout.

    Order of traversal: visit_schema, subsets, types, enums, slots, classes (each with its slots and
    end_class when visit_class returns true), end_schema.

    :param kwargs: Generator specific parameters, handed to visit_schema and end_schema
    :return: Generated output
    """
    def visit_order(entries, sort_key):
        # Case-insensitive name order when visits_are_sorted, otherwise the order the schema supplies
        return sorted(entries, key=sort_key) if self.visits_are_sorted else entries

    def item_name(item):
        return item[0].lower()

    def element_name(defn):
        return defn.name.lower()

    captured = StringIO()
    with redirect_stdout(captured):
        self.visit_schema(**kwargs)
        for _, subset in visit_order(self.schema.subsets.items(), item_name):
            self.visit_subset(subset)
        for _, typ in visit_order(self.schema.types.items(), item_name):
            self.visit_type(typ)
        for enum in visit_order(self.schema.enums.values(), element_name):
            self.visit_enum(enum)
        for _, slot in visit_order(self.schema.slots.items(), item_name):
            self.visit_slot(self.aliased_slot_name(slot), slot)
        for cls in visit_order(self.schema.classes.values(), element_name):
            if not self.visit_class(cls):
                continue
            class_slots = self.all_slots(cls) if self.visit_all_class_slots else self.own_slots(cls)
            for slot in class_slots:
                self.visit_class_slot(cls, self.aliased_slot_name(slot), slot)
            self.end_class(cls)
        self.end_schema(**kwargs)
    return captured.getvalue()
def visit_schema(self, **kwargs) -> None:
    """
    Hook visited once at the beginning of generation. Does nothing in this base implementation.

    @param kwargs: Arguments passed through from CLI -- implementation dependent
    """
    pass
def end_schema(self, **kwargs) -> None:
    """
    Hook visited once at the end of generation. Does nothing in this base implementation.

    @param kwargs: Arguments passed through from CLI -- implementation dependent
    """
    pass
def visit_class(self, cls: ClassDefinition) -> bool:
    """
    Hook visited once per schema class. This base implementation accepts every class.

    @param cls: class being visited
    @return: True means visit the class slots and end_class. False means skip and go on
    """
    accept = True
    return accept
def end_class(self, cls: ClassDefinition) -> None:
    """
    Hook visited after the class slots of ``cls`` (only if visit_class returned true).
    Does nothing in this base implementation.

    @param cls: class being visited
    """
    pass
def visit_class_slot(self, cls: ClassDefinition, aliased_slot_name: str, slot: SlotDefinition) -> None:
    """
    Hook visited for each slot of a class. When the class-level flag visit_all_class_slots is true
    every slot of the class is visited, otherwise just its own slots.
    Does nothing in this base implementation.

    @param cls: containing class
    @param aliased_slot_name: Aliased slot name.  May not be unique across all class slots
    @param slot: slot being visited
    """
    pass
def visit_slot(self, aliased_slot_name: str, slot: SlotDefinition) -> None:
    """
    Hook visited once for every slot definition in the schema.
    Does nothing in this base implementation.

    @param aliased_slot_name: Aliased name of the slot.  May not be unique
    @param slot: visited slot
    """
    pass
def visit_type(self, typ: TypeDefinition) -> None:
    """
    Hook visited once for every type definition in the schema.
    Does nothing in this base implementation.

    @param typ: Type definition
    """
    pass
def visit_subset(self, subset: SubsetDefinition) -> None:
    """
    Hook visited once for every subset definition in the schema.
    Does nothing in this base implementation.

    @param subset: Subset definition
    """
    pass
def visit_enum(self, enum: EnumDefinition) -> None:
    """
    Hook visited once for every enum definition in the schema.
    Does nothing in this base implementation.

    @param enum: Enum definition
    """
    pass
# =============================
# Helper methods
# =============================
def own_slots(self, cls: Union[ClassDefinitionName, ClassDefinition]) -> List[SlotDefinition]:
    """
    Return the slots of ``cls`` that its is_a parent does not also list.

    Each name in ``cls.slots`` is kept unless the is_a parent lists the same name, or a slot with the
    same alias root (per ``alias_root``) has already been kept for this class.

    @param cls: class name or class definition
    @return: list of owned slots.  Sorted by slot name if sort_class_slots is true, otherwise in class order
    """
    owner = cls if isinstance(cls, ClassDefinition) else self.schema.classes[cls]
    is_a_parent = self.schema.classes[owner.is_a] if owner.is_a else None

    recorded_roots = set()
    owned = []
    for slot_name in owner.slots:
        root_name = alias_root(self.schema, slot_name)
        if root_name in recorded_roots:
            continue
        if is_a_parent and slot_name in is_a_parent.slots:
            continue
        owned.append(self.schema.slots[slot_name])
        recorded_roots.add(root_name)

    if self.sort_class_slots:
        return sorted(owned, key=lambda s: s.name)
    return owned
def own_slot_names(self, cls: Union[ClassDefinitionName, ClassDefinition]) -> List[SlotDefinitionName]:
    """
    Return the names of the slots reported by ``own_slots`` for ``cls``, in the same order.

    @param cls: class name or class definition
    @return: list of slot names
    """
    names = []
    for owned in self.own_slots(cls):
        names.append(owned.name)
    return names
def all_slots(self, cls: Union[ClassDefinitionName, ClassDefinition], *, cls_slots_first: bool = False,
              seen: Optional[Set[ClassDefinitionName]] = None) \
        -> List[SlotDefinition]:
    """
    Return the slots of a class, one per alias root.

    With ``cls_slots_first`` the own slots of the class come first, followed recursively by those of
    its is_a ancestors (in that mode the result is never sorted).  Otherwise the names in ``cls.slots``
    are resolved in order and the result is sorted by name when sort_class_slots is true.

    @param cls: class definition or class definition name
    @param cls_slots_first: True means return own slots at the top of the list
    @param seen: alias roots of slots already recorded.  Used for internal recursion
    @return: ordered list of slots in the class
    """
    class_def = cls if isinstance(cls, ClassDefinition) else self.schema.classes[cls]
    if seen is None:
        seen = set()
    # Looked up in both modes, exactly as before, even though only the first mode uses it
    is_a_parent = self.schema.classes[class_def.is_a] if class_def.is_a else None
    collected = []

    if cls_slots_first:
        for own in self.own_slots(class_def):
            root_name = alias_root(self.schema, own.name)
            if root_name in seen:
                continue
            collected.append(own)
            seen.add(root_name)
        inherited = self.all_slots(is_a_parent, cls_slots_first=cls_slots_first, seen=seen) if is_a_parent else []
        return collected + inherited

    for slot_name in class_def.slots:
        root_name = alias_root(self.schema, slot_name)
        if root_name in seen:
            continue
        collected.append(self.schema.slots[slot_name])
        seen.add(root_name)
    if self.sort_class_slots:
        return sorted(collected, key=lambda s: s.name)
    return collected
def parent(self, element: Union[ClassDefinition, SlotDefinition]) \
        -> Optional[Union[ClassDefinition, SlotDefinition]]:
    """
    Return the is_a parent of element, if any.

    @param element: class or slot definition
    @return: the parent class (for a class) or parent slot (otherwise); None when element has no is_a
    """
    if element.is_a is None:
        return None
    if isinstance(element, ClassDefinition):
        return self.schema.classes[element.is_a]
    return self.schema.slots[element.is_a]
def ancestors(self, element: Union[ClassDefinition, SlotDefinition]) -> List[ElementName]:
    """
    Return the name of element followed by the names of its is_a ancestors, nearest first.

    @param element: Slot or class definition
    @return: Ordered list of names, starting with the element itself
    """
    lineage = [element.name]
    if element.is_a is not None:
        lineage = lineage + self.ancestors(self.parent(element))
    return lineage
def neighborhood(self, elements: Union[str, ElementName, List[ElementName]]) \
        -> References:
    """
    Return all slots, classes, types and subsets that touch any element in elements, including the
    element itself.

    @param elements: Element name or list of element names to do proximity with
    @return: References object holding everything that touches the elements
    """
    if isinstance(elements, (str, ElementName)):
        elements = [elements]
    touches = References()
    for element in elements:
        if element in self.schema.classes:
            touches.classrefs.add(cast(ClassDefinitionName, element))
            cls = self.schema.classes[cast(ClassDefinitionName, element)]
            if cls.is_a:
                touches.classrefs.add(cls.is_a)
            # Mixins include apply_to's
            touches.classrefs.update(set(cls.mixins))
            # Ranges of the slots of the class
            for slotname in cls.slots:
                slot = self.schema.slots[slotname]
                if slot.range in self.schema.classes:
                    touches.classrefs.add(cast(ClassDefinitionName, slot.range))
                elif slot.range in self.schema.types:
                    touches.typerefs.add(cast(TypeDefinitionName, slot.range))
            # Direct is_a children of the class
            for cv in self.schema.classes.values():
                if cv.is_a == element:
                    touches.classrefs.add(cv.name)
            # Slots whose range is the class, and the domains of those slots
            if element in self.synopsis.rangerefs:
                for slotname in self.synopsis.rangerefs[element]:
                    touches.slotrefs.add(slotname)
                    if self.schema.slots[slotname].domain:
                        touches.classrefs.add(self.schema.slots[slotname].domain)
            if cls.in_subset:
                touches.subsetrefs.update(cls.in_subset)
        if element in self.schema.slots:
            touches.slotrefs.add(cast(SlotDefinitionName, element))
            slot = self.schema.slots[cast(SlotDefinitionName, element)]
            touches.slotrefs.update(set(slot.mixins))
            if slot.is_a:
                touches.slotrefs.add(slot.is_a)
            if element in self.synopsis.inverses:
                touches.slotrefs.update(self.synopsis.inverses[cast(SlotDefinitionName, element)])
            if slot.domain:
                touches.classrefs.add(slot.domain)
            if slot.range in self.schema.classes:
                touches.classrefs.add(cast(ClassDefinitionName, slot.range))
            elif slot.range in self.schema.types:
                touches.typerefs.add(cast(TypeDefinitionName, slot.range))
            if slot.in_subset:
                touches.subsetrefs.update(slot.in_subset)
            # Direct is_a children of the slot
            for sv in self.schema.slots.values():
                if sv.is_a == element:
                    touches.slotrefs.add(sv.name)
        if element in self.schema.types:
            touches.typerefs.add(cast(TypeDefinitionName, element))
            typ = self.schema.types[cast(TypeDefinitionName, element)]
            if element in self.synopsis.rangerefs:
                touches.slotrefs.update(self.synopsis.rangerefs[element])
            if typ.typeof:
                touches.typerefs.add(cast(TypeDefinitionName, typ.typeof))
            if typ.in_subset:
                touches.subsetrefs.update(typ.in_subset)
            # Types derived from this type are types, so they are recorded as type references
            for tv in self.schema.types.values():
                if tv.typeof == element:
                    touches.typerefs.add(tv.name)
        if element in self.schema.subsets:
            touches.subsetrefs.add(cast(SubsetDefinitionName, element))
            if element in self.synopsis.subsetrefs:
                touches.update(self.synopsis.subsetrefs[cast(SubsetDefinitionName, element)])
        # Nothing collected so far: element matched no class, slot, type or subset
        if not bool(touches):
            self.logger.warning(f"neighborhood({element}) - {element} is undefined")
    return touches
[docs] def range_type_path(self, typ: TypeDefinition) -> List[str]: """ Return a formatted list of range types from the base up :param typ: type definition whose name is to be formatted :return: List of possible types with base at the leftmost """ formatted_typ_name = self.class_or_type_name(typ.name) if typ.typeof: return self.range_type_path(self.schema.types[cast(TypeDefinitionName, typ.typeof)]) + [formatted_typ_name] elif typ.repr: return [typ.repr, formatted_typ_name] else: return [formatted_typ_name]
def class_identifier(self, def_or_name: Union[str, ClassDefinition, TypeDefinition]) -> Optional[SlotDefinitionName]:
    """
    Return the name of the first slot of a class that is flagged as identifier or key.

    :param def_or_name: class name or definition
    :return: name of class key (or identifier) slot if one exists; None if there is none or if
             def_or_name is neither a class definition nor the name of a schema class
    """
    if not isinstance(def_or_name, ClassDefinition):
        if def_or_name not in self.schema.classes:
            return None
        def_or_name = self.schema.classes[cast(ClassDefinitionName, def_or_name)]

    for slot_name in def_or_name.slots:
        candidate = self.schema.slots[slot_name]
        if candidate.identifier or candidate.key:
            return slot_name
    return None
def enum_identifier_path(self, enum_or_enumname: Union[str, EnumDefinition]) -> List[str]:
    """
    Return an enum_identifier path: 'str' followed by the camelcased enum name.

    :param enum_or_enumname: enum definition or enum name
    :return: two element path
    """
    if isinstance(enum_or_enumname, EnumDefinition):
        enum_name = enum_or_enumname.name
    else:
        enum_name = enum_or_enumname
    return ['str', camelcase(enum_name)]
def class_identifier_path(self, cls_or_clsname: Union[str, ClassDefinition], force_non_key: bool) -> List[str]:
    """
    Return the path closure to a class identifier if the class has a key and force_non_key is false,
    otherwise return a dictionary closure.

    :param cls_or_clsname: class definition or class name
    :param force_non_key: True means inlined even if the class has a key
    :return: path
    """
    if isinstance(cls_or_clsname, ClassDefinition):
        target = cls_or_clsname
    else:
        target = self.schema.classes[ClassDefinitionName(cls_or_clsname)]

    # The key is only looked up when the caller has not forced the inlined form
    key_slot = None if force_non_key else self.class_identifier(target)

    # No key or inlined, its closure is a dictionary
    if key_slot is None:
        return ['dict', self.class_or_type_name(target.name)]

    # We're dealing with a reference
    own_step = camelcase(target.name + ' ' + self.aliased_slot_name(key_slot))
    if target.is_a and self.class_identifier(target.is_a):
        # The parent has a key of its own: extend the parent's path
        return self.class_identifier_path(target.is_a, False) + [own_step]
    return self.slot_range_path(key_slot) + [own_step]
def slot_range_path(self, slot_or_name: Union[str, SlotDefinition]) -> List[str]:
    """
    Return an ordered list of slot ranges from distal to proximal.

    :param slot_or_name: slot whose range is being typed
    :return: ordered list of types from base type forward
    """
    if isinstance(slot_or_name, SlotDefinition):
        slot = slot_or_name
    else:
        slot = self.schema.slots[cast(SlotDefinitionName, slot_or_name)]

    target = slot.range
    if target in self.schema.types:
        # Type
        return self.range_type_path(self.schema.types[cast(TypeDefinitionName, target)])
    if target in self.schema.enums:
        # Enum
        return self.enum_identifier_path(target)
    # Anything else is handled as a class
    return self.class_identifier_path(target, bool(slot.inlined))
[docs] def aliased_slot_name(self, slot: Union[SlotDefinitionName, SlotDefinition]) -> SlotDefinitionName: """ Return the overloaded slot name -- the alias if one exists otherwise the actual name @param slot: either a slot name or a definition @return: overloaded name """ if isinstance(slot, str): slot = self.schema.slots[cast(SlotDefinitionName, slot)] return slot.alias if slot.alias else slot.name
def class_or_type_for(self, name: str) -> Optional[Element]:
    """
    Return the class, type or enum definition registered under name.  Classes are checked first,
    then types, then enums.

    :param name: element name
    :return: matching definition or None if name is none of the three
    """
    schema = self.schema
    if name in schema.classes:
        return schema.classes[ClassDefinitionName(name)]
    if name in schema.types:
        return schema.types[TypeDefinitionName(name)]
    if name in schema.enums:
        return schema.enums[EnumDefinitionName(name)]
    return None
[docs] def class_or_type_name(self, name: str) -> str: """ Return the camelcase representation of clsname if it is a valid class or type. Prepend "Unknown" if the name isn't valid """ if name in self.schema.classes: return camelcase(name) elif name in self.schema.types: typ = self.schema.types[cast(TypeDefinitionName, name)] if typ.typeof: return camelcase(name) else: return typ.base else: return "Unknown_" + camelcase(name)
def slot_for(self, name: str) -> Optional[Element]:
    """
    Look up ``name`` among the schema slots.

    :param name: slot name
    :return: result of ``schema.slots.get(name)``
    """
    slots = self.schema.slots
    return slots.get(name)
def slot_name(self, name: str) -> str:
    """
    Return the underscored version of the aliased slot name if name is a slot.  Otherwise return the
    underscored form of "unknown " followed by name.

    :param name: slot name
    :return: formatted slot name
    """
    found = self.slot_for(name)
    if found:
        raw_name = self.aliased_slot_name(found)
    else:
        raw_name = "unknown " + name
    return underscore(raw_name)
def subset_for(self, name: str) -> Optional[Element]:
    """
    Look up ``name`` among the schema subsets.

    :param name: subset name
    :return: result of ``schema.subsets.get(name)``
    """
    subsets = self.schema.subsets
    return subsets.get(name)
def subset_name(self, name: str) -> str:
    """
    Return the camelcased subset name, prefixed with "Unknown_" when ``subset_for`` finds no subset.

    :param name: subset name
    :return: formatted subset name
    """
    prefix = '' if self.subset_for(name) else "Unknown_"
    return prefix + camelcase(name)
[docs] def formatted_element_name(self, el_or_elname: Union[ElementName, Element], is_range_name: bool = False) -> Optional[str]: """ Return the default format for the name or the referenced element. Slots are under_scored, all others are CamelCased. Slot names are the alias, not the actual name :param el_or_elname: element or name to map :param is_range_name: True means that we're looking for a class or type. False means Slot or Subset. Only applies if el_or_elname is an ElementName (otherwise we know what we've got :return: Formatted name if type can be known else None """ if isinstance(el_or_elname, str): if is_range_name: return self.class_or_type_name(el_or_elname) \ if el_or_elname in self.schema.classes or \ el_or_elname in self.schema.types or \ el_or_elname == self.schema.default_range else None elif el_or_elname in self.schema.slots: return self.slot_name(cast(SlotDefinitionName, el_or_elname)) elif el_or_elname in self.schema.subsets: return self.subset_name(el_or_elname) return None elif isinstance(el_or_elname, (ClassDefinition, TypeDefinition)): return self.class_or_type_name(el_or_elname.name) elif isinstance(el_or_elname, SlotDefinition): return self.slot_name(el_or_elname.name) elif isinstance(el_or_elname, SubsetDefinition): return self.subset_name(el_or_elname.name) else: return None
[docs] def obj_for(self, el_or_elname: str, is_range_name: bool = False) -> Optional[Element]: if is_range_name: return self.class_or_type_for(el_or_elname) \ if el_or_elname in self.schema.classes or \ el_or_elname in self.schema.types or \ el_or_elname == self.schema.default_range else None elif el_or_elname in self.schema.slots: return self.slot_for(cast(SlotDefinitionName, el_or_elname)) elif el_or_elname in self.schema.subsets: return self.subset_for(el_or_elname) else: return self.class_or_type_for(el_or_elname)
def default_prefix(self) -> Optional[str]:
    """
    Return the default prefix for the schema.

    A default prefix containing '://' is returned as is; anything else is taken to be the name of a
    schema prefix and that prefix's reference is returned.

    @return: URI or prefix reference of the default prefix
    """
    declared = self.schema.default_prefix
    if '://' not in declared:
        # Basic loader tests for valid default prefix
        return self.schema.prefixes[PrefixPrefixPrefix(self.schema.default_prefix)].prefix_reference
    return self.schema.default_prefix
# TODO: add lru cache once we get identity into the classes
def domain_slots(self, cls: ClassDefinition) -> List[SlotDefinition]:
    """
    Return the slots of the class whose ``domain_of`` names the class itself or one of its mixins.

    :param cls: class definition
    :return: matching slots, in ``cls.slots`` order
    """
    # Resolve every slot name first, as before, so an unknown name fails before any filtering
    resolved = [self.schema.slots[slot_name] for slot_name in cls.slots]
    owned = []
    for slot in resolved:
        if cls.name in slot.domain_of or set(cls.mixins).intersection(slot.domain_of):
            owned.append(slot)
    return owned
def add_mappings(self, defn: Definition) -> None:
    """
    Process any mappings in defn, adding all of the mappings prefixes to the namespace map.

    The class_uri (for classes) or slot_uri (for slots) is treated as one more mapping.

    :param defn: Class or Slot Definition
    :raises ValueError: if a mapping is not of the form ``prefix:name``
    """
    self.add_id_prefixes(defn)
    # Concatenation builds a new list, so the appends below do not alter defn
    mappings = defn.mappings + defn.related_mappings + defn.close_mappings + \
        defn.narrow_mappings + defn.broad_mappings + defn.exact_mappings
    # see https://github.com/linkml/linkml/pull/283
    if isinstance(defn, ClassDefinition):
        mappings.append(defn.class_uri)
    if isinstance(defn, SlotDefinition):
        mappings.append(defn.slot_uri)
    for mapping in mappings:
        if mapping is None:
            # An unset class_uri / slot_uri carries no prefix
            continue
        if '://' in str(mapping):
            mcurie = self.namespaces.curie_for(mapping)
            if mcurie is None:
                self.logger.warning(f"No namespace defined for URI: {mapping}")
                # Absolute path - no prefix/name.  Go on with the remaining mappings
                continue
            mapping = mcurie
        if ':' not in mapping or len(mapping.split(':')) != 2:
            raise ValueError(f"Definition {defn.name} - unrecognized mapping: {mapping}")
        ns = mapping.split(':')[0]
        self.logger.debug(f'Adding {ns} from {mapping} // {defn}')
        if ns:
            self.add_prefix(ns)
def add_id_prefixes(self, element: Element) -> None:
    """
    Pass every id_prefix of element to ``add_prefix``.

    :param element: element whose id_prefixes are to be added
    """
    id_prefixes = element.id_prefixes
    for prefix in id_prefixes:
        self.add_prefix(prefix)
def add_prefix(self, ncname: str) -> None:
    """
    Add a prefix to the set of prefixes to emit.  A prefix that is not in the namespace map is
    reported with a warning and registered under a placeholder example.org URI.

    @param ncname: name to add
    """
    is_known = ncname in self.namespaces
    if not is_known:
        self.logger.warning(f"Unrecognized prefix: {ncname}")
        self.namespaces[ncname] = f"http://example.org/UNKNOWN/{ncname}/"
    self.emit_prefixes.add(ncname)
def get_metamodel_slot_name(self, slot_name: str) -> str:
    """
    Translate the name of a metamodel element through ``metamodel_name_map``.

    Allows for localization of some generators, such as the markdown generator: the client can
    control how metamodel elements are displayed, e.g. mapping the metamodel slot "slot" to "field".

    The lookup uses the lower-cased, underscored name.  If that is not mapped, the singular form
    (trailing "s" removed; "classes" becomes "class") is tried and an "s" is appended to the mapped
    value, so that if "slot" is mapped to "field" then "slots" is mapped to "fields".  The first
    letter of the result is upper-cased when the first letter of ``slot_name`` is.

    :param slot_name: metamodel element name
    :return: mapped name, or slot_name unchanged when there is no map or no entry
    """
    starts_upper = slot_name[0].isupper()

    def capitalize(s: str):
        return f'{s[0].upper()}{s[1:]}' if starts_upper else s

    normalized = underscore(slot_name).lower()
    # "classes" would otherwise lose only its final "s"
    singular = 'class' if normalized == 'classes' else re.sub(r's$', '', normalized)

    name_map = self.metamodel_name_map
    if name_map is None:
        return slot_name
    if normalized in name_map:
        return capitalize(name_map[normalized])
    if singular in name_map:
        return capitalize(f'{name_map[singular]}s')
    return slot_name
def is_class_unconstrained(self, cls: ClassDefinition):
    """
    Determine if the class is mapped to typing.Any, i.e. the unconstrained class.

    :param cls: class definition
    :return: true if the class_uri of the class is 'linkml:Any'
    """
    uri = cls.class_uri
    return uri == 'linkml:Any'
def shared_arguments(g: Type[Generator]) -> Callable[[Command], Command]:
    """
    Build a decorator that appends the command line parameters common to all generators to a
    click command: the ``yamlfile`` argument and the --format, --metadata, --useuris, --importmap,
    --log_level and --mergeimports options.

    :param g: generator class; its ``valid_formats`` supply the choices and default of --format
    :return: decorator that adds the parameters to a click Command and returns that command
    """
    _LOG_LEVEL_STRINGS = ['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG']

    def _log_level_string_to_int(log_level_string: str) -> int:
        """
        Convert a level name, or a leading abbreviation of one, to its ``logging`` constant.
        Matching is case-insensitive; an empty or unrecognized string yields ``logging.INFO``.
        Not attached to the --log_level option below, which hands the level name on unchanged.
        """
        log_level_string = log_level_string.upper()
        level = [e for e in _LOG_LEVEL_STRINGS if log_level_string and e.startswith(log_level_string)]
        log_level_int = getattr(logging, level[0], logging.INFO) if level else logging.INFO
        assert isinstance(log_level_int, int)
        return log_level_int

    def decorator(f: Command) -> Command:
        f.params.append(
            Argument(("yamlfile", ), type=click.Path(exists=True, dir_okay=False)))
        f.params.append(
            Option(("--format", "-f"), type=click.Choice(g.valid_formats),
                   help=f"Output format (default={g.valid_formats[0]})",
                   default=g.valid_formats[0]))
        f.params.append(
            Option(("--metadata/--no-metadata", ), default=True,
                   help="Include metadata in output (default=--metadata)"))
        f.params.append(
            Option(("--useuris/--metauris", ), default=True,
                   help="Use declared class and slot URIs instead of model URIs (default=--useuris)"))
        f.params.append(
            Option(("--importmap", "-im"), type=click.File(), help="Import mapping file")
        )
        f.params.append(
            Option(("--log_level", ), type=click.Choice(_LOG_LEVEL_STRINGS),
                   help=f"Logging level (default={DEFAULT_LOG_LEVEL})",
                   default=DEFAULT_LOG_LEVEL)
        )
        f.params.append(
            Option(("--mergeimports/--no-mergeimports", ), default=True,
                   help="Merge imports into source file (default=mergeimports)"))
        return f

    return decorator