Source code for linkml.generators.jsonschemagen

import logging
import os
from copy import deepcopy
from typing import Union, TextIO, Optional, Dict, Tuple

import click
from jsonasobj2 import JsonObj, as_json
from linkml_runtime.linkml_model.meta import SchemaDefinition, ClassDefinition, SlotDefinition, EnumDefinition, PermissibleValue, PermissibleValueText
from linkml_runtime.utils.formatutils import camelcase, be, underscore

from linkml.utils.generator import Generator, shared_arguments

# Map from underlying python data type to json equivalent
# Note: The underlying types are a union of any built-in python datatype + any type defined in
#       linkml-runtime/utils/metamodelcore.py
# Note the keys are all lower case
# Each value is a (JSON Schema "type", JSON Schema "format" or None) pair.
# Base types that are not listed here are looked up with a ("string", None)
# default by the generator below.
json_schema_types: Dict[str, Tuple[str, Optional[str]]] = {
    "int": ("integer", None),
    "integer": ("integer", None),
    "bool": ("boolean", None),
    "boolean": ("boolean", None),
    "float": ("number", None),
    "double": ("number", None),
    "decimal": ("number", None),
    "xsddate": ("string", "date"),
    "xsddatetime": ("string", "date-time"),
    "xsdtime": ("string", "time"),
}

# Suffix appended to a class definition name to name its "lax" twin, i.e. the
# copy of the definition in which the identifier is not required.
WITH_OPTIONAL_IDENTIFIER_SUFFIX = '__identifier_optional'

class JsonSchemaGenerator(Generator):
    """
    Generates JSONSchema documents from a LinkML SchemaDefinition

    - Each linkml class generates a schema
    - inheritance hierarchies are rolled-down from ancestors
    - Composition not yet implemented
    - Enumerations treated as strings
    - Foreign key references are treated as semantics-free strings
    """
    generatorname = os.path.basename(__file__)
    generatorversion = "0.0.2"
    valid_formats = ["json"]
    visit_all_class_slots = True

    def __init__(self, schema: Union[str, TextIO, SchemaDefinition], top_class: Optional[str] = None,
                 **kwargs) -> None:
        """
        Instantiation

        :param schema: schema source, forwarded unchanged to the base generator
        :param top_class: root class for JSONSchema generation
        :param kwargs: forwarded unchanged to the base generator
        """
        super().__init__(schema, **kwargs)
        # Document under construction; created in visit_schema
        self.schemaobj: Optional[JsonObj] = None
        # Definition of the class currently being visited; created in visit_class
        self.clsobj: Optional[JsonObj] = None
        self.inline = False
        self.topCls = top_class  # JSON object is one instance of this
        self.entryProperties = {}
        # JSON-Schema does not have inheritance,
        # so we duplicate slots from inherited parents and mixins
        self.visit_all_slots = True
        # Maps e.g. Person --> (identifier slot name, Person__identifier_optional)
        # for use when Person is a range of an inlined-as-dict slot
        self.optional_identifier_class_map: Dict[str, Tuple[str, str]] = {}

    def visit_schema(self, inline: bool = False, not_closed=True, **kwargs) -> None:
        """
        Create the top level JSON Schema object for the schema being generated.

        :param inline: stored on ``self.inline``
        :param not_closed: value of the top level ``additionalProperties``
        :param kwargs: ignored
        """
        self.inline = inline
        self.schemaobj = JsonObj(title=self.schema.name,
                                 type="object",
                                 metamodel_version=self.schema.metamodel_version,
                                 version=self.schema.version if self.schema.version else None,
                                 properties={},
                                 additionalProperties=not_closed)
        for prop_name, cls_name in self.entryProperties.items():
            self.schemaobj['properties'][prop_name] = {
                'type': "array",
                'items': {'$ref': f"#/$defs/{camelcase(cls_name)}"}}
        self.schemaobj['$schema'] = "http://json-schema.org/draft-07/schema#"
        self.schemaobj['$id'] = self.schema.id
        self.schemaobj['$defs'] = JsonObj()
        self.schemaobj['required'] = []

    def end_schema(self, **_) -> None:
        """
        Add the lax class definitions and print the finished document as JSON.

        A more lax version is created for every class that is used as an inlined
        dict reference; in this version, the primary key/identifier is optional,
        since it is used as the key of the dict.
        """
        defs = self.schemaobj['$defs']
        for cls_name, (id_slot, cls_name_lax) in self.optional_identifier_class_map.items():
            try:
                strict_cls = defs[cls_name]
            except (KeyError, AttributeError):
                # visit_class emits no definition for mixin/abstract classes, so
                # there may be nothing to copy; skip instead of aborting the run.
                logging.warning("No JSON Schema definition for %s; not generating %s",
                                cls_name, cls_name_lax)
                continue
            lax_cls = deepcopy(strict_cls)
            try:
                # 'required' is filled with underscore()-normalised names in
                # visit_class_slot, so the same normalisation is applied here.
                lax_cls.required.remove(underscore(id_slot))
            except ValueError:
                # The identifier was not required in the first place.
                pass
            defs[cls_name_lax] = lax_cls
        print(as_json(self.schemaobj, sort_keys=True))

    def visit_class(self, cls: ClassDefinition) -> bool:
        """
        Start a new definition object for ``cls``.

        :param cls: class being visited
        :return: False for mixin and abstract classes (no definition is started), otherwise True
        """
        if cls.mixin or cls.abstract:
            return False
        additional_properties = bool(self.is_class_unconstrained(cls))
        self.clsobj = JsonObj(title=camelcase(cls.name),
                              type='object',
                              properties=JsonObj(),
                              required=[],
                              additionalProperties=additional_properties,
                              description=be(cls.description))
        return True

    def end_class(self, cls: ClassDefinition) -> None:
        """Store the definition built for ``cls`` under ``$defs``."""
        self.schemaobj['$defs'][camelcase(cls.name)] = self.clsobj

    def visit_enum(self, enum: EnumDefinition) -> bool:
        """
        Add a string definition, restricted to the permissible values, for ``enum``.

        :param enum: enumeration being visited
        :raises ValueError: if a permissible value has an unsupported type
        """
        # TODO: this only works with explicitly permitted values. It will need to be extended to
        # support other pv_formula
        def extract_permissible_text(pv):
            if isinstance(pv, (str, PermissibleValueText)):
                return pv
            if isinstance(pv, PermissibleValue):
                return pv.text.code
            raise ValueError(f'Invalid permissible value in enum {enum}: {pv}')

        permissible_values_texts = [extract_permissible_text(pv) for pv in enum.permissible_values or []]
        self.schemaobj['$defs'][camelcase(enum.name)] = JsonObj(title=camelcase(enum.name),
                                                                type='string',
                                                                enum=permissible_values_texts,
                                                                description=be(enum.description))

    def _jsonschema_key_slot(self, range_cls: ClassDefinition) -> Optional[SlotDefinition]:
        """Return the first slot of ``range_cls`` that can act as a dict key, or None."""
        # TODO: extension_tag should be declared as a key in extensions.yaml in metamodel
        tag_is_key = range_cls.name in ('extension', 'annotation')
        for slot_name in range_cls.slots:
            candidate = self.schema.slots[slot_name]
            if candidate.identifier or candidate.key or (candidate.alias == 'tag' and tag_is_key):
                return candidate
        return None

    def visit_class_slot(self, cls: ClassDefinition, aliased_slot_name: str, slot: SlotDefinition) -> None:
        """
        Add the property for ``slot`` to the definition of the class being visited.

        :param cls: class that owns the slot
        :param aliased_slot_name: name of the slot; used (underscored) as the property name
        :param slot: slot definition
        """
        typ = "string"  # JSON Schema type (https://json-schema.org/understanding-json-schema/reference/type.html)
        reference = None  # Reference to a JSON schema entity (https://json-schema.org/understanding-json-schema/structuring.html#ref)
        fmt = None  # JSON Schema format (https://json-schema.org/understanding-json-schema/reference/string.html#format)
        reference_obj = None
        inlined_class = False
        if slot.range in self.schema.types:
            (typ, fmt) = json_schema_types.get(self.schema.types[slot.range].base.lower(), ("string", None))
        elif slot.range in self.schema.enums:
            reference_obj = camelcase(slot.range)
            reference = f"#/$defs/{reference_obj}"
        elif slot.range in self.schema.classes and slot.inlined:
            reference_obj = camelcase(slot.range)
            reference = f"#/$defs/{reference_obj}"
            inlined_class = True
        # Anything else (including a class range that is not inlined) stays a plain string.

        if inlined_class:
            # Only a class range can be inlined; an 'inlined' flag on a slot whose
            # range is an enum or a type is handled by the scalar branch below.
            id_slot = self._jsonschema_key_slot(self.schema.classes[slot.range])
            # If inline we have to include redefined slots
            ref = JsonObj()
            ref['$ref'] = reference
            if not slot.multivalued:
                prop = ref
            elif id_slot is not None and not slot.inlined_as_list:
                # Inlined as a dict keyed by identifier: the values use the lax
                # definition that end_schema creates.
                prop = JsonObj(additionalProperties={'$ref': f'{reference}{WITH_OPTIONAL_IDENTIFIER_SUFFIX}'})
                id_slot_name = id_slot.alias if id_slot.alias is not None else id_slot.name
                self.optional_identifier_class_map[reference_obj] = \
                    (id_slot_name, f'{reference_obj}{WITH_OPTIONAL_IDENTIFIER_SUFFIX}')
            else:
                prop = JsonObj(type="array", items=ref)
        else:
            if reference is not None:
                item = {'$ref': reference}
            elif fmt is None:
                item = {'type': typ}
            else:
                item = {'type': typ, 'format': fmt}
            prop = JsonObj(type="array", items=item) if slot.multivalued else JsonObj(item)

        property_name = underscore(aliased_slot_name)
        if slot.description:
            prop.description = slot.description
        if slot.required:
            self.clsobj.required.append(property_name)
        if slot.pattern:
            # See https://github.com/linkml/linkml/issues/193
            prop.pattern = slot.pattern
        if slot.minimum_value is not None:
            prop.minimum = slot.minimum_value
        if slot.maximum_value is not None:
            prop.maximum = slot.maximum_value
        self.clsobj.properties[property_name] = prop

        # Slots of the top class are also exposed as properties of the document itself.
        if self.topCls is not None:
            is_top_class = camelcase(self.topCls) == camelcase(cls.name)
        else:
            is_top_class = bool(cls.tree_root)
        if is_top_class:
            self.schemaobj.properties[property_name] = prop
            if slot.required:
                self.schemaobj.required.append(property_name)
@shared_arguments(JsonSchemaGenerator)
@click.command()
@click.option(
    "-i", "--inline",
    is_flag=True,
    help=(" Generate references to types rather than inlining them."
          " Note that declaring a slot as inlined: true will always inline the class "),
)
@click.option(
    "-t", "--top-class",
    help=" Top level class; slots of this class will become top level properties in the json-schema ",
)
@click.option(
    "--not-closed/--closed",
    default=True,
    show_default=True,
    help=" Set additionalProperties=False if closed otherwise true if not closed at the global level ",
)
def cli(yamlfile, **kwargs):
    """ Generate JSON Schema representation of a LinkML model """
    # The same keyword arguments go to both the constructor and serialize().
    generator = JsonSchemaGenerator(yamlfile, **kwargs)
    rendered = generator.serialize(**kwargs)
    print(rendered)


if __name__ == '__main__':
    cli()