Source code for gatenlp.docformats.msgpack

"""
GATE-specific (de)serialisation of documents using MsgPack
"""
import json
import gzip
from ..document import Document
from ..annotation import Annotation
from ..annotation_set import AnnotationSet
from ..changelog import ChangeLog


[docs]def get_object_encoder(**kwargs): """ Returns a function for encoding our own objects. This simply checks if the object has the method "json_repr" and if yes, calls it with the kwargs we got. :return: """ # Todo: check https://realpython.com/python-json/#encoding-and-decoding-custom-python-objects and similar # again for how to do this correctly. Instead of providing our own default method, maybe override # the JSONEncoder class: has the advantage that we can fallback to the default default method! def object_encoder(obj): if hasattr(obj, "_json_repr"): return obj._json_repr(**kwargs) else: # objtypename = obj.__class__.__name__ raise TypeError("Cannot JSON-serialise {} of type {}".format(obj, type(obj))) return object_encoder
[docs]def get_object_hook(**kwargs): """ Returns a method that will try to convert the passed map into one of our objects :param kwargs: the kwargs to use for converting back. :return: the object hook function """ def object_hook(thedict): # NOTE: we need to explicitly see the type, duck typing could get mislead # by other objects that just happen to have similar fields! if not "gatenlp_type" in thedict: return thedict ourtype = thedict.get("gatenlp_type") if ourtype == "Document": return Document._from_json_map(thedict, **kwargs) elif ourtype == "Annotation": return Annotation._from_json_map(thedict, **kwargs) elif ourtype == "AnnotationSet": return AnnotationSet._from_json_map(thedict, **kwargs) elif ourtype == "ChangeLog": return ChangeLog._from_json_map(thedict, **kwargs) else: return thedict return object_hook
[docs]def load(fp, **kwargs): """ Load gatenlp object from fp, a file-like object and return it. :param fp: a file-like object, as required by json.load :return: the gatenlp object """ return json.load(fp, object_hook=get_object_hook(**kwargs))
[docs]def loads(str, **kwargs): """ Create gatenlp object from JSON string and return it. :param str: JSON string :return: the gatenlp object """ return json.loads(str, object_hook=get_object_hook(**kwargs))
[docs]def dump(fp, obj, indent=None, **kwargs): """ Write the given gatenlp object to the file. :param fp: a file like object as required by json.dump :param obj: the object to save :param indent: passed on to jsom.dump :param kwargs: :return: """ json.dump(fp, obj, indent=indent, default=get_object_encoder(**kwargs))
[docs]def dumps(obj, indent=None, **kwargs): """ Create JSON string representing the given object. :param obj: the object :param indent: passed on to json.dumps :param kwargs: offset_type: if specified and OFFSET_TYPE_JAVA, convert the offsets to java offsets in the JSON offset_mapper: if specified, used for the offset mapping if an offset mapper cannot otherwise be found :return: JSON string """ return json.dumps(obj, indent=indent, default=get_object_encoder(**kwargs))
[docs]def load_file(filename, **kwargs): """ Shortcut for opening the file for reading and loading from the stream. If the filename ends with ".gz" the file is automatically uncompressed. :param filename: file to load :param kwargs: :return: the loaded object """ if filename.endswith(".gz"): opener = gzip.open mode = "rt" encoding = "utf-8" else: opener = open mode = "rt" encoding = "utf-8" with opener(filename, mode, encoding=encoding) as fp: return load(fp, **kwargs)
[docs]def dump_file(obj, filename, indent=None, **kwargs): """ Shortcut for opening the file for writing and dumping to the stream. If the file name ends with .gz, automatically compresses the output file. :param obj: the object to save :param filename: the file to write to :param indent: passed on to json.dump :param kwargs: :return: """ if filename.endswith(".gz"): opener = gzip.open mode = "wt" encoding = "utf-8" else: opener = open mode = "wt" encoding = "utf-8" with opener(filename, mode, encoding=encoding) as fp: dump(obj, fp)