from typing import List, Callable, Dict
import sys
from gatenlp.offsetmapper import OffsetMapper, OFFSET_TYPE_JAVA, OFFSET_TYPE_PYTHON
import importlib
class ChangeLog:
    """
    An ordered list of change dictionaries plus the offset type that the
    "start" and "end" entries of those dictionaries are expressed in.

    A freshly created ChangeLog is empty and uses OFFSET_TYPE_PYTHON.
    """

    def __init__(self):
        self.gatenlp_type = "ChangeLog"
        self.changes = []
        self.offset_type = OFFSET_TYPE_PYTHON

    def append(self, element: Dict):
        """
        Add a change to the end of the changelog.

        :param element: the change, must be a dict
        """
        assert isinstance(element, dict)
        self.changes.append(element)

    def __len__(self) -> int:
        """
        :return: the number of changes recorded so far
        """
        return len(self.changes)

    def _fixup_changes(self, method: Callable, replace=False) -> List[Dict]:
        """
        Apply the given offset conversion method to the "start" and "end" entries
        of every change that has them.

        :param method: a callable that maps a single offset to the converted offset,
            e.g. the convert_to_java or convert_to_python method of an offset mapper
        :param replace: if True, modifies the original change objects in the changelog,
            otherwise works on shallow copies and leaves the changelog untouched
        :return: the changes list of the instance if replace is True, otherwise a
            new list containing the converted copies
        """
        if replace:
            converted = self.changes
        else:
            converted = [dict(change) for change in self.changes]
        for change in converted:
            for key in ("start", "end"):
                if key in change:
                    change[key] = method(change[key])
        return converted

    def fixup_changes(self, offset_mapper, offset_type, replace=True):
        """
        Update the offsets of all annotations in this changelog to the desired
        offset type, if necessary. If the ChangeLog already has that offset type,
        this does nothing and returns the changes list of the instance.

        :param offset_mapper: a prepared offset mapper to use
        :param offset_type: the desired offset type
        :param replace: if True, replaces the original offsets in the original change
            objects and records the new offset type, otherwise creates new change
            objects and a new changes list and returns it
        :return: a reference to the modified changes
        :raises Exception: if offset_type is neither OFFSET_TYPE_JAVA nor OFFSET_TYPE_PYTHON
        """
        if offset_type == self.offset_type:
            return self.changes
        if offset_type == OFFSET_TYPE_JAVA:
            method = offset_mapper.convert_to_java
        elif offset_type == OFFSET_TYPE_PYTHON:
            method = offset_mapper.convert_to_python
        else:
            raise Exception("Not a proper offset type: {}".format(offset_type))
        if replace:
            self.offset_type = offset_type
        return self._fixup_changes(method, replace=replace)

    def __repr__(self) -> str:
        return "ChangeLog([{}])".format(",".join([str(c) for c in self.changes]))

    def _changes_for_export(self, kwargs):
        """
        Figure out the changes list and offset type to use for serialization.

        :param kwargs: the keyword arguments of the export method; "offset_type"
            (None or missing means: keep the current offset type) and
            "offset_mapper" are looked at
        :return: a tuple (changes, offset_type); changes is the list of the instance if
            no conversion is needed, otherwise a list of converted copies
        :raises Exception: if a conversion is needed but no offset mapper was given, or
            if the requested offset type is not a known offset type
        """
        wanted = kwargs.get("offset_type")
        # None is what save/save_mem pass along by default, it means "keep as is"
        if wanted is None or wanted == self.offset_type:
            return self.changes, self.offset_type
        om = kwargs.get("offset_mapper")
        if om is None:
            raise Exception("Need to convert offsets, but no offset_mapper parameter given")
        if wanted == OFFSET_TYPE_JAVA:
            method = om.convert_to_java
        elif wanted == OFFSET_TYPE_PYTHON:
            method = om.convert_to_python
        else:
            raise Exception("Not a proper offset type: {}".format(wanted))
        # never touch the changelog itself when exporting
        return self._fixup_changes(method, replace=False), wanted

    def _ensure_python_offsets(self, **kwargs):
        """
        If this changelog uses OFFSET_TYPE_JAVA, convert it to OFFSET_TYPE_PYTHON.
        The converted changes are stored as a new list of new dicts, so change
        dicts shared with the caller are not modified.

        :param kwargs: "offset_mapper" is used if given and not None, otherwise
            an OffsetMapper is created from "document" if given and not None
        :return: this changelog
        :raises Exception: if a conversion is needed but neither an offset mapper
            nor a document is available
        """
        if self.offset_type != OFFSET_TYPE_JAVA:
            return self
        om = kwargs.get("offset_mapper")
        if om is None:
            document = kwargs.get("document")
            if document is None:
                raise Exception(
                    "Loading a changelog with offset_type JAVA, need kwarg 'offset_mapper' or 'document'"
                )
            om = OffsetMapper(document)
        self.changes = self._fixup_changes(om.convert_to_python, replace=False)
        self.offset_type = OFFSET_TYPE_PYTHON
        return self

    @staticmethod
    def _from_mapping(mapping, kwargs) -> "ChangeLog":
        """
        Create a ChangeLog from the entries "changes" and "offset_type" of the
        given mapping and convert it to python offsets if necessary.

        :param mapping: something with a get method, e.g. a dict
        :param kwargs: dict of keyword arguments, see _ensure_python_offsets
        :return: the new ChangeLog
        """
        cl = ChangeLog()
        changes = mapping.get("changes")
        # a missing changes entry gives an empty changelog so that len() works
        cl.changes = changes if changes is not None else []
        cl.offset_type = mapping.get("offset_type")
        return cl._ensure_python_offsets(**kwargs)

    def _json_repr(self, **kwargs) -> Dict:
        """
        Return the representation to use for JSON serialization.

        :param kwargs: "offset_type": the offset type to use in the representation,
            None or missing to keep the current one; "offset_mapper": needed if the
            offsets have to get converted
        :return: dict with the entries "changes", "offset_type" and "gatenlp_type"
        """
        changes, offset_type = self._changes_for_export(kwargs)
        return {
            "changes": changes,
            "offset_type": offset_type,
            "gatenlp_type": self.gatenlp_type,
        }

    @staticmethod
    def _from_json_map(jsonmap, **kwargs) -> "ChangeLog":
        """
        Create a ChangeLog from the JSON map representation. A changelog stored
        with java offsets is converted to python offsets.

        :param jsonmap: map with the entries "changes" and "offset_type"
        :param kwargs: "offset_mapper" or "document", one of them is needed if the
            offsets have to get converted
        :return: the ChangeLog
        """
        return ChangeLog._from_mapping(jsonmap, kwargs)

    def to_dict(self, **kwargs):
        """
        Return the dictionary representation of this changelog.

        :param kwargs: "offset_type": the offset type to use in the representation,
            None or missing to keep the current one; "offset_mapper": needed if the
            offsets have to get converted
        :return: dict with the entries "changes" and "offset_type"
        """
        changes, offset_type = self._changes_for_export(kwargs)
        return {"changes": changes, "offset_type": offset_type}

    @staticmethod
    def from_dict(dictrepr, **kwargs):
        """
        Create a ChangeLog from the dictionary representation. A changelog stored
        with java offsets is converted to python offsets.

        :param dictrepr: dict with the entries "changes" and "offset_type", or None
        :param kwargs: "offset_mapper" or "document", one of them is needed if the
            offsets have to get converted
        :return: the ChangeLog, or None if dictrepr is None
        """
        if dictrepr is None:
            return None
        return ChangeLog._from_mapping(dictrepr, kwargs)

    def save(self, whereto, fmt="json", offset_type=None, offset_mapper=None,
             mod="gatenlp.serialization.default", **kwargs):
        """
        Save the changelog in the given format.

        Additional keyword parameters for format "json":
        * as_array: boolean, if True stores as array instead of dictionary

        :param whereto: either a file name or something that has a write(string) method.
        :param fmt: serialization format, one of "json", "msgpack" or "pickle"
        :param offset_type: store using the given offset type or keep the current if None
        :param offset_mapper: needed if the offset type should get changed
        :param mod: module to use, its FORMATS entry for fmt does the actual work
        :param kwargs: additional parameters for the format
        :return:
        """
        m = importlib.import_module(mod)
        ser = m.FORMATS[fmt]
        ser.save(ChangeLog, self, to_file=whereto, offset_type=offset_type,
                 offset_mapper=offset_mapper, **kwargs)

    def save_mem(self, fmt="json", offset_type=None, offset_mapper=None,
                 mod="gatenlp.serialization.default", **kwargs):
        """
        Serialize the changelog in memory instead of saving it to a file.

        Additional keyword parameters for format "json":
        * as_array: boolean, if True stores as array instead of dictionary

        :param fmt: serialization format, one of "json", "msgpack" or "pickle"
        :param offset_type: store using the given offset type or keep the current if None
        :param offset_mapper: needed if the offset type should get changed
        :param mod: module to use, its FORMATS entry for fmt does the actual work
        :param kwargs: additional parameters for the format
        :return: whatever the save method of the serializer returns
        """
        m = importlib.import_module(mod)
        ser = m.FORMATS[fmt]
        return ser.save(ChangeLog, self, to_mem=True, offset_type=offset_type,
                        offset_mapper=offset_mapper, **kwargs)

    @staticmethod
    def load(wherefrom, fmt="json", offset_mapper=None, mod="gatenlp.serialization.default", **kwargs):
        """
        Load a changelog from a file.

        Note: the offset type is always converted to PYTHON when loading!

        :param wherefrom: passed on to the serializer as from_file
        :param fmt: serialization format, selects the entry in FORMATS of the module
        :param offset_mapper: offset mapper in case the offsets need to get converted
        :param mod: module to use
        :param kwargs: additional parameters for the format
        :return: whatever the serializer loaded, converted to python offsets
        """
        m = importlib.import_module(mod)
        ser = m.FORMATS[fmt]
        doc = ser.load(ChangeLog, from_file=wherefrom, offset_mapper=offset_mapper, **kwargs)
        if doc is not None:
            # only does something if the serializer left java offsets in place
            doc._ensure_python_offsets(offset_mapper=offset_mapper, **kwargs)
        return doc

    @staticmethod
    def load_mem(wherefrom, fmt="json", offset_mapper=None, mod="gatenlp.serialization.default", **kwargs):
        """
        Load a changelog from its in-memory serialization.

        Note: the offset type is always converted to PYTHON when loading!

        :param wherefrom: the string to deserialize
        :param fmt: serialization format, selects the entry in FORMATS of the module
        :param offset_mapper: offset mapper in case the offsets need to get converted
        :param mod: module to use
        :param kwargs: additional parameters for the format
        :return: whatever the serializer loaded, converted to python offsets
        """
        m = importlib.import_module(mod)
        ser = m.FORMATS[fmt]
        doc = ser.load(ChangeLog, from_mem=wherefrom, offset_mapper=offset_mapper, **kwargs)
        if doc is not None:
            # only does something if the serializer left java offsets in place
            doc._ensure_python_offsets(offset_mapper=offset_mapper, **kwargs)
        return doc