"""
An annotation is immutable, but the features it contains are mutable.
"""
import sys
from typing import List, Union, Dict, Set
from functools import total_ordering
from gatenlp.feature_bearer import FeatureBearer, FeatureViewer
from gatenlp.offsetmapper import OFFSET_TYPE_JAVA
from gatenlp._utils import support_annotation_or_set
[docs]@total_ordering
class Annotation(FeatureBearer):
"""
An annotation represents information about a span of text. It contains the start and end
offsets of the span, an "annotation type" and it is a feature bearer.
In addition it contains an id which has no meaning for the annotation itself but is
used to uniquely identify an annotation within the set it is contained in.
All fields except the features are immutable, once the annotation has been created
only the features can be changed.
"""
@property
def type(self):
return self._type
@property
def start(self):
return self._start
@property
def end(self):
return self._end
# TODO: we should get rid of this attribute completely!
@property
def gatenlp_type(self):
return self._gatenlp_type
@property
def features(self):
return FeatureViewer(self._features, changelog=self.changelog, logger=self._log_feature_change)
@property
def id(self):
return self._id
def __init__(self, start: int, end: int, annot_type: str,
annid: int = 0,
features=None):
"""
Create a new annotation instance. NOTE: this should almost never be done directly
and instead the method annotation_set.add should be used!
Once an annotation has been created, the start, end, type and id fields must not
be changed!
:param start: start offset of the annotation
:param end: end offset of the annotation
:param annot_type: annotation type
:param annot_id: the id of the annotation
:param owner_set: the containing annotation set
:param features: an initial collection of features, None for no features.
"""
super().__init__(features)
self._gatenlp_type = "Annotation"
# print("Creating Ann with changelog {} ".format(changelog), file=sys.stderr)
self._type = annot_type
self._start = start
self._end = end
self._id = annid
self._owner_set = None
def _changelog(self):
"""
Return the changelog of the owning set, if there is one, or None.
:return: the changelog
"""
if self._owner_set is not None:
return self._owner_set.changelog
# TODO: for now at least, make sure only simple JSON serialisable things are used! We do NOT
# allow any user specific types in order to make sure what we create is interchangeable with GATE.
# In addition we do NOT allow None features.
# So a feature name always has to be a string (not None), the value has to be anything that is json
# serialisable (except None keys for maps).
# For performance reasons we check the feature name but not the value (maybe make checking optional
# on by default but still optional?)
def _log_feature_change(self, command: str, feature: str = None, value=None) -> None:
if self._changelog() is None:
return
ch = {
"command": command,
"type": "annotation",
"set": self._owner_set.name,
"id": self.id}
if feature is not None:
ch["feature"] = feature
if value is not None:
ch["value"] = value
self.changelog.append(ch)
def __eq__(self, other) -> bool:
"""
Two annotations are identical if they are the same object or if all the fields
are equal.
:param other: the object to compare with
:return: if the annotations are equal
"""
if not isinstance(other, Annotation):
return False
if self is other:
return True
return self.start == other.start and self.end == other.end and \
self.type == other.type and self.id == other.id and self._features == other._features
# The old way to test for equality simply checked if owning set and id where identical
#if self._owner_set != other._owner_set:
# return False
#if self.id != other.id:
# return False
#else:
# return True
def __hash__(self):
"""
The hash depends on the annotation ID and the owning set.
:return: hash
"""
return hash((self.id, self._owner_set))
def __lt__(self, other) -> bool:
"""
Comparison for sorting: this sorts by increasing start offset, then increasing end offset, then increasing
type name, then increasing annotation id.
NOTE: for now the other object has to be an instance of Annotation, duck typing is not supported!
:param other: another annotation
:return:
"""
if not isinstance(other, Annotation):
raise Exception("Cannot compare to non-Annotation")
if self.start < other.start:
return True
elif self.start > other.start:
return False
else:
if self.end < other.end:
return True
elif self.end > other.end:
return False
else:
if self.type < other.type:
return True
elif self.type > other.type:
return False
else:
if self.id < other.id:
return True
else:
return False
def __repr__(self) -> str:
"""
String representation of the annotation.
:return: string representation
"""
return "Annotation({},{},{},id={},features={})".format(self.start, self.end, self.type, self.id, self._features)
def __len__(self) -> int:
"""
The length of the annotation is the length of the offset span. Since the end offset is one after the last
element, we return end-start-1
:return:
"""
return self.end - self.start - 1
[docs] def is_inside(self, offset: int) -> bool:
"""
Check if the given offset falls somewhere inside the span of this annotation.
:param offset: the offset to check
:return: True if the offset is inside the span of this annotation
"""
return self.start <= offset < self.end
[docs] @support_annotation_or_set
def is_overlapping(self, start: int, end: int) -> bool:
"""
Checks if this annotation is overlapping with the given span, annotation or
annotation set.
An annotation is overlapping with a span if the first or last character
is inside that span.
:param start: start offset of the span
:param end: end offset of the span
:return: True if overlapping, False otherwise
"""
return self.is_inside(start) or self.is_inside(end - 1)
[docs] @support_annotation_or_set
def is_coextensive(self, start: int, end: int) -> bool:
"""
Checks if this annotation is coextensive with the given span, annotation or
annotation set, i.e. has exactly the same start and end offsets.
:param start: start offset of the span
:param end: end offset of the span
:return: True if coextensive, False otherwise
"""
return self.start == start and self.end == end
[docs] @support_annotation_or_set
def is_within(self, start: int, end: int) -> bool:
"""
Checks if this annotation is within the given span, annotation or
annotation set, i.e. both the start and end offsets of this annotation
are after the given start and before the given end.
:param start: start offset of the span
:param end: end offset of the span
:return: True if within, False otherwise
"""
return start <= self.start and end >= self.end
[docs] @support_annotation_or_set
def is_covering(self, start: int, end: int) -> bool:
"""
Checks if this annotation is covering the given span, annotation or
annotation set, i.e. both the given start and end offsets
are after the start of this annotation and before the end of this annotation.
:param start: start offset of the span
:param end: end offset of the span
:return: True if within, False otherwise
"""
return self.start <= start and self.end >= end
def _json_repr(self, **kwargs) -> Dict:
if "offset_mapper" in kwargs:
om = kwargs["offset_mapper"]
to_type = kwargs["offset_type"]
if to_type == OFFSET_TYPE_JAVA:
start = om.convert_to_java(self.start)
end = om.convert_to_java(self.end)
else:
start = om.convert_to_python(self.start)
end = om.convert_to_python(self.end)
else:
start = self.start
end = self.end
return {
"start": start,
"end": end,
"type": self.type,
"id": self.id,
"features": self._features,
"gatenlp_type": self.gatenlp_type # TODO: get rid of this!!
}
@staticmethod
def _from_json_map(jsonmap, **kwargs) -> "Annotation":
ann = Annotation(jsonmap.get("start"), jsonmap.get("end"), jsonmap.get("type"), jsonmap.get("id"),
features=jsonmap.get("features"))
return ann
# def __setattr__(self, key, value):
# """
# Prevent start, stop, type and annotation id from getting overridden, once they have been
# set.
# :param key: attribute to set
# :param value: value to set attribute to
# :return:
# """
# print(f"Trying to set {key} to {value}")
# if key == "start" or key == "end" or key == "type" or key == "id":
# if self.__getattribute__(key) is None:
# print("Seems this is Null")
# super().__setattr__(key, value)
# else:
# raise Exception("Annotation attributes cannot get changed after being set")
# else:
# super().__setattr__(key, value)
[docs] def to_dict(self, offset_mapper=None, offset_type=None):
if offset_mapper is not None:
if offset_type == OFFSET_TYPE_JAVA:
start = offset_mapper.convert_to_java(self._start)
end = offset_mapper.convert_to_java(self._end)
else:
start = offset_mapper.convert_to_python(self._start)
end = offset_mapper.convert_to_python(self._end)
else:
start = self._start
end = self._end
return {
"type": self.type,
"start": start,
"end": end,
"id": self.id,
"features": self._features,
}
[docs] @staticmethod
def from_dict(dictrepr, owner_set=None, **kwargs):
ann = Annotation(
start=dictrepr.get("start"),
end=dictrepr.get("end"),
annot_type=dictrepr.get("type"),
annid=dictrepr.get("id"),
features=dictrepr.get("features")
)
ann._owner_set = owner_set
return ann