Source code for indra.sources.eidos.processor

from __future__ import absolute_import, print_function, unicode_literals
from builtins import dict, str
import json
import logging
import objectpath
from indra.statements import Influence, Agent, Evidence


# Module-level logger registered under the name 'eidos'; used for warnings
# emitted while processing Eidos output.
logger = logging.getLogger('eidos')


class EidosJsonLdProcessor(object):
    """This processor extracts INDRA Statements from Eidos JSON-LD output.

    Parameters
    ----------
    json_dict : dict
        A JSON dictionary containing the Eidos extractions in JSON-LD format.

    Attributes
    ----------
    tree : objectpath.Tree
        The objectpath Tree object representing the extractions.
    statements : list[indra.statements.Statement]
        A list of INDRA Statements that were extracted by the processor.
    """
    def __init__(self, json_dict):
        self.tree = objectpath.Tree(json_dict)
        self.statements = []

    def get_events(self):
        """Extract causal relations and append them to `statements`.

        Every extraction of type 'DirectedRelation' carrying the 'Causal'
        label is turned into an Influence Statement between its first source
        and first destination entity. Nothing is returned; results are
        accumulated in `self.statements`.

        Raises
        ------
        KeyError
            If an event refers to an entity id that is not present among
            the 'Entity' extractions.
        """
        events = self.tree.execute(
            "$.extractions[(@.@type is 'DirectedRelation')]")
        # The query result may be a lazy iterator, which is truthy even when
        # it yields nothing, so materialize it before testing for emptiness.
        events = list(events) if events else []
        if not events:
            return

        entities = self.tree.execute(
            "$.extractions[(@.@type is 'Entity')]")
        # Key each entity by its own '@id' rather than zipping two separate
        # queries, which could misalign if an entity had no '@id'.
        entity_dict = {}
        for entity in (entities or []):
            entity_id = entity.get('@id')
            if entity_id is not None:
                entity_dict[entity_id] = entity

        for event in events:
            if 'Causal' not in event['labels']:
                continue
            # For now, just take the first source and first destination.
            # Later, might deal with hypergraph representation.
            subj = entity_dict[event['sources'][0]['@id']]
            obj = entity_dict[event['destinations'][0]['@id']]
            subj_delta = {'adjectives': self._get_adjectives(subj),
                          'polarity': self._get_polarity(subj)}
            obj_delta = {'adjectives': self._get_adjectives(obj),
                         'polarity': self._get_polarity(obj)}
            evidence = self._get_evidence(event)
            st = Influence(self._make_agent(subj), self._make_agent(obj),
                           subj_delta, obj_delta, evidence=evidence)
            self.statements.append(st)

    @staticmethod
    def _get_polarity(entity):
        """Return 1, -1 or None based on the entity's first state.

        The first state corresponds to increase/decrease: 'INC' maps to 1,
        'DEC' maps to -1, and anything else (including a missing or empty
        'states' list) maps to None.
        """
        states = entity.get('states')
        if not states:
            return None
        state_type = states[0].get('type')
        if state_type == 'DEC':
            return -1
        if state_type == 'INC':
            return 1
        return None

    @staticmethod
    def _get_adjectives(entity):
        """Return the modifier texts of the entity's first state as a list.

        An empty list is returned whenever there are no states or the first
        state has no modifiers, so callers always receive a list.
        """
        states = entity.get('states')
        if not states:
            return []
        return [mod['text'] for mod in states[0].get('modifiers', [])]

    @staticmethod
    def _get_eidos_groundings(entity):
        """Return Eidos groundings as a list of (concept, score) tuples."""
        return [(g['ontologyConcept'], g['value'])
                for g in entity.get('grounding', [])]

    @staticmethod
    def _make_agent(entity):
        """Return an Agent from an Eidos entity."""
        # For now we just use the text for the agent as the name
        name = entity['text']
        # Save raw text and Eidos scored groundings as db_refs
        groundings = EidosJsonLdProcessor._get_eidos_groundings(entity)
        db_refs = {'TEXT': entity['text'], 'EIDOS': groundings}
        return Agent(name, db_refs=db_refs)

    @staticmethod
    def _get_evidence(event):
        """Return a one-element list with the Evidence for an event.

        The evidence text is the event's 'text'; its 'rule' and 'provenance'
        entries are stored as the 'found_by' and 'provenance' annotations.
        """
        text = event.get('text')
        annotations = {
            'found_by': event.get('rule'),
            'provenance': event.get('provenance'),
        }
        ev = Evidence(source_api='eidos', text=text, annotations=annotations)
        return [ev]
class EidosJsonProcessor(object):
    """This processor extracts INDRA Statements from Eidos JSON (not JSON-LD)
    output.

    Parameters
    ----------
    json_dict : dict
        A JSON dictionary containing the Eidos extractions in JSON
        (not JSON-LD) format.

    Attributes
    ----------
    tree : objectpath.Tree
        The objectpath Tree object representing the extractions.
    statements : list[indra.statements.Statement]
        A list of INDRA Statements that were extracted by the processor.
    """
    def __init__(self, json_dict):
        self.tree = objectpath.Tree(json_dict)
        self.statements = []

    def get_events(self):
        """Extract event mentions and append them to `statements`.

        Mentions of type 'EventMention' with exactly two arguments are
        turned into Influence Statements: 'Causal' events use their
        cause/effect arguments and 'Origin' events their origin/theme
        arguments. 'Correlation' events and events with any other labels
        are skipped with a warning. Nothing is returned; results are
        accumulated in `self.statements`.
        """
        events = self.tree.execute("$.mentions[(@.type is 'EventMention')]")
        events = list(events) if events else []
        for event in events:
            arguments = event['arguments']
            # Skip events with missing arguments
            if len(arguments) != 2:
                continue
            labels = event['labels']
            # Process causal events
            if 'Causal' in labels:
                subj = arguments['cause'][0]
                obj = arguments['effect'][0]
            # Process origin/theme events
            elif 'Origin' in labels:
                subj = arguments['origin'][0]
                obj = arguments['theme'][0]
            # Skip correlation events for now
            elif 'Correlation' in labels:
                logger.warning('Correlation event %s skipped.', event['id'])
                continue
            else:
                logger.warning('Could not classify event with labels: %s',
                               ', '.join(labels))
                continue
            subj_agent = self._get_agent(subj)
            obj_agent = self._get_agent(obj)
            subj_mods = self._get_mods(subj)
            obj_mods = self._get_mods(obj)
            # The interpretation of multiple mods is not clear yet so we
            # choose the first mod if available
            subj_delta = subj_mods[0] if subj_mods else \
                {'adjectives': [], 'polarity': None}
            obj_delta = obj_mods[0] if obj_mods else \
                {'adjectives': [], 'polarity': None}
            evidence = self._get_evidence(event)
            st = Influence(subj_agent, obj_agent, subj_delta, obj_delta,
                           evidence=evidence)
            self.statements.append(st)

    @staticmethod
    def _get_evidence(event):
        """Return a one-element list with the Evidence for an event.

        The evidence text is the event's 'text' and its 'foundBy' entry is
        stored as the 'found_by' annotation (None if absent).
        """
        text = event.get('text')
        annotations = {'found_by': event.get('foundBy')}
        ev = Evidence(source_api='eidos', text=text, annotations=annotations)
        return [ev]

    @staticmethod
    def _get_mods(term):
        """Return a list of delta dicts, one per attachment of the term.

        Each dict has an 'adjectives' list, taken from the 'quantifier' entry
        of the attachment's JSON-encoded 'mod' string, and a 'polarity' that
        is 1 for 'Increase', -1 for 'Decrease' and None otherwise. An
        attachment without a 'mod' contributes an empty adjectives list.
        """
        polarities = {'Increase': 1, 'Decrease': -1}
        mods = []
        attachments = term.get('attachments', [])
        if len(attachments) > 1:
            logger.warning('More than one attachment to event.')
        for attachment in attachments:
            # Get the polarity
            polarity = polarities.get(attachment.get('type'))
            # Get the adjectives; 'mod' is a JSON string and may be absent,
            # in which case there is nothing to decode.
            mod = attachment.get('mod')
            mod_dict = json.loads(mod) if mod else {}
            adjectives = mod_dict.get('quantifier', [])
            mods.append({'adjectives': adjectives, 'polarity': polarity})
        return mods

    @staticmethod
    def _get_agent(term):
        """Return an Agent named after the term's text."""
        name = term.get('text')
        agent = Agent(name)
        return agent