Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/hl7/parser.py : 18%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2from __future__ import unicode_literals
3import six
4from .containers import Factory
def parse(line, encoding='utf-8', factory=Factory):
    """Parse an HL7 message and return an instance of
    :py:class:`hl7.Message`, which allows indexed access to the data
    elements.

    A custom :py:class:`hl7.Factory` subclass can be passed in via
    *factory*; it is then used when constructing the message and its
    components.

    .. note::

        HL7 usually contains only ASCII, but can use other character
        sets (HL7 Standards Document, Section 1.7.1), however as of v2.8,
        UTF-8 is the preferred character set [#]_.

        python-hl7 works on Python unicode strings. :py:func:`hl7.parse`
        accepts a unicode string as-is and converts a bytestring into a
        unicode string using the optional ``encoding`` parameter.
        ``encoding`` defaults to UTF-8, so no work is needed for
        bytestrings in UTF-8, but for other character sets like 'cp1252'
        or 'latin1', ``encoding`` must be set appropriately.

    >>> h = hl7.parse(message)

    To decode a non-UTF-8 byte string::

        hl7.parse(message, encoding='latin1')

    :rtype: :py:class:`hl7.Message`

    .. [#] http://wiki.hl7.org/index.php?title=Character_Set_used_in_v2_messages

    """
    # Bytestrings are decoded up front so that everything below only
    # ever deals with unicode text.
    text = line.decode(encoding) if isinstance(line, six.binary_type) else line

    # Leading/trailing whitespace carries no meaning for the parser.
    message_text = text.strip()

    # The separators declared in the message header decide how the rest
    # of the message is split, so derive the plan first and then apply it.
    parse_plan = create_parse_plan(message_text, factory)
    return _split(message_text, parse_plan)
50def _split(text, plan):
51 """Recursive function to split the *text* into an n-deep list,
52 according to the :py:class:`hl7._ParsePlan`.
53 """
54 # Base condition, if we have used up all the plans
55 if not plan:
56 return text
58 if not plan.applies(text):
59 return plan.container([text])
61 # Parsing of the first segment is awkward because it contains
62 # the separator characters in a field
63 if plan.containers[0] == plan.factory.create_segment and text[:3] in ['MSH', 'FHS']:
64 seg = text[:3]
65 sep0 = text[3]
66 sep_end_off = text.find(sep0, 4)
67 seps = text[4:sep_end_off]
68 text = text[sep_end_off + 1:]
69 data = [plan.factory.create_field('', [seg]), plan.factory.create_field('', [sep0]), plan.factory.create_field(sep0, [seps])]
70 else:
71 data = []
73 if text:
74 data = data + [_split(x, plan.next()) for x in text.split(plan.separator)]
75 # Return the instance of the current message part according
76 # to the plan
77 return plan.container(data)
def create_parse_plan(strmsg, factory=Factory):
    """Create a plan on how to parse the HL7 message according to the
    separator characters declared at the start of the message itself.

    :param strmsg: the (unicode) message text; it must start with ``MSH``
        followed by the field separator and, optionally, the
        component, repetition, escape and sub-component characters.
    :param factory: object providing the ``create_message``,
        ``create_segment``, ``create_field``, ``create_repetition`` and
        ``create_component`` callables used to build each level.
    :returns: a :py:class:`hl7._ParsePlan` for the message level.
    :raises AssertionError: if *strmsg* does not start with ``MSH``.
    :raises IndexError: if *strmsg* is exactly ``MSH`` with no field
        separator after it.
    """
    # The previous check was ``strmsg[:3] in ('MSH')`` -- a substring
    # test against a plain string, which let '', 'M', 'SH', ... through.
    # The exception type is kept for existing callers, but it is raised
    # explicitly so the check is not stripped when running with -O.
    if strmsg[:3] != 'MSH':
        raise AssertionError('HL7 message must start with an MSH segment')

    # The character right after 'MSH' is the field separator; the
    # characters from there up to its next occurrence declare the rest.
    sep0 = strmsg[3]
    end = strmsg.find(sep0, 4)
    if end == -1:
        # The header ends right after the encoding characters; without
        # this, the -1 would silently cut off the last declared character.
        end = len(strmsg)
    seps = strmsg[3:end]

    def declared(index, default):
        """Return the character declared at *index*, or *default* when
        the header is too short to declare it."""
        return seps[index] if len(seps) > index else default

    # Separators are listed in the order the levels are split: segment,
    # field, repetition, component, sub-component.  Note that this is
    # not the order in which the header declares them.
    separators = [
        '\r',               # we always use a carriage return between segments
        seps[0],            # field separator
        declared(2, '~'),   # repetition separator
        declared(1, '^'),   # component separator
        declared(4, '&'),   # sub-component separator
    ]
    esc = declared(3, '\\')

    # The ordered list of containers to create, one per separator.
    containers = [
        factory.create_message,
        factory.create_segment,
        factory.create_field,
        factory.create_repetition,
        factory.create_component,
    ]
    return _ParsePlan(separators, containers, esc, factory)
115class _ParsePlan(object):
116 """Details on how to parse an HL7 message. Typically this object
117 should be created via :func:`hl7.create_parse_plan`
118 """
119 # field, component, repetition, escape, subcomponent
121 def __init__(self, separators, containers, esc, factory):
122 # TODO test to see performance implications of the assertion
123 # since we generate the ParsePlan, this should never be in
124 # invalid state
125 assert len(containers) == len(separators)
126 self.separators = separators
127 self.containers = containers
128 self.esc = esc
129 self.factory = factory
131 @property
132 def separator(self):
133 """Return the current separator to use based on the plan."""
134 return self.separators[0]
136 def container(self, data):
137 """Return an instance of the approriate container for the *data*
138 as specified by the current plan.
139 """
140 return self.containers[0](self.separator, data, self.esc, self.separators, self.factory)
142 def next(self):
143 """Generate the next level of the plan (essentially generates
144 a copy of this plan with the level of the container and the
145 seperator starting at the next index.
146 """
147 if len(self.containers) > 1:
148 # Return a new instance of this class using the tails of
149 # the separators and containers lists. Use self.__class__()
150 # in case :class:`hl7.ParsePlan` is subclassed
151 return self.__class__(self.separators[1:], self.containers[1:], self.esc, self.factory)
152 # When we have no separators and containers left, return None,
153 # which indicates that we have nothing further.
154 return None
156 def applies(self, text):
157 """return True if the separator or those if the children are in the text"""
158 for s in self.separators:
159 if text.find(s) >= 0:
160 return True
161 return False