Coverage for /home/runner/.local/share/hatch/env/virtual/importnb/KA2AwMZG/test.stdlib/lib/python3.9/site-packages/importnb/_json_parser.py: 53%
1897 statements
coverage.py v6.5.0, created at 2022-10-24 22:30 +0000
1# The file was automatically generated by Lark v1.1.3
2__version__ = "1.1.3"
4#
5#
6# Lark Stand-alone Generator Tool
7# ----------------------------------
8# Generates a stand-alone LALR(1) parser
9#
10# Git: https://github.com/erezsh/lark
11# Author: Erez Shinan (erezshin@gmail.com)
12#
13#
14# >>> LICENSE
15#
16# This tool and its generated code use a separate license from Lark,
17# and are subject to the terms of the Mozilla Public License, v. 2.0.
18# If a copy of the MPL was not distributed with this
19# file, You can obtain one at https://mozilla.org/MPL/2.0/.
20#
21# If you wish to purchase a commercial license for this tool and its
22# generated code, you may contact me via email or otherwise.
23#
24# If MPL2 is incompatible with your free or open-source project,
25# contact me and we'll work it out.
26#
27#
29from abc import ABC, abstractmethod
30from collections.abc import Sequence
31from types import ModuleType
32from typing import (
33 TypeVar, Generic, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any,
34 Union, Iterable, IO, TYPE_CHECKING,
35 Pattern as REPattern, ClassVar, Set, Mapping
36)
39class LarkError(Exception):
40 pass
43class ConfigurationError(LarkError, ValueError):
44 pass
47def assert_config(value, options: Collection, msg='Got %r, expected one of %s'):
48 if value not in options:
49 raise ConfigurationError(msg % (value, options))
52class GrammarError(LarkError):
53 pass
56class ParseError(LarkError):
57 pass
60class LexError(LarkError):
61 pass
63T = TypeVar('T')
65class UnexpectedInput(LarkError):
66 #--
67 line: int
68 column: int
69 pos_in_stream = None
70 state: Any
71 _terminals_by_name = None
73 def get_context(self, text: str, span: int=40) -> str:
74 #--
75 assert self.pos_in_stream is not None, self
76 pos = self.pos_in_stream
77 start = max(pos - span, 0)
78 end = pos + span
79 if not isinstance(text, bytes):
80 before = text[start:pos].rsplit('\n', 1)[-1]
81 after = text[pos:end].split('\n', 1)[0]
82 return before + after + '\n' + ' ' * len(before.expandtabs()) + '^\n'
83 else:
84 before = text[start:pos].rsplit(b'\n', 1)[-1]
85 after = text[pos:end].split(b'\n', 1)[0]
86 return (before + after + b'\n' + b' ' * len(before.expandtabs()) + b'^\n').decode("ascii", "backslashreplace")
88 def match_examples(self, parse_fn: 'Callable[[str], Tree]',
89 examples: Union[Mapping[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]],
90 token_type_match_fallback: bool=False,
91 use_accepts: bool=True
92 ) -> Optional[T]:
93 #--
94 assert self.state is not None, "Not supported for this exception"
96 if isinstance(examples, Mapping):
97 examples = examples.items()
99 candidate = (None, False)
100 for i, (label, example) in enumerate(examples):
101 assert not isinstance(example, str), "Expecting a list"
103 for j, malformed in enumerate(example):
104 try:
105 parse_fn(malformed)
106 except UnexpectedInput as ut:
107 if ut.state == self.state:
108 if (
109 use_accepts
110 and isinstance(self, UnexpectedToken)
111 and isinstance(ut, UnexpectedToken)
112 and ut.accepts != self.accepts
113 ):
114 logger.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" %
115 (self.state, self.accepts, ut.accepts, i, j))
116 continue
117 if (
118 isinstance(self, (UnexpectedToken, UnexpectedEOF))
119 and isinstance(ut, (UnexpectedToken, UnexpectedEOF))
120 ):
121 if ut.token == self.token: ##
123 logger.debug("Exact Match at example [%s][%s]" % (i, j))
124 return label
126 if token_type_match_fallback:
127 ##
129 if (ut.token.type == self.token.type) and not candidate[-1]:
130 logger.debug("Token Type Fallback at example [%s][%s]" % (i, j))
131 candidate = label, True
133 if candidate[0] is None:
134 logger.debug("Same State match at example [%s][%s]" % (i, j))
135 candidate = label, False
137 return candidate[0]
139 def _format_expected(self, expected):
140 if self._terminals_by_name:
141 d = self._terminals_by_name
142 expected = [d[t_name].user_repr() if t_name in d else t_name for t_name in expected]
143 return "Expected one of: \n\t* %s\n" % '\n\t* '.join(expected)
146class UnexpectedEOF(ParseError, UnexpectedInput):
147 #--
148 expected: 'List[Token]'
150 def __init__(self, expected, state=None, terminals_by_name=None):
151 super(UnexpectedEOF, self).__init__()
153 self.expected = expected
154 self.state = state
# stand-alone build note: `Token` below is the class defined later in this
# module; a relative import (`from .lexer import Token`) has no target in a
# generated stand-alone file, so the module-level class is used directly.
156 self.token = Token("<EOF>", "") ##
158 self.pos_in_stream = -1
159 self.line = -1
160 self.column = -1
161 self._terminals_by_name = terminals_by_name
164 def __str__(self):
165 message = "Unexpected end-of-input. "
166 message += self._format_expected(self.expected)
167 return message
170class UnexpectedCharacters(LexError, UnexpectedInput):
171 #--
173 allowed: Set[str]
174 considered_tokens: Set[Any]
176 def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None,
177 terminals_by_name=None, considered_rules=None):
178 super(UnexpectedCharacters, self).__init__()
180 ##
182 self.line = line
183 self.column = column
184 self.pos_in_stream = lex_pos
185 self.state = state
186 self._terminals_by_name = terminals_by_name
188 self.allowed = allowed
189 self.considered_tokens = considered_tokens
190 self.considered_rules = considered_rules
191 self.token_history = token_history
193 if isinstance(seq, bytes):
194 self.char = seq[lex_pos:lex_pos + 1].decode("ascii", "backslashreplace")
195 else:
196 self.char = seq[lex_pos]
197 self._context = self.get_context(seq)
200 def __str__(self):
201 message = "No terminal matches '%s' in the current parser context, at line %d col %d" % (self.char, self.line, self.column)
202 message += '\n\n' + self._context
203 if self.allowed:
204 message += self._format_expected(self.allowed)
205 if self.token_history:
206 message += '\nPrevious tokens: %s\n' % ', '.join(repr(t) for t in self.token_history)
207 return message
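# Usage sketch (illustrative, not generated by Lark): UnexpectedCharacters
# computes a caret-annotated context from the input via get_context(); the
# sample string below is invented for the example.
def _example_error_context():
    err = UnexpectedCharacters('{"a": 1,}', lex_pos=8, line=1, column=9)
    return str(err)  # "No terminal matches '}' ..." plus the caret context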
210class UnexpectedToken(ParseError, UnexpectedInput):
211 #--
213 expected: Set[str]
214 considered_rules: Set[str]
215 interactive_parser: 'InteractiveParser'
217 def __init__(self, token, expected, considered_rules=None, state=None, interactive_parser=None, terminals_by_name=None, token_history=None):
218 super(UnexpectedToken, self).__init__()
220 ##
222 self.line = getattr(token, 'line', '?')
223 self.column = getattr(token, 'column', '?')
224 self.pos_in_stream = getattr(token, 'start_pos', None)
225 self.state = state
227 self.token = token
228 self.expected = expected ##
230 self._accepts = NO_VALUE
231 self.considered_rules = considered_rules
232 self.interactive_parser = interactive_parser
233 self._terminals_by_name = terminals_by_name
234 self.token_history = token_history
237 @property
238 def accepts(self) -> Set[str]:
239 if self._accepts is NO_VALUE:
240 self._accepts = self.interactive_parser and self.interactive_parser.accepts()
241 return self._accepts
243 def __str__(self):
244 message = ("Unexpected token %r at line %s, column %s.\n%s"
245 % (self.token, self.line, self.column, self._format_expected(self.accepts or self.expected)))
246 if self.token_history:
247 message += "Previous tokens: %r\n" % self.token_history
249 return message
253class VisitError(LarkError):
254 #--
256 obj: 'Union[Tree, Token]'
257 orig_exc: Exception
259 def __init__(self, rule, obj, orig_exc):
260 message = 'Error trying to process rule "%s":\n\n%s' % (rule, orig_exc)
261 super(VisitError, self).__init__(message)
263 self.rule = rule
264 self.obj = obj
265 self.orig_exc = orig_exc
268class MissingVariableError(LarkError):
269 pass
272import sys, re
273import logging
275logger: logging.Logger = logging.getLogger("lark")
276logger.addHandler(logging.StreamHandler())
277##
279##
281logger.setLevel(logging.CRITICAL)
284NO_VALUE = object()
286T = TypeVar("T")
289def classify(seq: Sequence, key: Optional[Callable] = None, value: Optional[Callable] = None) -> Dict:
290 d: Dict[Any, Any] = {}
291 for item in seq:
292 k = key(item) if (key is not None) else item
293 v = value(item) if (value is not None) else item
294 if k in d:
295 d[k].append(v)
296 else:
297 d[k] = [v]
298 return d
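# Usage sketch (illustrative): classify() buckets a sequence into a dict of
# lists, keyed (and optionally re-valued) by the given callables.
def _example_classify():
    return classify(['ab', 'c', 'de'], key=len)  # -> {2: ['ab', 'de'], 1: ['c']}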
301def _deserialize(data: Any, namespace: Dict[str, Any], memo: Dict) -> Any:
302 if isinstance(data, dict):
303 if '__type__' in data: ##
305 class_ = namespace[data['__type__']]
306 return class_.deserialize(data, memo)
307 elif '@' in data:
308 return memo[data['@']]
309 return {key:_deserialize(value, namespace, memo) for key, value in data.items()}
310 elif isinstance(data, list):
311 return [_deserialize(value, namespace, memo) for value in data]
312 return data
315_T = TypeVar("_T", bound="Serialize")
317class Serialize:
318 #--
320 def memo_serialize(self, types_to_memoize: List) -> Any:
321 memo = SerializeMemoizer(types_to_memoize)
322 return self.serialize(memo), memo.serialize()
324 def serialize(self, memo = None) -> Dict[str, Any]:
325 if memo and memo.in_types(self):
326 return {'@': memo.memoized.get(self)}
328 fields = getattr(self, '__serialize_fields__')
329 res = {f: _serialize(getattr(self, f), memo) for f in fields}
330 res['__type__'] = type(self).__name__
331 if hasattr(self, '_serialize'):
332 self._serialize(res, memo) ##
334 return res
336 @classmethod
337 def deserialize(cls: Type[_T], data: Dict[str, Any], memo: Dict[int, Any]) -> _T:
338 namespace = getattr(cls, '__serialize_namespace__', [])
339 namespace = {c.__name__:c for c in namespace}
341 fields = getattr(cls, '__serialize_fields__')
343 if '@' in data:
344 return memo[data['@']]
346 inst = cls.__new__(cls)
347 for f in fields:
348 try:
349 setattr(inst, f, _deserialize(data[f], namespace, memo))
350 except KeyError as e:
351 raise KeyError("Cannot find key for class", cls, e)
353 if hasattr(inst, '_deserialize'):
354 inst._deserialize() ##
357 return inst
360class SerializeMemoizer(Serialize):
361 #--
363 __serialize_fields__ = 'memoized',
365 def __init__(self, types_to_memoize: List) -> None:
366 self.types_to_memoize = tuple(types_to_memoize)
367 self.memoized = Enumerator()
369 def in_types(self, value: Serialize) -> bool:
370 return isinstance(value, self.types_to_memoize)
372 def serialize(self) -> Dict[int, Any]: ##
374 return _serialize(self.memoized.reversed(), None)
376 @classmethod
377 def deserialize(cls, data: Dict[int, Any], namespace: Dict[str, Any], memo: Dict[Any, Any]) -> Dict[int, Any]: ##
379 return _deserialize(data, namespace, memo)
382try:
383 import regex
384 _has_regex = True
385except ImportError:
386 _has_regex = False
388if sys.version_info >= (3, 11):
389 import re._parser as sre_parse
390 import re._constants as sre_constants
391else:
392 import sre_parse
393 import sre_constants
395categ_pattern = re.compile(r'\\p{[A-Za-z_]+}')
397def get_regexp_width(expr: str) -> Union[Tuple[int, int], List[int]]:
398 if _has_regex:
399 ##
401 ##
403 ##
405 regexp_final = re.sub(categ_pattern, 'A', expr)
406 else:
407 if re.search(categ_pattern, expr):
408 raise ImportError('`regex` module must be installed in order to use Unicode categories.', expr)
409 regexp_final = expr
410 try:
411 ##
413 return [int(x) for x in sre_parse.parse(regexp_final).getwidth()] ##
415 except sre_constants.error:
416 if not _has_regex:
417 raise ValueError(expr)
418 else:
419 ##
421 ##
423 c = regex.compile(regexp_final)
424 if c.match('') is None:
425 ##
427 return 1, int(sre_constants.MAXREPEAT)
428 else:
429 return 0, int(sre_constants.MAXREPEAT)
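# Usage sketch (illustrative): get_regexp_width() reports the minimum and
# maximum length of text a pattern can match; unbounded repetition yields the
# regex engine's MAXREPEAT sentinel as the maximum.
def _example_regexp_width():
    return get_regexp_width('a{2,5}'), get_regexp_width('[0-9]+')  # [2, 5], [1, MAXREPEAT]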
432from collections import OrderedDict
434class Meta:
436 empty: bool
437 line: int
438 column: int
439 start_pos: int
440 end_line: int
441 end_column: int
442 end_pos: int
443 orig_expansion: 'List[TerminalDef]'
444 match_tree: bool
446 def __init__(self):
447 self.empty = True
450_Leaf_T = TypeVar("_Leaf_T")
451Branch = Union[_Leaf_T, 'Tree[_Leaf_T]']
454class Tree(Generic[_Leaf_T]):
455 #--
457 data: str
458 children: 'List[Branch[_Leaf_T]]'
460 def __init__(self, data: str, children: 'List[Branch[_Leaf_T]]', meta: Optional[Meta]=None) -> None:
461 self.data = data
462 self.children = children
463 self._meta = meta
465 @property
466 def meta(self) -> Meta:
467 if self._meta is None:
468 self._meta = Meta()
469 return self._meta
471 def __repr__(self):
472 return 'Tree(%r, %r)' % (self.data, self.children)
474 def _pretty_label(self):
475 return self.data
477 def _pretty(self, level, indent_str):
478 if len(self.children) == 1 and not isinstance(self.children[0], Tree):
479 return [indent_str*level, self._pretty_label(), '\t', '%s' % (self.children[0],), '\n']
481 l = [indent_str*level, self._pretty_label(), '\n']
482 for n in self.children:
483 if isinstance(n, Tree):
484 l += n._pretty(level+1, indent_str)
485 else:
486 l += [indent_str*(level+1), '%s' % (n,), '\n']
488 return l
490 def pretty(self, indent_str: str=' ') -> str:
491 #--
492 return ''.join(self._pretty(0, indent_str))
494 def __rich__(self, parent:'rich.tree.Tree'=None) -> 'rich.tree.Tree':
495 #--
496 return self._rich(parent)
498 def _rich(self, parent):
499 if parent:
500 tree = parent.add(f'[bold]{self.data}[/bold]')
501 else:
502 import rich.tree
503 tree = rich.tree.Tree(self.data)
505 for c in self.children:
506 if isinstance(c, Tree):
507 c._rich(tree)
508 else:
509 tree.add(f'[green]{c}[/green]')
511 return tree
513 def __eq__(self, other):
514 try:
515 return self.data == other.data and self.children == other.children
516 except AttributeError:
517 return False
519 def __ne__(self, other):
520 return not (self == other)
522 def __hash__(self) -> int:
523 return hash((self.data, tuple(self.children)))
525 def iter_subtrees(self) -> 'Iterator[Tree[_Leaf_T]]':
526 #--
527 queue = [self]
528 subtrees = OrderedDict()
529 for subtree in queue:
530 subtrees[id(subtree)] = subtree
531 ##
533 queue += [c for c in reversed(subtree.children) ##
535 if isinstance(c, Tree) and id(c) not in subtrees]
537 del queue
538 return reversed(list(subtrees.values()))
540 def iter_subtrees_topdown(self):
541 #--
542 stack = [self]
543 while stack:
544 node = stack.pop()
545 if not isinstance(node, Tree):
546 continue
547 yield node
548 for child in reversed(node.children):
549 stack.append(child)
551 def find_pred(self, pred: 'Callable[[Tree[_Leaf_T]], bool]') -> 'Iterator[Tree[_Leaf_T]]':
552 #--
553 return filter(pred, self.iter_subtrees())
555 def find_data(self, data: str) -> 'Iterator[Tree[_Leaf_T]]':
556 #--
557 return self.find_pred(lambda t: t.data == data)
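# Usage sketch (illustrative): building a Tree by hand and inspecting it with
# the helpers above; the rule names 'pair' and 'value' are invented here.
def _example_tree():
    t = Tree('pair', ['"a"', Tree('value', ['1'])])
    return t.pretty(), [s.data for s in t.iter_subtrees_topdown()]  # (..., ['pair', 'value'])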
560from functools import wraps, update_wrapper
561from inspect import getmembers, getmro
563_Return_T = TypeVar('_Return_T')
564_Return_V = TypeVar('_Return_V')
565_Leaf_T = TypeVar('_Leaf_T')
566_Leaf_U = TypeVar('_Leaf_U')
567_R = TypeVar('_R')
568_FUNC = Callable[..., _Return_T]
569_DECORATED = Union[_FUNC, type]
571class _DiscardType:
572 #--
574 def __repr__(self):
575 return "lark.visitors.Discard"
577Discard = _DiscardType()
579##
582class _Decoratable:
583 #--
585 @classmethod
586 def _apply_v_args(cls, visit_wrapper):
587 mro = getmro(cls)
588 assert mro[0] is cls
589 libmembers = {name for _cls in mro[1:] for name, _ in getmembers(_cls)}
590 for name, value in getmembers(cls):
592 ##
594 if name.startswith('_') or (name in libmembers and name not in cls.__dict__):
595 continue
596 if not callable(value):
597 continue
599 ##
601 if isinstance(cls.__dict__[name], _VArgsWrapper):
602 continue
604 setattr(cls, name, _VArgsWrapper(cls.__dict__[name], visit_wrapper))
605 return cls
607 def __class_getitem__(cls, _):
608 return cls
611class Transformer(_Decoratable, ABC, Generic[_Leaf_T, _Return_T]):
612 #--
613 __visit_tokens__ = True ##
616 def __init__(self, visit_tokens: bool=True) -> None:
617 self.__visit_tokens__ = visit_tokens
619 def _call_userfunc(self, tree, new_children=None):
620 ##
622 children = new_children if new_children is not None else tree.children
623 try:
624 f = getattr(self, tree.data)
625 except AttributeError:
626 return self.__default__(tree.data, children, tree.meta)
627 else:
628 try:
629 wrapper = getattr(f, 'visit_wrapper', None)
630 if wrapper is not None:
631 return f.visit_wrapper(f, tree.data, children, tree.meta)
632 else:
633 return f(children)
634 except GrammarError:
635 raise
636 except Exception as e:
637 raise VisitError(tree.data, tree, e)
639 def _call_userfunc_token(self, token):
640 try:
641 f = getattr(self, token.type)
642 except AttributeError:
643 return self.__default_token__(token)
644 else:
645 try:
646 return f(token)
647 except GrammarError:
648 raise
649 except Exception as e:
650 raise VisitError(token.type, token, e)
652 def _transform_children(self, children):
653 for c in children:
654 if isinstance(c, Tree):
655 res = self._transform_tree(c)
656 elif self.__visit_tokens__ and isinstance(c, Token):
657 res = self._call_userfunc_token(c)
658 else:
659 res = c
661 if res is not Discard:
662 yield res
664 def _transform_tree(self, tree):
665 children = list(self._transform_children(tree.children))
666 return self._call_userfunc(tree, children)
668 def transform(self, tree: Tree[_Leaf_T]) -> _Return_T:
669 #--
670 return self._transform_tree(tree)
672 def __mul__(
673 self: 'Transformer[_Leaf_T, Tree[_Leaf_U]]',
674 other: 'Union[Transformer[_Leaf_U, _Return_V], TransformerChain[_Leaf_U, _Return_V,]]'
675 ) -> 'TransformerChain[_Leaf_T, _Return_V]':
676 #--
677 return TransformerChain(self, other)
679 def __default__(self, data, children, meta):
680 #--
681 return Tree(data, children, meta)
683 def __default_token__(self, token):
684 #--
685 return token
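# Usage sketch (illustrative): a Transformer maps rule names to methods and
# rebuilds the tree bottom-up; the rule names 'total' and 'number' are invented.
def _example_transformer():
    class _Sum(Transformer):
        def number(self, children):
            return int(children[0])
        def total(self, children):
            return sum(children)
    return _Sum().transform(Tree('total', [Tree('number', ['1']), Tree('number', ['2'])]))  # -> 3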
688def merge_transformers(base_transformer=None, **transformers_to_merge):
689 #--
690 if base_transformer is None:
691 base_transformer = Transformer()
692 for prefix, transformer in transformers_to_merge.items():
693 for method_name in dir(transformer):
694 method = getattr(transformer, method_name)
695 if not callable(method):
696 continue
697 if method_name.startswith("_") or method_name == "transform":
698 continue
699 prefixed_method = prefix + "__" + method_name
700 if hasattr(base_transformer, prefixed_method):
701 raise AttributeError("Cannot merge: method '%s' appears more than once" % prefixed_method)
703 setattr(base_transformer, prefixed_method, method)
705 return base_transformer
708class InlineTransformer(Transformer): ##
710 def _call_userfunc(self, tree, new_children=None):
711 ##
713 children = new_children if new_children is not None else tree.children
714 try:
715 f = getattr(self, tree.data)
716 except AttributeError:
717 return self.__default__(tree.data, children, tree.meta)
718 else:
719 return f(*children)
722class TransformerChain(Generic[_Leaf_T, _Return_T]):
724 transformers: 'Tuple[Union[Transformer, TransformerChain], ...]'
726 def __init__(self, *transformers: 'Union[Transformer, TransformerChain]') -> None:
727 self.transformers = transformers
729 def transform(self, tree: Tree[_Leaf_T]) -> _Return_T:
730 for t in self.transformers:
731 tree = t.transform(tree)
732 return cast(_Return_T, tree)
734 def __mul__(
735 self: 'TransformerChain[_Leaf_T, Tree[_Leaf_U]]',
736 other: 'Union[Transformer[_Leaf_U, _Return_V], TransformerChain[_Leaf_U, _Return_V]]'
737 ) -> 'TransformerChain[_Leaf_T, _Return_V]':
738 return TransformerChain(*self.transformers + (other,))
741class Transformer_InPlace(Transformer):
742 #--
743 def _transform_tree(self, tree): ##
745 return self._call_userfunc(tree)
747 def transform(self, tree: Tree[_Leaf_T]) -> _Return_T:
748 for subtree in tree.iter_subtrees():
749 subtree.children = list(self._transform_children(subtree.children))
751 return self._transform_tree(tree)
754class Transformer_NonRecursive(Transformer):
755 #--
757 def transform(self, tree: Tree[_Leaf_T]) -> _Return_T:
758 ##
760 rev_postfix = []
761 q: List[Branch[_Leaf_T]] = [tree]
762 while q:
763 t = q.pop()
764 rev_postfix.append(t)
765 if isinstance(t, Tree):
766 q += t.children
768 ##
770 stack: List = []
771 for x in reversed(rev_postfix):
772 if isinstance(x, Tree):
773 size = len(x.children)
774 if size:
775 args = stack[-size:]
776 del stack[-size:]
777 else:
778 args = []
780 res = self._call_userfunc(x, args)
781 if res is not Discard:
782 stack.append(res)
784 elif self.__visit_tokens__ and isinstance(x, Token):
785 res = self._call_userfunc_token(x)
786 if res is not Discard:
787 stack.append(res)
788 else:
789 stack.append(x)
791 result, = stack ##
793 ##
795 ##
797 ##
799 return cast(_Return_T, result)
802class Transformer_InPlaceRecursive(Transformer):
803 #--
804 def _transform_tree(self, tree):
805 tree.children = list(self._transform_children(tree.children))
806 return self._call_userfunc(tree)
809##
812class VisitorBase:
813 def _call_userfunc(self, tree):
814 return getattr(self, tree.data, self.__default__)(tree)
816 def __default__(self, tree):
817 #--
818 return tree
820 def __class_getitem__(cls, _):
821 return cls
824class Visitor(VisitorBase, ABC, Generic[_Leaf_T]):
825 #--
827 def visit(self, tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]:
828 #--
829 for subtree in tree.iter_subtrees():
830 self._call_userfunc(subtree)
831 return tree
833 def visit_topdown(self, tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]:
834 #--
835 for subtree in tree.iter_subtrees_topdown():
836 self._call_userfunc(subtree)
837 return tree
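# Usage sketch (illustrative): a Visitor is called for its side effects only
# and returns the tree unchanged; visit() works bottom-up, visit_topdown()
# from the root down.
def _example_visitor():
    seen = []
    class _Collect(Visitor):
        def __default__(self, tree):
            seen.append(tree.data)
    _Collect().visit(Tree('a', [Tree('b', []), Tree('c', [])]))
    return seen  # -> ['b', 'c', 'a']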
840class Visitor_Recursive(VisitorBase, Generic[_Leaf_T]):
841 #--
843 def visit(self, tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]:
844 #--
845 for child in tree.children:
846 if isinstance(child, Tree):
847 self.visit(child)
849 self._call_userfunc(tree)
850 return tree
852 def visit_topdown(self,tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]:
853 #--
854 self._call_userfunc(tree)
856 for child in tree.children:
857 if isinstance(child, Tree):
858 self.visit_topdown(child)
860 return tree
863class Interpreter(_Decoratable, ABC, Generic[_Leaf_T, _Return_T]):
864 #--
866 def visit(self, tree: Tree[_Leaf_T]) -> _Return_T:
867 ##
869 ##
871 ##
873 return self._visit_tree(tree)
875 def _visit_tree(self, tree: Tree[_Leaf_T]):
876 f = getattr(self, tree.data)
877 wrapper = getattr(f, 'visit_wrapper', None)
878 if wrapper is not None:
879 return f.visit_wrapper(f, tree.data, tree.children, tree.meta)
880 else:
881 return f(tree)
883 def visit_children(self, tree: Tree[_Leaf_T]) -> List:
884 return [self._visit_tree(child) if isinstance(child, Tree) else child
885 for child in tree.children]
887 def __getattr__(self, name):
888 return self.__default__
890 def __default__(self, tree):
891 return self.visit_children(tree)
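# Usage sketch (illustrative): unlike a Transformer, an Interpreter starts at
# the root and only descends where a method calls visit_children() itself.
def _example_interpreter():
    class _Eval(Interpreter):
        def add(self, tree):
            return sum(self.visit_children(tree))
        def num(self, tree):
            return int(tree.children[0])
    return _Eval().visit(Tree('add', [Tree('num', ['1']), Tree('num', ['2'])]))  # -> 3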
894_InterMethod = Callable[[Type[Interpreter], _Return_T], _R]
896def visit_children_decor(func: _InterMethod) -> _InterMethod:
897 #--
898 @wraps(func)
899 def inner(cls, tree):
900 values = cls.visit_children(tree)
901 return func(cls, values)
902 return inner
904##
907def _apply_v_args(obj, visit_wrapper):
908 try:
909 _apply = obj._apply_v_args
910 except AttributeError:
911 return _VArgsWrapper(obj, visit_wrapper)
912 else:
913 return _apply(visit_wrapper)
916class _VArgsWrapper:
917 #--
918 base_func: Callable
920 def __init__(self, func: Callable, visit_wrapper: Callable[[Callable, str, list, Any], Any]):
921 if isinstance(func, _VArgsWrapper):
922 func = func.base_func
923 ##
925 self.base_func = func ##
927 self.visit_wrapper = visit_wrapper
928 update_wrapper(self, func)
930 def __call__(self, *args, **kwargs):
931 return self.base_func(*args, **kwargs)
933 def __get__(self, instance, owner=None):
934 try:
935 ##
937 ##
939 g = type(self.base_func).__get__
940 except AttributeError:
941 return self
942 else:
943 return _VArgsWrapper(g(self.base_func, instance, owner), self.visit_wrapper)
945 def __set_name__(self, owner, name):
946 try:
947 f = type(self.base_func).__set_name__
948 except AttributeError:
949 return
950 else:
951 f(self.base_func, owner, name)
954def _vargs_inline(f, _data, children, _meta):
955 return f(*children)
956def _vargs_meta_inline(f, _data, children, meta):
957 return f(meta, *children)
958def _vargs_meta(f, _data, children, meta):
959 return f(meta, children)
960def _vargs_tree(f, data, children, meta):
961 return f(Tree(data, children, meta))
964def v_args(inline: bool = False, meta: bool = False, tree: bool = False, wrapper: Optional[Callable] = None) -> Callable[[_DECORATED], _DECORATED]:
965 #--
966 if tree and (meta or inline):
967 raise ValueError("Visitor functions cannot combine 'tree' with 'meta' or 'inline'.")
969 func = None
970 if meta:
971 if inline:
972 func = _vargs_meta_inline
973 else:
974 func = _vargs_meta
975 elif inline:
976 func = _vargs_inline
977 elif tree:
978 func = _vargs_tree
980 if wrapper is not None:
981 if func is not None:
982 raise ValueError("Cannot use 'wrapper' along with 'tree', 'meta' or 'inline'.")
983 func = wrapper
985 def _visitor_args_dec(obj):
986 return _apply_v_args(obj, func)
987 return _visitor_args_dec
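# Usage sketch (illustrative): v_args(inline=True) unpacks a rule's children
# into positional arguments of its callback; the rule name 'pair' is invented.
def _example_v_args():
    @v_args(inline=True)
    class _Pair(Transformer):
        def pair(self, key, value):
            return (str(key), int(value))
    return _Pair().transform(Tree('pair', ['name', '7']))  # -> ('name', 7)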
991TOKEN_DEFAULT_PRIORITY = 0
994class Symbol(Serialize):
995 __slots__ = ('name',)
997 name: str
998 is_term: ClassVar[bool] = NotImplemented
1000 def __init__(self, name: str) -> None:
1001 self.name = name
1003 def __eq__(self, other):
1004 assert isinstance(other, Symbol), other
1005 return self.is_term == other.is_term and self.name == other.name
1007 def __ne__(self, other):
1008 return not (self == other)
1010 def __hash__(self):
1011 return hash(self.name)
1013 def __repr__(self):
1014 return '%s(%r)' % (type(self).__name__, self.name)
1016 fullrepr = property(__repr__)
1018 def renamed(self, f):
1019 return type(self)(f(self.name))
1022class Terminal(Symbol):
1023 __serialize_fields__ = 'name', 'filter_out'
1025 is_term: ClassVar[bool] = True
1027 def __init__(self, name, filter_out=False):
1028 self.name = name
1029 self.filter_out = filter_out
1031 @property
1032 def fullrepr(self):
1033 return '%s(%r, %r)' % (type(self).__name__, self.name, self.filter_out)
1035 def renamed(self, f):
1036 return type(self)(f(self.name), self.filter_out)
1039class NonTerminal(Symbol):
1040 __serialize_fields__ = 'name',
1042 is_term: ClassVar[bool] = False
1045class RuleOptions(Serialize):
1046 __serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'template_source', 'empty_indices'
1048 keep_all_tokens: bool
1049 expand1: bool
1050 priority: Optional[int]
1051 template_source: Optional[str]
1052 empty_indices: Tuple[bool, ...]
1054 def __init__(self, keep_all_tokens: bool=False, expand1: bool=False, priority: Optional[int]=None, template_source: Optional[str]=None, empty_indices: Tuple[bool, ...]=()) -> None:
1055 self.keep_all_tokens = keep_all_tokens
1056 self.expand1 = expand1
1057 self.priority = priority
1058 self.template_source = template_source
1059 self.empty_indices = empty_indices
1061 def __repr__(self):
1062 return 'RuleOptions(%r, %r, %r, %r)' % (
1063 self.keep_all_tokens,
1064 self.expand1,
1065 self.priority,
1066 self.template_source
1067 )
1070class Rule(Serialize):
1071 #--
1072 __slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash')
1074 __serialize_fields__ = 'origin', 'expansion', 'order', 'alias', 'options'
1075 __serialize_namespace__ = Terminal, NonTerminal, RuleOptions
1077 def __init__(self, origin, expansion, order=0, alias=None, options=None):
1078 self.origin = origin
1079 self.expansion = expansion
1080 self.alias = alias
1081 self.order = order
1082 self.options = options or RuleOptions()
1083 self._hash = hash((self.origin, tuple(self.expansion)))
1085 def _deserialize(self):
1086 self._hash = hash((self.origin, tuple(self.expansion)))
1088 def __str__(self):
1089 return '<%s : %s>' % (self.origin.name, ' '.join(x.name for x in self.expansion))
1091 def __repr__(self):
1092 return 'Rule(%r, %r, %r, %r)' % (self.origin, self.expansion, self.alias, self.options)
1094 def __hash__(self):
1095 return self._hash
1097 def __eq__(self, other):
1098 if not isinstance(other, Rule):
1099 return False
1100 return self.origin == other.origin and self.expansion == other.expansion
1104from copy import copy
1107class Pattern(Serialize, ABC):
1109 value: str
1110 flags: Collection[str]
1111 raw: Optional[str]
1112 type: ClassVar[str]
1114 def __init__(self, value: str, flags: Collection[str]=(), raw: Optional[str]=None) -> None:
1115 self.value = value
1116 self.flags = frozenset(flags)
1117 self.raw = raw
1119 def __repr__(self):
1120 return repr(self.to_regexp())
1122 ##
1124 def __hash__(self):
1125 return hash((type(self), self.value, self.flags))
1127 def __eq__(self, other):
1128 return type(self) == type(other) and self.value == other.value and self.flags == other.flags
1130 @abstractmethod
1131 def to_regexp(self) -> str:
1132 raise NotImplementedError()
1134 @property
1135 @abstractmethod
1136 def min_width(self) -> int:
1137 raise NotImplementedError()
1139 @property
1140 @abstractmethod
1141 def max_width(self) -> int:
1142 raise NotImplementedError()
1144 def _get_flags(self, value):
1145 for f in self.flags:
1146 value = ('(?%s:%s)' % (f, value))
1147 return value
1150class PatternStr(Pattern):
1151 __serialize_fields__ = 'value', 'flags'
1153 type: ClassVar[str] = "str"
1155 def to_regexp(self) -> str:
1156 return self._get_flags(re.escape(self.value))
1158 @property
1159 def min_width(self) -> int:
1160 return len(self.value)
1162 @property
1163 def max_width(self) -> int:
1164 return len(self.value)
1167class PatternRE(Pattern):
1168 __serialize_fields__ = 'value', 'flags', '_width'
1170 type: ClassVar[str] = "re"
1172 def to_regexp(self) -> str:
1173 return self._get_flags(self.value)
1175 _width = None
1176 def _get_width(self):
1177 if self._width is None:
1178 self._width = get_regexp_width(self.to_regexp())
1179 return self._width
1181 @property
1182 def min_width(self) -> int:
1183 return self._get_width()[0]
1185 @property
1186 def max_width(self) -> int:
1187 return self._get_width()[1]
1190class TerminalDef(Serialize):
1191 __serialize_fields__ = 'name', 'pattern', 'priority'
1192 __serialize_namespace__ = PatternStr, PatternRE
1194 name: str
1195 pattern: Pattern
1196 priority: int
1198 def __init__(self, name: str, pattern: Pattern, priority: int=TOKEN_DEFAULT_PRIORITY) -> None:
1199 assert isinstance(pattern, Pattern), pattern
1200 self.name = name
1201 self.pattern = pattern
1202 self.priority = priority
1204 def __repr__(self):
1205 return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern)
1207 def user_repr(self) -> str:
1208 if self.name.startswith('__'): ##
1210 return self.pattern.raw or self.name
1211 else:
1212 return self.name
1214_T = TypeVar('_T', bound="Token")
1216class Token(str):
1217 #--
1218 __slots__ = ('type', 'start_pos', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos')
1220 type: str
1221 start_pos: int
1222 value: Any
1223 line: int
1224 column: int
1225 end_line: int
1226 end_column: int
1227 end_pos: int
1229 def __new__(cls, type_, value, start_pos=None, line=None, column=None, end_line=None, end_column=None, end_pos=None):
1230 inst = super(Token, cls).__new__(cls, value)
1231 inst.type = type_
1232 inst.start_pos = start_pos
1233 inst.value = value
1234 inst.line = line
1235 inst.column = column
1236 inst.end_line = end_line
1237 inst.end_column = end_column
1238 inst.end_pos = end_pos
1239 return inst
1241 def update(self, type_: Optional[str]=None, value: Optional[Any]=None) -> 'Token':
1242 return Token.new_borrow_pos(
1243 type_ if type_ is not None else self.type,
1244 value if value is not None else self.value,
1245 self
1246 )
1248 @classmethod
1249 def new_borrow_pos(cls: Type[_T], type_: str, value: Any, borrow_t: 'Token') -> _T:
1250 return cls(type_, value, borrow_t.start_pos, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos)
1252 def __reduce__(self):
1253 return (self.__class__, (self.type, self.value, self.start_pos, self.line, self.column))
1255 def __repr__(self):
1256 return 'Token(%r, %r)' % (self.type, self.value)
1258 def __deepcopy__(self, memo):
1259 return Token(self.type, self.value, self.start_pos, self.line, self.column)
1261 def __eq__(self, other):
1262 if isinstance(other, Token) and self.type != other.type:
1263 return False
1265 return str.__eq__(self, other)
1267 __hash__ = str.__hash__
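# Usage sketch (illustrative): Token subclasses str, so it compares equal to
# the plain string it wraps while still carrying its type and position.
def _example_token():
    tok = Token('NUMBER', '42', start_pos=0, line=1, column=5)
    return tok == '42', tok.type, tok.column  # -> (True, 'NUMBER', 5)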
1270class LineCounter:
1271 __slots__ = 'char_pos', 'line', 'column', 'line_start_pos', 'newline_char'
1273 def __init__(self, newline_char):
1274 self.newline_char = newline_char
1275 self.char_pos = 0
1276 self.line = 1
1277 self.column = 1
1278 self.line_start_pos = 0
1280 def __eq__(self, other):
1281 if not isinstance(other, LineCounter):
1282 return NotImplemented
1284 return self.char_pos == other.char_pos and self.newline_char == other.newline_char
1286 def feed(self, token: Token, test_newline=True):
1287 #--
1288 if test_newline:
1289 newlines = token.count(self.newline_char)
1290 if newlines:
1291 self.line += newlines
1292 self.line_start_pos = self.char_pos + token.rindex(self.newline_char) + 1
1294 self.char_pos += len(token)
1295 self.column = self.char_pos - self.line_start_pos + 1
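# Usage sketch (illustrative): LineCounter.feed() advances line/column
# bookkeeping as matched text is consumed; a plain str works in place of a
# Token here because only count/rindex/len are used.
def _example_line_counter():
    lc = LineCounter('\n')
    lc.feed('one\ntwo')
    return lc.line, lc.column  # -> (2, 4)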
1298class UnlessCallback:
1299 def __init__(self, scanner):
1300 self.scanner = scanner
1302 def __call__(self, t):
1303 res = self.scanner.match(t.value, 0)
1304 if res:
1305 _value, t.type = res
1306 return t
1309class CallChain:
1310 def __init__(self, callback1, callback2, cond):
1311 self.callback1 = callback1
1312 self.callback2 = callback2
1313 self.cond = cond
1315 def __call__(self, t):
1316 t2 = self.callback1(t)
1317 return self.callback2(t) if self.cond(t2) else t2
1320def _get_match(re_, regexp, s, flags):
1321 m = re_.match(regexp, s, flags)
1322 if m:
1323 return m.group(0)
1325def _create_unless(terminals, g_regex_flags, re_, use_bytes):
1326 tokens_by_type = classify(terminals, lambda t: type(t.pattern))
1327 assert len(tokens_by_type) <= 2, tokens_by_type.keys()
1328 embedded_strs = set()
1329 callback = {}
1330 for retok in tokens_by_type.get(PatternRE, []):
1331 unless = []
1332 for strtok in tokens_by_type.get(PatternStr, []):
1333 if strtok.priority != retok.priority:
1334 continue
1335 s = strtok.pattern.value
1336 if s == _get_match(re_, retok.pattern.to_regexp(), s, g_regex_flags):
1337 unless.append(strtok)
1338 if strtok.pattern.flags <= retok.pattern.flags:
1339 embedded_strs.add(strtok)
1340 if unless:
1341 callback[retok.name] = UnlessCallback(Scanner(unless, g_regex_flags, re_, match_whole=True, use_bytes=use_bytes))
1343 new_terminals = [t for t in terminals if t not in embedded_strs]
1344 return new_terminals, callback
1347class Scanner:
1348 def __init__(self, terminals, g_regex_flags, re_, use_bytes, match_whole=False):
1349 self.terminals = terminals
1350 self.g_regex_flags = g_regex_flags
1351 self.re_ = re_
1352 self.use_bytes = use_bytes
1353 self.match_whole = match_whole
1355 self.allowed_types = {t.name for t in self.terminals}
1357 self._mres = self._build_mres(terminals, len(terminals))
1359 def _build_mres(self, terminals, max_size):
1360 ##
1362 ##
1364 ##
1366 postfix = '$' if self.match_whole else ''
1367 mres = []
1368 while terminals:
1369 pattern = u'|'.join(u'(?P<%s>%s)' % (t.name, t.pattern.to_regexp() + postfix) for t in terminals[:max_size])
1370 if self.use_bytes:
1371 pattern = pattern.encode('latin-1')
1372 try:
1373 mre = self.re_.compile(pattern, self.g_regex_flags)
1374 except AssertionError: ##
1376 return self._build_mres(terminals, max_size//2)
1378 mres.append(mre)
1379 terminals = terminals[max_size:]
1380 return mres
1382 def match(self, text, pos):
1383 for mre in self._mres:
1384 m = mre.match(text, pos)
1385 if m:
1386 return m.group(0), m.lastgroup
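# Usage sketch (illustrative): a Scanner compiles its terminals into a
# named-group alternation and returns the first alternative matching at the
# given position; the two terminals below are invented for the example.
def _example_scanner():
    terminals = [TerminalDef('INT', PatternRE('[0-9]+')),
                 TerminalDef('WORD', PatternRE('[a-z]+'))]
    scanner = Scanner(terminals, g_regex_flags=0, re_=re, use_bytes=False)
    return scanner.match('abc123', 3)  # -> ('123', 'INT')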
1389def _regexp_has_newline(r: str):
1390 #--
1391 return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' in r)
1394class LexerState:
1395 #--
1397 __slots__ = 'text', 'line_ctr', 'last_token'
1399 def __init__(self, text, line_ctr=None, last_token=None):
1400 self.text = text
1401 self.line_ctr = line_ctr or LineCounter(b'\n' if isinstance(text, bytes) else '\n')
1402 self.last_token = last_token
1404 def __eq__(self, other):
1405 if not isinstance(other, LexerState):
1406 return NotImplemented
1408 return self.text is other.text and self.line_ctr == other.line_ctr and self.last_token == other.last_token
1410 def __copy__(self):
1411 return type(self)(self.text, copy(self.line_ctr), self.last_token)
1414class LexerThread:
1415 #--
1417 def __init__(self, lexer: 'Lexer', lexer_state: LexerState):
1418 self.lexer = lexer
1419 self.state = lexer_state
1421 @classmethod
1422 def from_text(cls, lexer: 'Lexer', text: str):
1423 return cls(lexer, LexerState(text))
1425 def lex(self, parser_state):
1426 return self.lexer.lex(self.state, parser_state)
1428 def __copy__(self):
1429 return type(self)(self.lexer, copy(self.state))
1431 _Token = Token
1434_Callback = Callable[[Token], Token]
1436class Lexer(ABC):
1437 #--
1438 @abstractmethod
1439 def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]:
1440 return NotImplemented
1442 def make_lexer_state(self, text):
1443 #--
1444 return LexerState(text)
1447class BasicLexer(Lexer):
1449 terminals: Collection[TerminalDef]
1450 ignore_types: FrozenSet[str]
1451 newline_types: FrozenSet[str]
1452 user_callbacks: Dict[str, _Callback]
1453 callback: Dict[str, _Callback]
1454 re: ModuleType
1456 def __init__(self, conf: 'LexerConf') -> None:
1457 terminals = list(conf.terminals)
1458 assert all(isinstance(t, TerminalDef) for t in terminals), terminals
1460 self.re = conf.re_module
1462 if not conf.skip_validation:
1463 ##
1465 for t in terminals:
1466 try:
1467 self.re.compile(t.pattern.to_regexp(), conf.g_regex_flags)
1468 except self.re.error:
1469 raise LexError("Cannot compile token %s: %s" % (t.name, t.pattern))
1471 if t.pattern.min_width == 0:
1472 raise LexError("Lexer does not allow zero-width terminals. (%s: %s)" % (t.name, t.pattern))
1474 if not (set(conf.ignore) <= {t.name for t in terminals}):
1475 raise LexError("Ignore terminals are not defined: %s" % (set(conf.ignore) - {t.name for t in terminals}))
1477 ##
1479 self.newline_types = frozenset(t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp()))
1480 self.ignore_types = frozenset(conf.ignore)
1482 terminals.sort(key=lambda x: (-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name))
1483 self.terminals = terminals
1484 self.user_callbacks = conf.callbacks
1485 self.g_regex_flags = conf.g_regex_flags
1486 self.use_bytes = conf.use_bytes
1487 self.terminals_by_name = conf.terminals_by_name
1489 self._scanner = None
1491 def _build_scanner(self):
1492 terminals, self.callback = _create_unless(self.terminals, self.g_regex_flags, self.re, self.use_bytes)
1493 assert all(self.callback.values())
1495 for type_, f in self.user_callbacks.items():
1496 if type_ in self.callback:
1497 ##
1499 self.callback[type_] = CallChain(self.callback[type_], f, lambda t: t.type == type_)
1500 else:
1501 self.callback[type_] = f
1503 self._scanner = Scanner(terminals, self.g_regex_flags, self.re, self.use_bytes)
1505 @property
1506 def scanner(self):
1507 if self._scanner is None:
1508 self._build_scanner()
1509 return self._scanner
1511 def match(self, text, pos):
1512 return self.scanner.match(text, pos)
1514 def lex(self, state: LexerState, parser_state: Any) -> Iterator[Token]:
1515 with suppress(EOFError):
1516 while True:
1517 yield self.next_token(state, parser_state)
1519 def next_token(self, lex_state: LexerState, parser_state: Any=None) -> Token:
1520 line_ctr = lex_state.line_ctr
1521 while line_ctr.char_pos < len(lex_state.text):
1522 res = self.match(lex_state.text, line_ctr.char_pos)
1523 if not res:
1524 allowed = self.scanner.allowed_types - self.ignore_types
1525 if not allowed:
1526 allowed = {"<END-OF-FILE>"}
1527 raise UnexpectedCharacters(lex_state.text, line_ctr.char_pos, line_ctr.line, line_ctr.column,
1528 allowed=allowed, token_history=lex_state.last_token and [lex_state.last_token],
1529 state=parser_state, terminals_by_name=self.terminals_by_name)
1531 value, type_ = res
1533 if type_ not in self.ignore_types:
1534 t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column)
1535 line_ctr.feed(value, type_ in self.newline_types)
1536 t.end_line = line_ctr.line
1537 t.end_column = line_ctr.column
1538 t.end_pos = line_ctr.char_pos
1539 if t.type in self.callback:
1540 t = self.callback[t.type](t)
1541 if not isinstance(t, Token):
1542 raise LexError("Callbacks must return a token (returned %r)" % t)
1543 lex_state.last_token = t
1544 return t
1545 else:
1546 if type_ in self.callback:
1547 t2 = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column)
1548 self.callback[type_](t2)
1549 line_ctr.feed(value, type_ in self.newline_types)
1551 ##
1553 raise EOFError(self)
1556class ContextualLexer(Lexer):
1558 lexers: Dict[str, BasicLexer]
1559 root_lexer: BasicLexer
1561 def __init__(self, conf: 'LexerConf', states: Dict[str, Collection[str]], always_accept: Collection[str]=()) -> None:
1562 terminals = list(conf.terminals)
1563 terminals_by_name = conf.terminals_by_name
1565 trad_conf = copy(conf)
1566 trad_conf.terminals = terminals
1568 lexer_by_tokens: Dict[FrozenSet[str], BasicLexer] = {}
1569 self.lexers = {}
1570 for state, accepts in states.items():
1571 key = frozenset(accepts)
1572 try:
1573 lexer = lexer_by_tokens[key]
1574 except KeyError:
1575 accepts = set(accepts) | set(conf.ignore) | set(always_accept)
1576 lexer_conf = copy(trad_conf)
1577 lexer_conf.terminals = [terminals_by_name[n] for n in accepts if n in terminals_by_name]
1578 lexer = BasicLexer(lexer_conf)
1579 lexer_by_tokens[key] = lexer
1581 self.lexers[state] = lexer
1583 assert trad_conf.terminals is terminals
1584 self.root_lexer = BasicLexer(trad_conf)
1586 def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]:
1587 try:
1588 while True:
1589 lexer = self.lexers[parser_state.position]
1590 yield lexer.next_token(lexer_state, parser_state)
1591 except EOFError:
1592 pass
1593 except UnexpectedCharacters as e:
1594 ##
1596 ##
1598 try:
1599 last_token = lexer_state.last_token ##
1601 token = self.root_lexer.next_token(lexer_state, parser_state)
1602 raise UnexpectedToken(token, e.allowed, state=parser_state, token_history=[last_token], terminals_by_name=self.root_lexer.terminals_by_name)
1603 except UnexpectedCharacters:
1604 raise e ##
1609_ParserArgType: 'TypeAlias' = 'Literal["earley", "lalr", "cyk", "auto"]'
1610_LexerArgType: 'TypeAlias' = 'Union[Literal["auto", "basic", "contextual", "dynamic", "dynamic_complete"], Type[Lexer]]'
1611_Callback = Callable[[Token], Token]
1613class LexerConf(Serialize):
1614 __serialize_fields__ = 'terminals', 'ignore', 'g_regex_flags', 'use_bytes', 'lexer_type'
1615 __serialize_namespace__ = TerminalDef,
1617 terminals: Collection[TerminalDef]
1618 re_module: ModuleType
1619 ignore: Collection[str]
1620 postlex: 'Optional[PostLex]'
1621 callbacks: Dict[str, _Callback]
1622 g_regex_flags: int
1623 skip_validation: bool
1624 use_bytes: bool
1625 lexer_type: Optional[_LexerArgType]
1627 def __init__(self, terminals: Collection[TerminalDef], re_module: ModuleType, ignore: Collection[str]=(), postlex: 'Optional[PostLex]'=None, callbacks: Optional[Dict[str, _Callback]]=None, g_regex_flags: int=0, skip_validation: bool=False, use_bytes: bool=False):
1628 self.terminals = terminals
1629 self.terminals_by_name = {t.name: t for t in self.terminals}
1630 assert len(self.terminals) == len(self.terminals_by_name)
1631 self.ignore = ignore
1632 self.postlex = postlex
1633 self.callbacks = callbacks or {}
1634 self.g_regex_flags = g_regex_flags
1635 self.re_module = re_module
1636 self.skip_validation = skip_validation
1637 self.use_bytes = use_bytes
1638 self.lexer_type = None
1640 def _deserialize(self):
1641 self.terminals_by_name = {t.name: t for t in self.terminals}
1643 def __deepcopy__(self, memo=None):
1644 return type(self)(
1645 deepcopy(self.terminals, memo),
1646 self.re_module,
1647 deepcopy(self.ignore, memo),
1648 deepcopy(self.postlex, memo),
1649 deepcopy(self.callbacks, memo),
1650 deepcopy(self.g_regex_flags, memo),
1651 deepcopy(self.skip_validation, memo),
1652 deepcopy(self.use_bytes, memo),
1653 )
1656class ParserConf(Serialize):
1657 __serialize_fields__ = 'rules', 'start', 'parser_type'
1659 def __init__(self, rules, callbacks, start):
1660 assert isinstance(start, list)
1661 self.rules = rules
1662 self.callbacks = callbacks
1663 self.start = start
1665 self.parser_type = None
1668from functools import partial, wraps
1669from itertools import repeat, product
1672class ExpandSingleChild:
1673 def __init__(self, node_builder):
1674 self.node_builder = node_builder
1676 def __call__(self, children):
1677 if len(children) == 1:
1678 return children[0]
1679 else:
1680 return self.node_builder(children)
1684class PropagatePositions:
1685 def __init__(self, node_builder, node_filter=None):
1686 self.node_builder = node_builder
1687 self.node_filter = node_filter
1689 def __call__(self, children):
1690 res = self.node_builder(children)
1692 if isinstance(res, Tree):
1693 ##
1695 ##
1697 ##
1699 ##
1702 res_meta = res.meta
1704 first_meta = self._pp_get_meta(children)
1705 if first_meta is not None:
1706 if not hasattr(res_meta, 'line'):
1707 ##
1709 res_meta.line = getattr(first_meta, 'container_line', first_meta.line)
1710 res_meta.column = getattr(first_meta, 'container_column', first_meta.column)
1711 res_meta.start_pos = getattr(first_meta, 'container_start_pos', first_meta.start_pos)
1712 res_meta.empty = False
1714 res_meta.container_line = getattr(first_meta, 'container_line', first_meta.line)
1715 res_meta.container_column = getattr(first_meta, 'container_column', first_meta.column)
1717 last_meta = self._pp_get_meta(reversed(children))
1718 if last_meta is not None:
1719 if not hasattr(res_meta, 'end_line'):
1720 res_meta.end_line = getattr(last_meta, 'container_end_line', last_meta.end_line)
1721 res_meta.end_column = getattr(last_meta, 'container_end_column', last_meta.end_column)
1722 res_meta.end_pos = getattr(last_meta, 'container_end_pos', last_meta.end_pos)
1723 res_meta.empty = False
1725 res_meta.container_end_line = getattr(last_meta, 'container_end_line', last_meta.end_line)
1726 res_meta.container_end_column = getattr(last_meta, 'container_end_column', last_meta.end_column)
1728 return res
1730 def _pp_get_meta(self, children):
1731 for c in children:
1732 if self.node_filter is not None and not self.node_filter(c):
1733 continue
1734 if isinstance(c, Tree):
1735 if not c.meta.empty:
1736 return c.meta
1737 elif isinstance(c, Token):
1738 return c
1740def make_propagate_positions(option):
1741 if callable(option):
1742 return partial(PropagatePositions, node_filter=option)
1743 elif option is True:
1744 return PropagatePositions
1745 elif option is False:
1746 return None
1748 raise ConfigurationError('Invalid option for propagate_positions: %r' % option)
1751class ChildFilter:
1752 def __init__(self, to_include, append_none, node_builder):
1753 self.node_builder = node_builder
1754 self.to_include = to_include
1755 self.append_none = append_none
1757 def __call__(self, children):
1758 filtered = []
1760 for i, to_expand, add_none in self.to_include:
1761 if add_none:
1762 filtered += [None] * add_none
1763 if to_expand:
1764 filtered += children[i].children
1765 else:
1766 filtered.append(children[i])
1768 if self.append_none:
1769 filtered += [None] * self.append_none
1771 return self.node_builder(filtered)
1774class ChildFilterLALR(ChildFilter):
1775 #--
1777 def __call__(self, children):
1778 filtered = []
1779 for i, to_expand, add_none in self.to_include:
1780 if add_none:
1781 filtered += [None] * add_none
1782 if to_expand:
1783 if filtered:
1784 filtered += children[i].children
1785 else: ##
1787 filtered = children[i].children
1788 else:
1789 filtered.append(children[i])
1791 if self.append_none:
1792 filtered += [None] * self.append_none
1794 return self.node_builder(filtered)
1797class ChildFilterLALR_NoPlaceholders(ChildFilter):
1798 #--
1799 def __init__(self, to_include, node_builder):
1800 self.node_builder = node_builder
1801 self.to_include = to_include
1803 def __call__(self, children):
1804 filtered = []
1805 for i, to_expand in self.to_include:
1806 if to_expand:
1807 if filtered:
1808 filtered += children[i].children
1809 else: ##
1811 filtered = children[i].children
1812 else:
1813 filtered.append(children[i])
1814 return self.node_builder(filtered)
1817def _should_expand(sym):
1818 return not sym.is_term and sym.name.startswith('_')
1821def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices: List[bool]):
1822 ##
1824 if _empty_indices:
1825 assert _empty_indices.count(False) == len(expansion)
1826 s = ''.join(str(int(b)) for b in _empty_indices)
1827 empty_indices = [len(ones) for ones in s.split('0')]
1828 assert len(empty_indices) == len(expansion)+1, (empty_indices, len(expansion))
1829 else:
1830 empty_indices = [0] * (len(expansion)+1)
1832 to_include = []
1833 nones_to_add = 0
1834 for i, sym in enumerate(expansion):
1835 nones_to_add += empty_indices[i]
1836 if keep_all_tokens or not (sym.is_term and sym.filter_out):
1837 to_include.append((i, _should_expand(sym), nones_to_add))
1838 nones_to_add = 0
1840 nones_to_add += empty_indices[len(expansion)]
1842 if _empty_indices or len(to_include) < len(expansion) or any(to_expand for i, to_expand,_ in to_include):
1843 if _empty_indices or ambiguous:
1844 return partial(ChildFilter if ambiguous else ChildFilterLALR, to_include, nones_to_add)
1845 else:
1846 ##
1848 return partial(ChildFilterLALR_NoPlaceholders, [(i, x) for i,x,_ in to_include])
1851class AmbiguousExpander:
1852 #--
1853 def __init__(self, to_expand, tree_class, node_builder):
1854 self.node_builder = node_builder
1855 self.tree_class = tree_class
1856 self.to_expand = to_expand
1858 def __call__(self, children):
1859 def _is_ambig_tree(t):
1860 return hasattr(t, 'data') and t.data == '_ambig'
1862 ##
1864 ##
1866 ##
1868 ##
1870 ambiguous = []
1871 for i, child in enumerate(children):
1872 if _is_ambig_tree(child):
1873 if i in self.to_expand:
1874 ambiguous.append(i)
1876 child.expand_kids_by_data('_ambig')
1878 if not ambiguous:
1879 return self.node_builder(children)
1881 expand = [iter(child.children) if i in ambiguous else repeat(child) for i, child in enumerate(children)]
1882 return self.tree_class('_ambig', [self.node_builder(list(f[0])) for f in product(zip(*expand))])
1885def maybe_create_ambiguous_expander(tree_class, expansion, keep_all_tokens):
1886 to_expand = [i for i, sym in enumerate(expansion)
1887 if keep_all_tokens or ((not (sym.is_term and sym.filter_out)) and _should_expand(sym))]
1888 if to_expand:
1889 return partial(AmbiguousExpander, to_expand, tree_class)
1892class AmbiguousIntermediateExpander:
1893 #--
1895 def __init__(self, tree_class, node_builder):
1896 self.node_builder = node_builder
1897 self.tree_class = tree_class
1899 def __call__(self, children):
1900 def _is_iambig_tree(child):
1901 return hasattr(child, 'data') and child.data == '_iambig'
1903 def _collapse_iambig(children):
1904 #--
1906 ##
1908 ##
1910 if children and _is_iambig_tree(children[0]):
1911 iambig_node = children[0]
1912 result = []
1913 for grandchild in iambig_node.children:
1914 collapsed = _collapse_iambig(grandchild.children)
1915 if collapsed:
1916 for child in collapsed:
1917 child.children += children[1:]
1918 result += collapsed
1919 else:
1920 new_tree = self.tree_class('_inter', grandchild.children + children[1:])
1921 result.append(new_tree)
1922 return result
1924 collapsed = _collapse_iambig(children)
1925 if collapsed:
1926 processed_nodes = [self.node_builder(c.children) for c in collapsed]
1927 return self.tree_class('_ambig', processed_nodes)
1929 return self.node_builder(children)
1933def inplace_transformer(func):
1934 @wraps(func)
1935 def f(children):
1936 ##
1938 tree = Tree(func.__name__, children)
1939 return func(tree)
1940 return f
1943def apply_visit_wrapper(func, name, wrapper):
1944 if wrapper is _vargs_meta or wrapper is _vargs_meta_inline:
1945 raise NotImplementedError("Meta args not supported for internal transformer")
1947 @wraps(func)
1948 def f(children):
1949 return wrapper(func, name, children, None)
1950 return f
1953class ParseTreeBuilder:
1954 def __init__(self, rules, tree_class, propagate_positions=False, ambiguous=False, maybe_placeholders=False):
1955 self.tree_class = tree_class
1956 self.propagate_positions = propagate_positions
1957 self.ambiguous = ambiguous
1958 self.maybe_placeholders = maybe_placeholders
1960 self.rule_builders = list(self._init_builders(rules))
1962 def _init_builders(self, rules):
1963 propagate_positions = make_propagate_positions(self.propagate_positions)
1965 for rule in rules:
1966 options = rule.options
1967 keep_all_tokens = options.keep_all_tokens
1968 expand_single_child = options.expand1
1970 wrapper_chain = list(filter(None, [
1971 (expand_single_child and not rule.alias) and ExpandSingleChild,
1972 maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders else None),
1973 propagate_positions,
1974 self.ambiguous and maybe_create_ambiguous_expander(self.tree_class, rule.expansion, keep_all_tokens),
1975 self.ambiguous and partial(AmbiguousIntermediateExpander, self.tree_class)
1976 ]))
1978 yield rule, wrapper_chain
1980 def create_callback(self, transformer=None):
1981 callbacks = {}
1983 default_handler = getattr(transformer, '__default__', None)
1984 if default_handler:
1985 def default_callback(data, children):
1986 return default_handler(data, children, None)
1987 else:
1988 default_callback = self.tree_class
1990 for rule, wrapper_chain in self.rule_builders:
1992 user_callback_name = rule.alias or rule.options.template_source or rule.origin.name
1993 try:
1994 f = getattr(transformer, user_callback_name)
1995 wrapper = getattr(f, 'visit_wrapper', None)
1996 if wrapper is not None:
1997 f = apply_visit_wrapper(f, user_callback_name, wrapper)
1998 elif isinstance(transformer, Transformer_InPlace):
1999 f = inplace_transformer(f)
2000 except AttributeError:
2001 f = partial(default_callback, user_callback_name)
2003 for w in wrapper_chain:
2004 f = w(f)
2006 if rule in callbacks:
2007 raise GrammarError("Rule '%s' already exists" % (rule,))
2009 callbacks[rule] = f
2011 return callbacks
2015class LALR_Parser(Serialize):
2016 def __init__(self, parser_conf, debug=False):
2017 analysis = LALR_Analyzer(parser_conf, debug=debug)
2018 analysis.compute_lalr()
2019 callbacks = parser_conf.callbacks
2021 self._parse_table = analysis.parse_table
2022 self.parser_conf = parser_conf
2023 self.parser = _Parser(analysis.parse_table, callbacks, debug)
2025 @classmethod
2026 def deserialize(cls, data, memo, callbacks, debug=False):
2027 inst = cls.__new__(cls)
2028 inst._parse_table = IntParseTable.deserialize(data, memo)
2029 inst.parser = _Parser(inst._parse_table, callbacks, debug)
2030 return inst
2032 def serialize(self, memo: Any = None) -> Dict[str, Any]:
2033 return self._parse_table.serialize(memo)
2035 def parse_interactive(self, lexer, start):
2036 return self.parser.parse(lexer, start, start_interactive=True)
2038 def parse(self, lexer, start, on_error=None):
2039 try:
2040 return self.parser.parse(lexer, start)
2041 except UnexpectedInput as e:
2042 if on_error is None:
2043 raise
2045 while True:
2046 if isinstance(e, UnexpectedCharacters):
2047 s = e.interactive_parser.lexer_thread.state
2048 p = s.line_ctr.char_pos
2050 if not on_error(e):
2051 raise e
2053 if isinstance(e, UnexpectedCharacters):
2054 ##
2056 if p == s.line_ctr.char_pos:
2057 s.line_ctr.feed(s.text[p:p+1])
2059 try:
2060 return e.interactive_parser.resume_parse()
2061 except UnexpectedToken as e2:
2062 if (isinstance(e, UnexpectedToken)
2063 and e.token.type == e2.token.type == '$END'
2064 and e.interactive_parser == e2.interactive_parser):
2065 ##
2067 raise e2
2068 e = e2
2069 except UnexpectedCharacters as e2:
2070 e = e2
2073class ParseConf:
2074 __slots__ = 'parse_table', 'callbacks', 'start', 'start_state', 'end_state', 'states'
2076 def __init__(self, parse_table, callbacks, start):
2077 self.parse_table = parse_table
2079 self.start_state = self.parse_table.start_states[start]
2080 self.end_state = self.parse_table.end_states[start]
2081 self.states = self.parse_table.states
2083 self.callbacks = callbacks
2084 self.start = start
2087class ParserState:
2088 __slots__ = 'parse_conf', 'lexer', 'state_stack', 'value_stack'
2090 def __init__(self, parse_conf, lexer, state_stack=None, value_stack=None):
2091 self.parse_conf = parse_conf
2092 self.lexer = lexer
2093 self.state_stack = state_stack or [self.parse_conf.start_state]
2094 self.value_stack = value_stack or []
2096 @property
2097 def position(self):
2098 return self.state_stack[-1]
2100 ##
2102 def __eq__(self, other):
2103 if not isinstance(other, ParserState):
2104 return NotImplemented
2105 return len(self.state_stack) == len(other.state_stack) and self.position == other.position
2107 def __copy__(self):
2108 return type(self)(
2109 self.parse_conf,
2110 self.lexer, ##
2112 copy(self.state_stack),
2113 deepcopy(self.value_stack),
2114 )
2116 def copy(self):
2117 return copy(self)
2119 def feed_token(self, token, is_end=False):
2120 state_stack = self.state_stack
2121 value_stack = self.value_stack
2122 states = self.parse_conf.states
2123 end_state = self.parse_conf.end_state
2124 callbacks = self.parse_conf.callbacks
2126 while True:
2127 state = state_stack[-1]
2128 try:
2129 action, arg = states[state][token.type]
2130 except KeyError:
2131 expected = {s for s in states[state].keys() if s.isupper()}
2132 raise UnexpectedToken(token, expected, state=self, interactive_parser=None)
2134 assert arg != end_state
2136 if action is Shift:
2137 ## shift once and return
2139 assert not is_end
2140 state_stack.append(arg)
2141 value_stack.append(token if token.type not in callbacks else callbacks[token.type](token))
2142 return
2143 else:
2144 ## reduce+shift as many times as necessary
2146 rule = arg
2147 size = len(rule.expansion)
2148 if size:
2149 s = value_stack[-size:]
2150 del state_stack[-size:]
2151 del value_stack[-size:]
2152 else:
2153 s = []
2155 value = callbacks[rule](s)
2157 _action, new_state = states[state_stack[-1]][rule.origin.name]
2158 assert _action is Shift
2159 state_stack.append(new_state)
2160 value_stack.append(value)
2162 if is_end and state_stack[-1] == end_state:
2163 return value_stack[-1]
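# Hedged reading of ParserState above: __copy__ shallow-copies the state stack but
# deep-copies the value stack, presumably so a forked state (e.g. for interactive or
# speculative parsing) cannot mutate partially built trees shared with the original;
# the lexer object itself is shared between the copies.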
2165class _Parser:
2166 def __init__(self, parse_table, callbacks, debug=False):
2167 self.parse_table = parse_table
2168 self.callbacks = callbacks
2169 self.debug = debug
2171 def parse(self, lexer, start, value_stack=None, state_stack=None, start_interactive=False):
2172 parse_conf = ParseConf(self.parse_table, self.callbacks, start)
2173 parser_state = ParserState(parse_conf, lexer, state_stack, value_stack)
2174 if start_interactive:
2175 return InteractiveParser(self, parser_state, parser_state.lexer)
2176 return self.parse_from_state(parser_state)
2179 def parse_from_state(self, state):
2180 ##
2182 try:
2183 token = None
2184 for token in state.lexer.lex(state):
2185 state.feed_token(token)
2187 end_token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1)
2188 return state.feed_token(end_token, True)
2189 except UnexpectedInput as e:
2190 try:
2191 e.interactive_parser = InteractiveParser(self, state, state.lexer)
2192 except NameError:
2193 pass
2194 raise e
2195 except Exception as e:
2196 if self.debug:
2197 print("")
2198 print("STATE STACK DUMP")
2199 print("----------------")
2200 for i, s in enumerate(state.state_stack):
2201 print('%d)' % i, s)
2202 print("")
2204 raise
2207class Action:
2208 def __init__(self, name):
2209 self.name = name
2210 def __str__(self):
2211 return self.name
2212 def __repr__(self):
2213 return str(self)
2215Shift = Action('Shift')
2216Reduce = Action('Reduce')
2219class ParseTable:
2220 def __init__(self, states, start_states, end_states):
2221 self.states = states
2222 self.start_states = start_states
2223 self.end_states = end_states
2225 def serialize(self, memo):
2226 tokens = Enumerator()
2228 states = {
2229 state: {tokens.get(token): ((1, arg.serialize(memo)) if action is Reduce else (0, arg))
2230 for token, (action, arg) in actions.items()}
2231 for state, actions in self.states.items()
2232 }
2234 return {
2235 'tokens': tokens.reversed(),
2236 'states': states,
2237 'start_states': self.start_states,
2238 'end_states': self.end_states,
2239 }
2241 @classmethod
2242 def deserialize(cls, data, memo):
2243 tokens = data['tokens']
2244 states = {
2245 state: {tokens[token]: ((Reduce, Rule.deserialize(arg, memo)) if action==1 else (Shift, arg))
2246 for token, (action, arg) in actions.items()}
2247 for state, actions in data['states'].items()
2248 }
2249 return cls(states, data['start_states'], data['end_states'])
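# Hedged reading of serialize()/deserialize() above: in serialized form -- as in the DATA
# blob at the end of this file -- each parser state maps a token id to (0, next_state) for
# a Shift or (1, serialized_rule) for a Reduce, with token ids resolved through the
# 'tokens' enumeration; the module-level `Shift = 0` / `Reduce = 1` aliases near the bottom
# of the file mirror that encoding.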
2252class IntParseTable(ParseTable):
2254 @classmethod
2255 def from_ParseTable(cls, parse_table):
2256 enum = list(parse_table.states)
2257 state_to_idx = {s:i for i,s in enumerate(enum)}
2258 int_states = {}
2260 for s, la in parse_table.states.items():
2261 la = {k:(v[0], state_to_idx[v[1]]) if v[0] is Shift else v
2262 for k,v in la.items()}
2263 int_states[ state_to_idx[s] ] = la
2266 start_states = {start:state_to_idx[s] for start, s in parse_table.start_states.items()}
2267 end_states = {start:state_to_idx[s] for start, s in parse_table.end_states.items()}
2268 return cls(int_states, start_states, end_states)
2272def _wrap_lexer(lexer_class):
2273 future_interface = getattr(lexer_class, '__future_interface__', False)
2274 if future_interface:
2275 return lexer_class
2276 else:
2277 class CustomLexerWrapper(Lexer):
2278 def __init__(self, lexer_conf):
2279 self.lexer = lexer_class(lexer_conf)
2280 def lex(self, lexer_state, parser_state):
2281 return self.lexer.lex(lexer_state.text)
2282 return CustomLexerWrapper
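# Illustrative sketch (not emitted by the Lark generator) of a user-supplied lexer class.
# Because it sets __future_interface__, _wrap_lexer() above would return it unchanged;
# without the flag it would be wrapped so that only lexer_state.text reaches an old-style
# lex(text) method.  Plugging such a class in via the `lexer=` option requires the full
# `lark` package; this standalone runtime ships a fixed contextual lexer.
class _ExampleWhitespaceLexer(Lexer):
    __future_interface__ = True

    def __init__(self, lexer_conf):
        self.lexer_conf = lexer_conf          # kept only for illustration

    def lex(self, lexer_state, parser_state):
        # Emit one WORD token per whitespace-separated chunk of the input text.
        for chunk in lexer_state.text.split():
            yield Token('WORD', chunk)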
2285def _deserialize_parsing_frontend(data, memo, lexer_conf, callbacks, options):
2286 parser_conf = ParserConf.deserialize(data['parser_conf'], memo)
2287 cls = (options and options._plugins.get('LALR_Parser')) or LALR_Parser
2288 parser = cls.deserialize(data['parser'], memo, callbacks, options.debug)
2289 parser_conf.callbacks = callbacks
2290 return ParsingFrontend(lexer_conf, parser_conf, options, parser=parser)
2293_parser_creators: 'Dict[str, Callable[[LexerConf, Any, Any], Any]]' = {}
2296class ParsingFrontend(Serialize):
2297 __serialize_fields__ = 'lexer_conf', 'parser_conf', 'parser'
2299 def __init__(self, lexer_conf, parser_conf, options, parser=None):
2300 self.parser_conf = parser_conf
2301 self.lexer_conf = lexer_conf
2302 self.options = options
2304 ##
2306 if parser: ##
2308 self.parser = parser
2309 else:
2310 create_parser = _parser_creators.get(parser_conf.parser_type)
2311 assert create_parser is not None, "{} is not supported in standalone mode".format(
2312 parser_conf.parser_type
2313 )
2314 self.parser = create_parser(lexer_conf, parser_conf, options)
2316 ##
2318 lexer_type = lexer_conf.lexer_type
2319 self.skip_lexer = False
2320 if lexer_type in ('dynamic', 'dynamic_complete'):
2321 assert lexer_conf.postlex is None
2322 self.skip_lexer = True
2323 return
2325 try:
2326 create_lexer = {
2327 'basic': create_basic_lexer,
2328 'contextual': create_contextual_lexer,
2329 }[lexer_type]
2330 except KeyError:
2331 assert issubclass(lexer_type, Lexer), lexer_type
2332 self.lexer = _wrap_lexer(lexer_type)(lexer_conf)
2333 else:
2334 self.lexer = create_lexer(lexer_conf, self.parser, lexer_conf.postlex, options)
2336 if lexer_conf.postlex:
2337 self.lexer = PostLexConnector(self.lexer, lexer_conf.postlex)
2339 def _verify_start(self, start=None):
2340 if start is None:
2341 start_decls = self.parser_conf.start
2342 if len(start_decls) > 1:
2343 raise ConfigurationError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start_decls)
2344 start, = start_decls
2345 elif start not in self.parser_conf.start:
2346 raise ConfigurationError("Unknown start rule %s. Must be one of %r" % (start, self.parser_conf.start))
2347 return start
2349 def _make_lexer_thread(self, text):
2350 cls = (self.options and self.options._plugins.get('LexerThread')) or LexerThread
2351 return text if self.skip_lexer else cls.from_text(self.lexer, text)
2353 def parse(self, text, start=None, on_error=None):
2354 chosen_start = self._verify_start(start)
2355 kw = {} if on_error is None else {'on_error': on_error}
2356 stream = self._make_lexer_thread(text)
2357 return self.parser.parse(stream, chosen_start, **kw)
2359 def parse_interactive(self, text=None, start=None):
2360 chosen_start = self._verify_start(start)
2361 if self.parser_conf.parser_type != 'lalr':
2362 raise ConfigurationError("parse_interactive() currently only works with parser='lalr' ")
2363 stream = self._make_lexer_thread(text)
2364 return self.parser.parse_interactive(stream, chosen_start)
2367def _validate_frontend_args(parser, lexer) -> None:
2368 assert_config(parser, ('lalr', 'earley', 'cyk'))
2369 if not isinstance(lexer, type): ##
2371 expected = {
2372 'lalr': ('basic', 'contextual'),
2373 'earley': ('basic', 'dynamic', 'dynamic_complete'),
2374 'cyk': ('basic', ),
2375 }[parser]
2376 assert_config(lexer, expected, 'Parser %r does not support lexer %%r, expected one of %%s' % parser)
2379def _get_lexer_callbacks(transformer, terminals):
2380 result = {}
2381 for terminal in terminals:
2382 callback = getattr(transformer, terminal.name, None)
2383 if callback is not None:
2384 result[terminal.name] = callback
2385 return result
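# Illustrative sketch (not emitted by the Lark generator) of terminal-level token
# callbacks: _get_lexer_callbacks() above collects them from a transformer by terminal
# name, and the same {terminal_name: callable} shape can be passed via the lexer_callbacks
# option, which the standalone loader accepts (see _LOAD_ALLOWED_OPTIONS further below).
# ESCAPED_STRING is one of the terminals of the embedded JSON grammar serialized in DATA.
def _example_lexer_callbacks():
    def shout_strings(token):
        # Return a replacement token; new_borrow_pos keeps the original position info.
        return Token.new_borrow_pos(token.type, token.value.upper(), token)

    parser = Lark_StandAlone(lexer_callbacks={"ESCAPED_STRING": shout_strings})
    return parser.parse('{"key": "value"}')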
2387class PostLexConnector:
2388 def __init__(self, lexer, postlexer):
2389 self.lexer = lexer
2390 self.postlexer = postlexer
2392 def lex(self, lexer_state, parser_state):
2393 i = self.lexer.lex(lexer_state, parser_state)
2394 return self.postlexer.process(i)
2398def create_basic_lexer(lexer_conf, parser, postlex, options):
2399 cls = (options and options._plugins.get('BasicLexer')) or BasicLexer
2400 return cls(lexer_conf)
2402def create_contextual_lexer(lexer_conf, parser, postlex, options):
2403 cls = (options and options._plugins.get('ContextualLexer')) or ContextualLexer
2404 states = {idx:list(t.keys()) for idx, t in parser._parse_table.states.items()}
2405 always_accept = postlex.always_accept if postlex else ()
2406 return cls(lexer_conf, states, always_accept=always_accept)
2408def create_lalr_parser(lexer_conf, parser_conf, options=None):
2409 debug = options.debug if options else False
2410 cls = (options and options._plugins.get('LALR_Parser')) or LALR_Parser
2411 return cls(parser_conf, debug=debug)
2413_parser_creators['lalr'] = create_lalr_parser
2418class PostLex(ABC):
2419 @abstractmethod
2420 def process(self, stream: Iterator[Token]) -> Iterator[Token]:
2421 return stream
2423 always_accept: Iterable[str] = ()
2425class LarkOptions(Serialize):
2426 #--
2428 start: List[str]
2429 debug: bool
2430 transformer: 'Optional[Transformer]'
2431 propagate_positions: Union[bool, str]
2432 maybe_placeholders: bool
2433 cache: Union[bool, str]
2434 regex: bool
2435 g_regex_flags: int
2436 keep_all_tokens: bool
2437 tree_class: Any
2438 parser: _ParserArgType
2439 lexer: _LexerArgType
2440 ambiguity: 'Literal["auto", "resolve", "explicit", "forest"]'
2441 postlex: Optional[PostLex]
2442 priority: 'Optional[Literal["auto", "normal", "invert"]]'
2443 lexer_callbacks: Dict[str, Callable[[Token], Token]]
2444 use_bytes: bool
2445 edit_terminals: Optional[Callable[[TerminalDef], TerminalDef]]
2446 import_paths: 'List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]]'
2447 source_path: Optional[str]
2449 OPTIONS_DOC = """
2450 **=== General Options ===**
2452 start
2453 The start symbol. Either a string, or a list of strings for multiple possible starts (Default: "start")
2454 debug
2455 Display debug information and extra warnings. Use only when debugging (Default: ``False``)
2456 When used with Earley, it generates a forest graph as "sppf.png", if 'dot' is installed.
2457 transformer
2458 Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster)
2459 propagate_positions
2460 Propagates (line, column, end_line, end_column) attributes into all tree branches.
2461 Accepts ``False``, ``True``, or a callable, which will filter which nodes to ignore when propagating.
2462 maybe_placeholders
2463 When ``True``, the ``[]`` operator returns ``None`` when not matched.
2464 When ``False``, ``[]`` behaves like the ``?`` operator, and returns no value at all.
2465 (default= ``True``)
2466 cache
2467 Cache the results of the Lark grammar analysis, for 2x to 3x faster loading. LALR only for now.
2469 - When ``False``, does nothing (default)
2470 - When ``True``, caches to a temporary file in the local directory
2471 - When given a string, caches to the path pointed to by the string
2472 regex
2473 When True, uses the ``regex`` module instead of the stdlib ``re``.
2474 g_regex_flags
2475 Flags that are applied to all terminals (both regex and strings)
2476 keep_all_tokens
2477 Prevent the tree builder from automagically removing "punctuation" tokens (Default: ``False``)
2478 tree_class
2479 Lark will produce trees comprised of instances of this class instead of the default ``lark.Tree``.
2481 **=== Algorithm Options ===**
2483 parser
2484 Decides which parser engine to use. Accepts "earley" or "lalr". (Default: "earley").
2485 (A legacy "cyk" option is also available.)
2486 lexer
2487 Decides whether or not to use a lexer stage
2489 - "auto" (default): Choose for me based on the parser
2490 - "basic": Use a basic lexer
2491 - "contextual": Stronger lexer (only works with parser="lalr")
2492 - "dynamic": Flexible and powerful (only with parser="earley")
2493 - "dynamic_complete": Same as dynamic, but tries *every* variation of tokenizing possible.
2494 ambiguity
2495 Decides how to handle ambiguity in the parse. Only relevant if parser="earley"
2497 - "resolve": The parser will automatically choose the simplest derivation
2498 (it chooses consistently: greedy for tokens, non-greedy for rules)
2499 - "explicit": The parser will return all derivations wrapped in "_ambig" tree nodes (i.e. a forest).
2500 - "forest": The parser will return the root of the shared packed parse forest.
2502 **=== Misc. / Domain Specific Options ===**
2504 postlex
2505 Lexer post-processing (Default: ``None``). Only works with the basic and contextual lexers.
2506 priority
2507 How priorities should be evaluated - "auto", ``None``, "normal", "invert" (Default: "auto")
2508 lexer_callbacks
2509 Dictionary of callbacks for the lexer. May alter tokens during lexing. Use with caution.
2510 use_bytes
2511 Accept an input of type ``bytes`` instead of ``str``.
2512 edit_terminals
2513 A callback for editing the terminals before parse.
2514 import_paths
2515 A list of either paths or loader functions that specify where grammars are imported from
2516 source_path
2517 Override the source path from which the grammar was loaded. Useful for relative imports and unconventional grammar loading
2518 **=== End of Options ===**
2519 """
2520 if __doc__:
2521 __doc__ += OPTIONS_DOC
2524 ##
2526 ##
2528 ##
2530 ##
2532 ##
2534 ##
2536 _defaults: Dict[str, Any] = {
2537 'debug': False,
2538 'keep_all_tokens': False,
2539 'tree_class': None,
2540 'cache': False,
2541 'postlex': None,
2542 'parser': 'earley',
2543 'lexer': 'auto',
2544 'transformer': None,
2545 'start': 'start',
2546 'priority': 'auto',
2547 'ambiguity': 'auto',
2548 'regex': False,
2549 'propagate_positions': False,
2550 'lexer_callbacks': {},
2551 'maybe_placeholders': True,
2552 'edit_terminals': None,
2553 'g_regex_flags': 0,
2554 'use_bytes': False,
2555 'import_paths': [],
2556 'source_path': None,
2557 '_plugins': {},
2558 }
2560 def __init__(self, options_dict: Dict[str, Any]) -> None:
2561 o = dict(options_dict)
2563 options = {}
2564 for name, default in self._defaults.items():
2565 if name in o:
2566 value = o.pop(name)
2567 if isinstance(default, bool) and name not in ('cache', 'use_bytes', 'propagate_positions'):
2568 value = bool(value)
2569 else:
2570 value = default
2572 options[name] = value
2574 if isinstance(options['start'], str):
2575 options['start'] = [options['start']]
2577 self.__dict__['options'] = options
2580 assert_config(self.parser, ('earley', 'lalr', 'cyk', None))
2582 if self.parser == 'earley' and self.transformer:
2583 raise ConfigurationError('Cannot specify an embedded transformer when using the Earley algorithm. '
2584 'Please use your transformer on the resulting parse tree, or use a different algorithm (e.g. LALR)')
2586 if o:
2587 raise ConfigurationError("Unknown options: %s" % o.keys())
2589 def __getattr__(self, name: str) -> Any:
2590 try:
2591 return self.__dict__['options'][name]
2592 except KeyError as e:
2593 raise AttributeError(e)
2595 def __setattr__(self, name: str, value: str) -> None:
2596 assert_config(name, self.options.keys(), "%r isn't a valid option. Expected one of: %s")
2597 self.options[name] = value
2599 def serialize(self, memo = None) -> Dict[str, Any]:
2600 return self.options
2602 @classmethod
2603 def deserialize(cls, data: Dict[str, Any], memo: Dict[int, Union[TerminalDef, Rule]]) -> "LarkOptions":
2604 return cls(data)
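# Small sketch (not emitted by the Lark generator) of how LarkOptions above normalizes its
# input: defaults are filled in from _defaults, `start` is coerced to a list, and unknown
# keys raise ConfigurationError.
def _example_lark_options():
    opts = LarkOptions({"parser": "lalr", "start": "value"})
    assert opts.parser == "lalr" and opts.start == ["value"]
    assert opts.maybe_placeholders is True       # default taken from LarkOptions._defaults
    try:
        LarkOptions({"no_such_option": 1})
    except ConfigurationError:
        pass                                     # unknown options are rejected
    return opts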
2607##
2609##
2611_LOAD_ALLOWED_OPTIONS = {'postlex', 'transformer', 'lexer_callbacks', 'use_bytes', 'debug', 'g_regex_flags', 'regex', 'propagate_positions', 'tree_class', '_plugins'}
2613_VALID_PRIORITY_OPTIONS = ('auto', 'normal', 'invert', None)
2614_VALID_AMBIGUITY_OPTIONS = ('auto', 'resolve', 'explicit', 'forest')
2617_T = TypeVar('_T', bound="Lark")
2619class Lark(Serialize):
2620 #--
2622 source_path: str
2623 source_grammar: str
2624 grammar: 'Grammar'
2625 options: LarkOptions
2626 lexer: Lexer
2627 terminals: Collection[TerminalDef]
2629 def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None:
2630 self.options = LarkOptions(options)
2631 re_module: types.ModuleType
2633 ##
2635 use_regex = self.options.regex
2636 if use_regex:
2637 if _has_regex:
2638 re_module = regex
2639 else:
2640 raise ImportError('`regex` module must be installed if calling `Lark(regex=True)`.')
2641 else:
2642 re_module = re
2644 ##
2646 if self.options.source_path is None:
2647 try:
2648 self.source_path = grammar.name ##
2650 except AttributeError:
2651 self.source_path = '<string>'
2652 else:
2653 self.source_path = self.options.source_path
2655 ##
2657 try:
2658 read = grammar.read ##
2660 except AttributeError:
2661 pass
2662 else:
2663 grammar = read()
2665 cache_fn = None
2666 cache_md5 = None
2667 if isinstance(grammar, str):
2668 self.source_grammar = grammar
2669 if self.options.use_bytes:
2670 if not isascii(grammar):
2671 raise ConfigurationError("Grammar must be ASCII-only when use_bytes=True")
2673 if self.options.cache:
2674 if self.options.parser != 'lalr':
2675 raise ConfigurationError("cache only works with parser='lalr' for now")
2677 unhashable = ('transformer', 'postlex', 'lexer_callbacks', 'edit_terminals', '_plugins')
2678 options_str = ''.join(k+str(v) for k, v in options.items() if k not in unhashable)
2679 from . import __version__
2680 s = grammar + options_str + __version__ + str(sys.version_info[:2])
2681 cache_md5 = md5_digest(s)
2683 if isinstance(self.options.cache, str):
2684 cache_fn = self.options.cache
2685 else:
2686 if self.options.cache is not True:
2687 raise ConfigurationError("cache argument must be bool or str")
2689 try:
2690 username = getpass.getuser()
2691 except Exception:
2692 ##
2694 ##
2696 ##
2698 username = "unknown"
2700 cache_fn = tempfile.gettempdir() + "/.lark_cache_%s_%s_%s_%s.tmp" % (username, cache_md5, *sys.version_info[:2])
2702 old_options = self.options
2703 try:
2704 with FS.open(cache_fn, 'rb') as f:
2705 logger.debug('Loading grammar from cache: %s', cache_fn)
2706 ##
2708 for name in (set(options) - _LOAD_ALLOWED_OPTIONS):
2709 del options[name]
2710 file_md5 = f.readline().rstrip(b'\n')
2711 cached_used_files = pickle.load(f)
2712 if file_md5 == cache_md5.encode('utf8') and verify_used_files(cached_used_files):
2713 cached_parser_data = pickle.load(f)
2714 self._load(cached_parser_data, **options)
2715 return
2716 except FileNotFoundError:
2717 ##
2719 pass
2720 except Exception: ##
2722 logger.exception("Failed to load Lark from cache: %r. We will try to carry on.", cache_fn)
2724 ##
2726 ##
2728 self.options = old_options
2731 ##
2733 self.grammar, used_files = load_grammar(grammar, self.source_path, self.options.import_paths, self.options.keep_all_tokens)
2734 else:
2735 assert isinstance(grammar, Grammar)
2736 self.grammar = grammar
2739 if self.options.lexer == 'auto':
2740 if self.options.parser == 'lalr':
2741 self.options.lexer = 'contextual'
2742 elif self.options.parser == 'earley':
2743 if self.options.postlex is not None:
2744 logger.info("postlex can't be used with the dynamic lexer, so we use 'basic' instead. "
2745 "Consider using lalr with contextual instead of earley")
2746 self.options.lexer = 'basic'
2747 else:
2748 self.options.lexer = 'dynamic'
2749 elif self.options.parser == 'cyk':
2750 self.options.lexer = 'basic'
2751 else:
2752 assert False, self.options.parser
2753 lexer = self.options.lexer
2754 if isinstance(lexer, type):
2755 assert issubclass(lexer, Lexer) ##
2757 else:
2758 assert_config(lexer, ('basic', 'contextual', 'dynamic', 'dynamic_complete'))
2759 if self.options.postlex is not None and 'dynamic' in lexer:
2760 raise ConfigurationError("Can't use postlex with a dynamic lexer. Use basic or contextual instead")
2762 if self.options.ambiguity == 'auto':
2763 if self.options.parser == 'earley':
2764 self.options.ambiguity = 'resolve'
2765 else:
2766 assert_config(self.options.parser, ('earley', 'cyk'), "%r doesn't support disambiguation. Use one of these parsers instead: %s")
2768 if self.options.priority == 'auto':
2769 self.options.priority = 'normal'
2771 if self.options.priority not in _VALID_PRIORITY_OPTIONS:
2772 raise ConfigurationError("invalid priority option: %r. Must be one of %r" % (self.options.priority, _VALID_PRIORITY_OPTIONS))
2773 if self.options.ambiguity not in _VALID_AMBIGUITY_OPTIONS:
2774 raise ConfigurationError("invalid ambiguity option: %r. Must be one of %r" % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS))
2776 if self.options.parser is None:
2777 terminals_to_keep = '*'
2778 elif self.options.postlex is not None:
2779 terminals_to_keep = set(self.options.postlex.always_accept)
2780 else:
2781 terminals_to_keep = set()
2783 ##
2785 self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start, terminals_to_keep)
2787 if self.options.edit_terminals:
2788 for t in self.terminals:
2789 self.options.edit_terminals(t)
2791 self._terminals_dict = {t.name: t for t in self.terminals}
2793 ##
2795 if self.options.priority == 'invert':
2796 for rule in self.rules:
2797 if rule.options.priority is not None:
2798 rule.options.priority = -rule.options.priority
2799 for term in self.terminals:
2800 term.priority = -term.priority
2801 ##
2803 ##
2805 ##
2807 elif self.options.priority is None:
2808 for rule in self.rules:
2809 if rule.options.priority is not None:
2810 rule.options.priority = None
2811 for term in self.terminals:
2812 term.priority = 0
2814 ##
2816 self.lexer_conf = LexerConf(
2817 self.terminals, re_module, self.ignore_tokens, self.options.postlex,
2818 self.options.lexer_callbacks, self.options.g_regex_flags, use_bytes=self.options.use_bytes
2819 )
2821 if self.options.parser:
2822 self.parser = self._build_parser()
2823 elif lexer:
2824 self.lexer = self._build_lexer()
2826 if cache_fn:
2827 logger.debug('Saving grammar to cache: %s', cache_fn)
2828 try:
2829 with FS.open(cache_fn, 'wb') as f:
2830 assert cache_md5 is not None
2831 f.write(cache_md5.encode('utf8') + b'\n')
2832 pickle.dump(used_files, f)
2833 self.save(f, _LOAD_ALLOWED_OPTIONS)
2834 except IOError as e:
2835 logger.exception("Failed to save Lark to cache: %r. %s", cache_fn, e)
2837 if __doc__:
2838 __doc__ += "\n\n" + LarkOptions.OPTIONS_DOC
2840 __serialize_fields__ = 'parser', 'rules', 'options'
2842 def _build_lexer(self, dont_ignore: bool=False) -> BasicLexer:
2843 lexer_conf = self.lexer_conf
2844 if dont_ignore:
2845 from copy import copy
2846 lexer_conf = copy(lexer_conf)
2847 lexer_conf.ignore = ()
2848 return BasicLexer(lexer_conf)
2850 def _prepare_callbacks(self) -> None:
2851 self._callbacks = {}
2852 ##
2854 if self.options.ambiguity != 'forest':
2855 self._parse_tree_builder = ParseTreeBuilder(
2856 self.rules,
2857 self.options.tree_class or Tree,
2858 self.options.propagate_positions,
2859 self.options.parser != 'lalr' and self.options.ambiguity == 'explicit',
2860 self.options.maybe_placeholders
2861 )
2862 self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer)
2863 self._callbacks.update(_get_lexer_callbacks(self.options.transformer, self.terminals))
2865 def _build_parser(self) -> "ParsingFrontend":
2866 self._prepare_callbacks()
2867 _validate_frontend_args(self.options.parser, self.options.lexer)
2868 parser_conf = ParserConf(self.rules, self._callbacks, self.options.start)
2869 return _construct_parsing_frontend(
2870 self.options.parser,
2871 self.options.lexer,
2872 self.lexer_conf,
2873 parser_conf,
2874 options=self.options
2875 )
2877 def save(self, f, exclude_options: Collection[str] = ()) -> None:
2878 #--
2879 data, m = self.memo_serialize([TerminalDef, Rule])
2880 if exclude_options:
2881 data["options"] = {n: v for n, v in data["options"].items() if n not in exclude_options}
2882 pickle.dump({'data': data, 'memo': m}, f, protocol=pickle.HIGHEST_PROTOCOL)
2884 @classmethod
2885 def load(cls: Type[_T], f) -> _T:
2886 #--
2887 inst = cls.__new__(cls)
2888 return inst._load(f)
2890 def _deserialize_lexer_conf(self, data: Dict[str, Any], memo: Dict[int, Union[TerminalDef, Rule]], options: LarkOptions) -> LexerConf:
2891 lexer_conf = LexerConf.deserialize(data['lexer_conf'], memo)
2892 lexer_conf.callbacks = options.lexer_callbacks or {}
2893 lexer_conf.re_module = regex if options.regex else re
2894 lexer_conf.use_bytes = options.use_bytes
2895 lexer_conf.g_regex_flags = options.g_regex_flags
2896 lexer_conf.skip_validation = True
2897 lexer_conf.postlex = options.postlex
2898 return lexer_conf
2900 def _load(self: _T, f: Any, **kwargs) -> _T:
2901 if isinstance(f, dict):
2902 d = f
2903 else:
2904 d = pickle.load(f)
2905 memo_json = d['memo']
2906 data = d['data']
2908 assert memo_json
2909 memo = SerializeMemoizer.deserialize(memo_json, {'Rule': Rule, 'TerminalDef': TerminalDef}, {})
2910 options = dict(data['options'])
2911 if (set(kwargs) - _LOAD_ALLOWED_OPTIONS) & set(LarkOptions._defaults):
2912 raise ConfigurationError("Some options are not allowed when loading a Parser: {}"
2913 .format(set(kwargs) - _LOAD_ALLOWED_OPTIONS))
2914 options.update(kwargs)
2915 self.options = LarkOptions.deserialize(options, memo)
2916 self.rules = [Rule.deserialize(r, memo) for r in data['rules']]
2917 self.source_path = '<deserialized>'
2918 _validate_frontend_args(self.options.parser, self.options.lexer)
2919 self.lexer_conf = self._deserialize_lexer_conf(data['parser'], memo, self.options)
2920 self.terminals = self.lexer_conf.terminals
2921 self._prepare_callbacks()
2922 self._terminals_dict = {t.name: t for t in self.terminals}
2923 self.parser = _deserialize_parsing_frontend(
2924 data['parser'],
2925 memo,
2926 self.lexer_conf,
2927 self._callbacks,
2928 self.options, ##
2930 )
2931 return self
2933 @classmethod
2934 def _load_from_dict(cls, data, memo, **kwargs):
2935 inst = cls.__new__(cls)
2936 return inst._load({'data': data, 'memo': memo}, **kwargs)
2938 @classmethod
2939 def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str]=None, **options) -> _T:
2940 #--
2941 if rel_to:
2942 basepath = os.path.dirname(rel_to)
2943 grammar_filename = os.path.join(basepath, grammar_filename)
2944 with open(grammar_filename, encoding='utf8') as f:
2945 return cls(f, **options)
2947 @classmethod
2948 def open_from_package(cls: Type[_T], package: str, grammar_path: str, search_paths: 'Sequence[str]'=[""], **options) -> _T:
2949 #--
2950 package_loader = FromPackageLoader(package, search_paths)
2951 full_path, text = package_loader(None, grammar_path)
2952 options.setdefault('source_path', full_path)
2953 options.setdefault('import_paths', [])
2954 options['import_paths'].append(package_loader)
2955 return cls(text, **options)
2957 def __repr__(self):
2958 return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source_path, self.options.parser, self.options.lexer)
2961 def lex(self, text: str, dont_ignore: bool=False) -> Iterator[Token]:
2962 #--
2963 lexer: Lexer
2964 if not hasattr(self, 'lexer') or dont_ignore:
2965 lexer = self._build_lexer(dont_ignore)
2966 else:
2967 lexer = self.lexer
2968 lexer_thread = LexerThread.from_text(lexer, text)
2969 stream = lexer_thread.lex(None)
2970 if self.options.postlex:
2971 return self.options.postlex.process(stream)
2972 return stream
2974 def get_terminal(self, name: str) -> TerminalDef:
2975 #--
2976 return self._terminals_dict[name]
2978 def parse_interactive(self, text: Optional[str]=None, start: Optional[str]=None) -> 'InteractiveParser':
2979 #--
2980 return self.parser.parse_interactive(text, start=start)
2982 def parse(self, text: str, start: Optional[str]=None, on_error: 'Optional[Callable[[UnexpectedInput], bool]]'=None) -> 'ParseTree':
2983 #--
2984 return self.parser.parse(text, start=start, on_error=on_error)
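# Hedged sketch (not emitted by the Lark generator) of the Lark instance API defined above,
# exercised through the Lark_StandAlone() factory at the end of this module, whose embedded
# grammar parses JSON.  resume_parse() belongs to the InteractiveParser class defined
# earlier in this module.
def _example_lark_api():
    parser = Lark_StandAlone()
    tokens = list(parser.lex('[1, true, null]'))      # Token objects; whitespace is ignored
    interactive = parser.parse_interactive('{"a": []}')
    tree = interactive.resume_parse()                 # run the LALR loop to completion
    return tokens, tree, parser.get_terminal('ESCAPED_STRING')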
2989class DedentError(LarkError):
2990 pass
2992class Indenter(PostLex, ABC):
2993 paren_level: int
2994 indent_level: List[int]
2996 def __init__(self) -> None:
2997 self.paren_level = 0
2998 self.indent_level = [0]
2999 assert self.tab_len > 0
3001 def handle_NL(self, token: Token) -> Iterator[Token]:
3002 if self.paren_level > 0:
3003 return
3005 yield token
3007 indent_str = token.rsplit('\n', 1)[1] ## whitespace (spaces and tabs) after the last newline
3009 indent = indent_str.count(' ') + indent_str.count('\t') * self.tab_len
3011 if indent > self.indent_level[-1]:
3012 self.indent_level.append(indent)
3013 yield Token.new_borrow_pos(self.INDENT_type, indent_str, token)
3014 else:
3015 while indent < self.indent_level[-1]:
3016 self.indent_level.pop()
3017 yield Token.new_borrow_pos(self.DEDENT_type, indent_str, token)
3019 if indent != self.indent_level[-1]:
3020 raise DedentError('Unexpected dedent to column %s. Expected dedent to %s' % (indent, self.indent_level[-1]))
3022 def _process(self, stream):
3023 for token in stream:
3024 if token.type == self.NL_type:
3025 yield from self.handle_NL(token)
3026 else:
3027 yield token
3029 if token.type in self.OPEN_PAREN_types:
3030 self.paren_level += 1
3031 elif token.type in self.CLOSE_PAREN_types:
3032 self.paren_level -= 1
3033 assert self.paren_level >= 0
3035 while len(self.indent_level) > 1:
3036 self.indent_level.pop()
3037 yield Token(self.DEDENT_type, '')
3039 assert self.indent_level == [0], self.indent_level
3041 def process(self, stream):
3042 self.paren_level = 0
3043 self.indent_level = [0]
3044 return self._process(stream)
3046 ##
3048 @property
3049 def always_accept(self):
3050 return (self.NL_type,)
3052 @property
3053 @abstractmethod
3054 def NL_type(self) -> str:
3055 raise NotImplementedError()
3057 @property
3058 @abstractmethod
3059 def OPEN_PAREN_types(self) -> List[str]:
3060 raise NotImplementedError()
3062 @property
3063 @abstractmethod
3064 def CLOSE_PAREN_types(self) -> List[str]:
3065 raise NotImplementedError()
3067 @property
3068 @abstractmethod
3069 def INDENT_type(self) -> str:
3070 raise NotImplementedError()
3072 @property
3073 @abstractmethod
3074 def DEDENT_type(self) -> str:
3075 raise NotImplementedError()
3077 @property
3078 @abstractmethod
3079 def tab_len(self) -> int:
3080 raise NotImplementedError()
3083class PythonIndenter(Indenter):
3084 NL_type = '_NEWLINE'
3085 OPEN_PAREN_types = ['LPAR', 'LSQB', 'LBRACE']
3086 CLOSE_PAREN_types = ['RPAR', 'RSQB', 'RBRACE']
3087 INDENT_type = '_INDENT'
3088 DEDENT_type = '_DEDENT'
3089 tab_len = 8
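# Hedged sketch (not emitted by the Lark generator) of plugging an Indenter in as the
# `postlex` option.  It needs the full `lark` package (the standalone runtime above carries
# a fixed JSON grammar with no postlex), and the grammar must %declare the INDENT/DEDENT
# terminals that the indenter injects around the newline terminal it watches (_NEWLINE for
# PythonIndenter).
def _example_indenter_usage():
    from lark import Lark

    grammar = r"""
        start: _NEWLINE* node
        node: NAME _NEWLINE [_INDENT node+ _DEDENT]
        NAME: /[a-z_]+/
        _NEWLINE: /(\r?\n[\t ]*)+/
        %declare _INDENT _DEDENT
    """
    parser = Lark(grammar, parser="lalr", postlex=PythonIndenter())
    return parser.parse("root\n    child_a\n    child_b\n")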
3092import pickle, zlib, base64
3093DATA = (
3094{'parser': {'lexer_conf': {'terminals': [{'@': 0}, {'@': 1}, {'@': 2}, {'@': 3}, {'@': 4}, {'@': 5}, {'@': 6}, {'@': 7}, {'@': 8}, {'@': 9}, {'@': 10}, {'@': 11}], 'ignore': ['WS'], 'g_regex_flags': 0, 'use_bytes': False, 'lexer_type': 'contextual', '__type__': 'LexerConf'}, 'parser_conf': {'rules': [{'@': 12}, {'@': 13}, {'@': 14}, {'@': 15}, {'@': 16}, {'@': 17}, {'@': 18}, {'@': 19}, {'@': 20}, {'@': 21}, {'@': 22}, {'@': 23}, {'@': 24}, {'@': 25}, {'@': 26}, {'@': 27}, {'@': 28}, {'@': 29}, {'@': 30}, {'@': 31}, {'@': 32}], 'start': ['start'], 'parser_type': 'lalr', '__type__': 'ParserConf'}, 'parser': {'tokens': {0: 'LBRACE', 1: 'object', 2: 'ESCAPED_STRING', 3: 'LSQB', 4: 'SIGNED_NUMBER', 5: 'start', 6: 'array', 7: 'string', 8: 'value', 9: 'NULL', 10: 'FALSE', 11: 'TRUE', 12: 'COMMA', 13: 'RBRACE', 14: '$END', 15: 'RSQB', 16: 'COLON', 17: 'item', 18: '_items', 19: '___items_star_1', 20: '__array_star_0'}, 'states': {0: {0: (0, 9), 1: (0, 24), 2: (0, 7), 3: (0, 28), 4: (0, 31), 5: (0, 21), 6: (0, 2), 7: (0, 4), 8: (0, 23), 9: (0, 19), 10: (0, 32), 11: (0, 25)}, 1: {12: (1, {'@': 32}), 13: (1, {'@': 32})}, 2: {14: (1, {'@': 14}), 12: (1, {'@': 14}), 13: (1, {'@': 14}), 15: (1, {'@': 14})}, 3: {15: (1, {'@': 29}), 12: (1, {'@': 29})}, 4: {14: (1, {'@': 15}), 12: (1, {'@': 15}), 13: (1, {'@': 15}), 15: (1, {'@': 15})}, 5: {16: (0, 10)}, 6: {14: (1, {'@': 22}), 12: (1, {'@': 22}), 13: (1, {'@': 22}), 15: (1, {'@': 22})}, 7: {14: (1, {'@': 28}), 16: (1, {'@': 28}), 12: (1, {'@': 28}), 13: (1, {'@': 28}), 15: (1, {'@': 28})}, 8: {12: (1, {'@': 25}), 13: (1, {'@': 25})}, 9: {7: (0, 5), 17: (0, 13), 2: (0, 7), 18: (0, 20), 13: (0, 33)}, 10: {0: (0, 9), 1: (0, 24), 2: (0, 7), 3: (0, 28), 4: (0, 31), 6: (0, 2), 7: (0, 4), 9: (0, 19), 8: (0, 8), 10: (0, 32), 11: (0, 25)}, 11: {0: (0, 9), 1: (0, 24), 2: (0, 7), 3: (0, 28), 4: (0, 31), 8: (0, 3), 6: (0, 2), 7: (0, 4), 9: (0, 19), 10: (0, 32), 11: (0, 25)}, 12: {7: (0, 5), 2: (0, 7), 17: (0, 1)}, 13: {12: (0, 14), 19: (0, 29), 13: (1, {'@': 27})}, 14: {7: (0, 5), 17: (0, 17), 2: (0, 7)}, 15: {14: (1, {'@': 20}), 12: (1, {'@': 20}), 13: (1, {'@': 20}), 15: (1, {'@': 20})}, 16: {0: (0, 9), 1: (0, 24), 2: (0, 7), 3: (0, 28), 8: (0, 22), 4: (0, 31), 6: (0, 2), 7: (0, 4), 9: (0, 19), 10: (0, 32), 11: (0, 25)}, 17: {12: (1, {'@': 31}), 13: (1, {'@': 31})}, 18: {12: (0, 16), 15: (0, 15)}, 19: {14: (1, {'@': 19}), 12: (1, {'@': 19}), 13: (1, {'@': 19}), 15: (1, {'@': 19})}, 20: {13: (0, 26)}, 21: {}, 22: {15: (1, {'@': 30}), 12: (1, {'@': 30})}, 23: {14: (1, {'@': 12})}, 24: {14: (1, {'@': 13}), 12: (1, {'@': 13}), 13: (1, {'@': 13}), 15: (1, {'@': 13})}, 25: {14: (1, {'@': 17}), 12: (1, {'@': 17}), 13: (1, {'@': 17}), 15: (1, {'@': 17})}, 26: {14: (1, {'@': 23}), 12: (1, {'@': 23}), 13: (1, {'@': 23}), 15: (1, {'@': 23})}, 27: {12: (0, 11), 15: (0, 30), 20: (0, 18)}, 28: {3: (0, 28), 6: (0, 2), 15: (0, 6), 8: (0, 27), 10: (0, 32), 0: (0, 9), 1: (0, 24), 2: (0, 7), 4: (0, 31), 7: (0, 4), 9: (0, 19), 11: (0, 25)}, 29: {12: (0, 12), 13: (1, {'@': 26})}, 30: {14: (1, {'@': 21}), 12: (1, {'@': 21}), 13: (1, {'@': 21}), 15: (1, {'@': 21})}, 31: {14: (1, {'@': 16}), 12: (1, {'@': 16}), 13: (1, {'@': 16}), 15: (1, {'@': 16})}, 32: {14: (1, {'@': 18}), 12: (1, {'@': 18}), 13: (1, {'@': 18}), 15: (1, {'@': 18})}, 33: {14: (1, {'@': 24}), 12: (1, {'@': 24}), 13: (1, {'@': 24}), 15: (1, {'@': 24})}}, 'start_states': {'start': 0}, 'end_states': {'start': 21}}, '__type__': 'ParsingFrontend'}, 'rules': [{'@': 12}, {'@': 13}, {'@': 14}, {'@': 15}, {'@': 16}, {'@': 
17}, {'@': 18}, {'@': 19}, {'@': 20}, {'@': 21}, {'@': 22}, {'@': 23}, {'@': 24}, {'@': 25}, {'@': 26}, {'@': 27}, {'@': 28}, {'@': 29}, {'@': 30}, {'@': 31}, {'@': 32}], 'options': {'debug': False, 'keep_all_tokens': False, 'tree_class': None, 'cache': False, 'postlex': None, 'parser': 'lalr', 'lexer': 'contextual', 'transformer': None, 'start': ['start'], 'priority': 'normal', 'ambiguity': 'auto', 'regex': False, 'propagate_positions': True, 'lexer_callbacks': {}, 'maybe_placeholders': False, 'edit_terminals': None, 'g_regex_flags': 0, 'use_bytes': False, 'import_paths': [], 'source_path': None, '_plugins': {}}, '__type__': 'Lark'}
3095)
3096MEMO = (
3097{0: {'name': 'SIGNED_NUMBER', 'pattern': {'value': '(?:(?:\\+|\\-))?(?:(?:(?:[0-9])+(?:e|E)(?:(?:\\+|\\-))?(?:[0-9])+|(?:(?:[0-9])+\\.(?:(?:[0-9])+)?|\\.(?:[0-9])+)(?:(?:e|E)(?:(?:\\+|\\-))?(?:[0-9])+)?)|(?:[0-9])+)', 'flags': [], '_width': [1, 4294967295], '__type__': 'PatternRE'}, 'priority': 0, '__type__': 'TerminalDef'}, 1: {'name': 'ESCAPED_STRING', 'pattern': {'value': '".*?(?<!\\\\)(\\\\\\\\)*?"', 'flags': [], '_width': [2, 4294967295], '__type__': 'PatternRE'}, 'priority': 0, '__type__': 'TerminalDef'}, 2: {'name': 'WS', 'pattern': {'value': '(?:[ \t\x0c\r\n])+', 'flags': [], '_width': [1, 4294967295], '__type__': 'PatternRE'}, 'priority': 0, '__type__': 'TerminalDef'}, 3: {'name': 'COLON', 'pattern': {'value': ':', 'flags': [], '__type__': 'PatternStr'}, 'priority': 0, '__type__': 'TerminalDef'}, 4: {'name': 'TRUE', 'pattern': {'value': 'true', 'flags': [], '__type__': 'PatternStr'}, 'priority': 0, '__type__': 'TerminalDef'}, 5: {'name': 'FALSE', 'pattern': {'value': 'false', 'flags': [], '__type__': 'PatternStr'}, 'priority': 0, '__type__': 'TerminalDef'}, 6: {'name': 'NULL', 'pattern': {'value': 'null', 'flags': [], '__type__': 'PatternStr'}, 'priority': 0, '__type__': 'TerminalDef'}, 7: {'name': 'COMMA', 'pattern': {'value': ',', 'flags': [], '__type__': 'PatternStr'}, 'priority': 0, '__type__': 'TerminalDef'}, 8: {'name': 'LSQB', 'pattern': {'value': '[', 'flags': [], '__type__': 'PatternStr'}, 'priority': 0, '__type__': 'TerminalDef'}, 9: {'name': 'RSQB', 'pattern': {'value': ']', 'flags': [], '__type__': 'PatternStr'}, 'priority': 0, '__type__': 'TerminalDef'}, 10: {'name': 'LBRACE', 'pattern': {'value': '{', 'flags': [], '__type__': 'PatternStr'}, 'priority': 0, '__type__': 'TerminalDef'}, 11: {'name': 'RBRACE', 'pattern': {'value': '}', 'flags': [], '__type__': 'PatternStr'}, 'priority': 0, '__type__': 'TerminalDef'}, 12: {'origin': {'name': Token('RULE', 'start'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'value', '__type__': 'NonTerminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 13: {'origin': {'name': Token('RULE', 'value'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'object', '__type__': 'NonTerminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 14: {'origin': {'name': Token('RULE', 'value'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'array', '__type__': 'NonTerminal'}], 'order': 1, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 15: {'origin': {'name': Token('RULE', 'value'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'string', '__type__': 'NonTerminal'}], 'order': 2, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 16: {'origin': {'name': Token('RULE', 'value'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'SIGNED_NUMBER', 'filter_out': False, '__type__': 'Terminal'}], 'order': 3, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 
'Rule'}, 17: {'origin': {'name': Token('RULE', 'value'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'TRUE', 'filter_out': True, '__type__': 'Terminal'}], 'order': 4, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 18: {'origin': {'name': Token('RULE', 'value'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'FALSE', 'filter_out': True, '__type__': 'Terminal'}], 'order': 5, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 19: {'origin': {'name': Token('RULE', 'value'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'NULL', 'filter_out': True, '__type__': 'Terminal'}], 'order': 6, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 20: {'origin': {'name': Token('RULE', 'array'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'LSQB', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'value', '__type__': 'NonTerminal'}, {'name': '__array_star_0', '__type__': 'NonTerminal'}, {'name': 'RSQB', 'filter_out': True, '__type__': 'Terminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 21: {'origin': {'name': Token('RULE', 'array'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'LSQB', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'value', '__type__': 'NonTerminal'}, {'name': 'RSQB', 'filter_out': True, '__type__': 'Terminal'}], 'order': 1, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 22: {'origin': {'name': Token('RULE', 'array'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'LSQB', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'RSQB', 'filter_out': True, '__type__': 'Terminal'}], 'order': 2, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': [False, True, False], '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 23: {'origin': {'name': Token('RULE', 'object'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'LBRACE', 'filter_out': True, '__type__': 'Terminal'}, {'name': '_items', '__type__': 'NonTerminal'}, {'name': 'RBRACE', 'filter_out': True, '__type__': 'Terminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 24: {'origin': {'name': Token('RULE', 'object'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'LBRACE', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'RBRACE', 'filter_out': True, '__type__': 'Terminal'}], 'order': 1, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 25: {'origin': {'name': Token('RULE', 'item'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'string', '__type__': 'NonTerminal'}, {'name': 'COLON', 'filter_out': False, '__type__': 'Terminal'}, {'name': 'value', 
'__type__': 'NonTerminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 26: {'origin': {'name': Token('RULE', '_items'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'item', '__type__': 'NonTerminal'}, {'name': '___items_star_1', '__type__': 'NonTerminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 27: {'origin': {'name': Token('RULE', '_items'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'item', '__type__': 'NonTerminal'}], 'order': 1, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 28: {'origin': {'name': Token('RULE', 'string'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'ESCAPED_STRING', 'filter_out': False, '__type__': 'Terminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 29: {'origin': {'name': '__array_star_0', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'COMMA', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'value', '__type__': 'NonTerminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 30: {'origin': {'name': '__array_star_0', '__type__': 'NonTerminal'}, 'expansion': [{'name': '__array_star_0', '__type__': 'NonTerminal'}, {'name': 'COMMA', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'value', '__type__': 'NonTerminal'}], 'order': 1, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 31: {'origin': {'name': '___items_star_1', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'COMMA', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'item', '__type__': 'NonTerminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 32: {'origin': {'name': '___items_star_1', '__type__': 'NonTerminal'}, 'expansion': [{'name': '___items_star_1', '__type__': 'NonTerminal'}, {'name': 'COMMA', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'item', '__type__': 'NonTerminal'}], 'order': 1, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}}
3098)
3099Shift = 0
3100Reduce = 1
3101def Lark_StandAlone(**kwargs):
3102 return Lark._load_from_dict(DATA, MEMO, **kwargs)
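# Hedged usage sketch for the generated entry point above: the DATA/MEMO blobs encode a
# JSON grammar (object/array/string/number/true/false/null), so the factory should yield a
# parser that turns JSON text into a lark Tree.
def _example_standalone_usage():
    parser = Lark_StandAlone()
    tree = parser.parse('{"cells": [], "nbformat": 4, "valid": true}')
    return tree.pretty()              # Tree.pretty() renders an indented textual dump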