Coverage for src/m6rclib/metaphor_parser.py: 100%
209 statements
« prev ^ index » next coverage.py v7.6.1, created at 2024-11-12 12:16 +0000
« prev ^ index » next coverage.py v7.6.1, created at 2024-11-12 12:16 +0000
1# Copyright 2024 M6R Ltd.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
15import glob
16import os
17from pathlib import Path
19from typing import List, Set, Optional, Union
21from .metaphor_token import Token, TokenType
22from .embed_lexer import EmbedLexer
23from .metaphor_lexer import MetaphorLexer
24from .metaphor_ast_node import MetaphorASTNode, MetaphorASTNodeType
class MetaphorParserFileAlreadyUsedError(Exception):
    """
    Signal that a file has been requested more than once.

    Attributes:
        filename (str): The file name that was requested again.
        token (Token): The token supplied by the raiser alongside the file name.
    """

    def __init__(self, filename: str, token: Token) -> None:
        # Keep the details on the instance so handlers can build their own report.
        self.filename: str = filename
        self.token: Token = token

        description = f"The file '{filename}' has already been used."
        super().__init__(description)
class MetaphorParserSyntaxError(Exception):
    """
    Exception describing a single syntax error and where it occurred.

    Attributes:
        message (str): Description of the problem.
        filename (str): Name of the file in which the problem was found.
        line (int): Line number of the problem.
        column (int): Column number of the problem.
        input_text (str): The input text associated with the problem.
    """

    def __init__(self, message: str, filename: str, line: int, column: int, input_text: str) -> None:
        # The summary intentionally stops after the column: the previous form
        # ended with a dangling ", " separator that had nothing following it.
        super().__init__(f"{message}: file: {filename}, line {line}, column {column}")
        self.message: str = message
        self.filename: str = filename
        self.line: int = line
        self.column: int = column
        self.input_text: str = input_text
class MetaphorParserError(Exception):
    """
    Wrapper exception that carries a list of syntax errors.

    Attributes:
        errors (List[MetaphorParserSyntaxError]): The syntax errors supplied
            when the exception was created.
    """

    def __init__(self, message: str, errors: List[MetaphorParserSyntaxError]) -> None:
        # Store the error list first; the base class only needs the summary text.
        self.errors: List[MetaphorParserSyntaxError] = errors
        super().__init__(message)
class MetaphorParser:
    """
    Parser class to process tokens and build an Abstract Syntax Tree (AST).

    Attributes:
        syntax_tree (MetaphorASTNode): The root node of the AST.
        parse_errors (List[MetaphorParserSyntaxError]): List of syntax errors encountered during parsing.
        lexers (List[Union[MetaphorLexer, EmbedLexer]]): Stack of lexers used for parsing multiple files.
        previously_seen_files (Set[str]): Set of canonical filenames already processed.
        search_paths (List[str]): List of paths to search for included files.
        current_token (Optional[Token]): The current token being processed.
    """

    def __init__(self) -> None:
        self.syntax_tree: MetaphorASTNode = MetaphorASTNode(MetaphorASTNodeType.ROOT, "")
        self.parse_errors: List[MetaphorParserSyntaxError] = []
        self.lexers: List[Union[MetaphorLexer, EmbedLexer]] = []
        self.previously_seen_files: Set[str] = set()
        self.search_paths: List[str] = []
        self.current_token: Optional[Token] = None

    def parse(self, input_text: str, filename: str, search_paths: List[str]) -> MetaphorASTNode:
        """
        Parse an input string and construct the AST.

        Args:
            input_text (str): The text to be parsed.
            filename (str): The name of the file being parsed.
            search_paths (List[str]): List of paths to search for included files.

        Returns:
            MetaphorASTNode: The root node of the AST, with each parsed
                top-level 'Action', 'Context' and 'Role' block attached as a child.

        Raises:
            MetaphorParserError: If there are syntax errors during parsing, if an
                included or embedded file cannot be read, or if a file is
                included more than once.  The individual problems are available
                through the exception's ``errors`` list.
        """
        self.search_paths = search_paths

        # Top-level keyword -> (name used in error messages, block parser).
        block_parsers = {
            TokenType.ACTION: ("Action", self._parse_action),
            TokenType.CONTEXT: ("Context", self._parse_context),
            TokenType.ROLE: ("Role", self._parse_role),
        }
        seen_blocks = set()

        try:
            self.lexers.append(MetaphorLexer(input_text, filename))

            while True:
                token = self.get_next_token()
                if token.type == TokenType.END_OF_FILE:
                    if self.parse_errors:
                        raise MetaphorParserError("parser error", self.parse_errors)

                    return self.syntax_tree

                entry = block_parsers.get(token.type)
                if entry is None:
                    self._record_syntax_error(token, f"Unexpected token: {token.value} at top level")
                    continue

                block_name, parse_block = entry
                if token.type in seen_blocks:
                    self._record_syntax_error(token, f"'{block_name}' already defined")

                # A duplicate block is still parsed so its tokens are consumed
                # and any further errors inside it are reported too.
                self.syntax_tree.attach_child(parse_block(token))
                seen_blocks.add(token.type)
        except FileNotFoundError as e:
            # Raised while loading an included/embedded file; report it at the
            # most recently read token.
            self._record_syntax_error(self.current_token, f"{e}")
            raise MetaphorParserError("parser error", self.parse_errors) from e
        except MetaphorParserFileAlreadyUsedError as e:
            self._record_syntax_error(e.token, f"The file '{e.filename}' has already been used")
            raise MetaphorParserError("parser error", self.parse_errors) from e

    def parse_file(self, filename: str, search_paths: List[str]) -> MetaphorASTNode:
        """
        Parse a file and construct the AST.

        Args:
            filename (str): The path to the file to be parsed.
            search_paths (List[str]): List of paths to search for included files.

        Returns:
            MetaphorASTNode: The root node of the AST.

        Raises:
            MetaphorParserError: If the file cannot be read, if it has already
                been processed by this parser, or if there are syntax errors
                during parsing.
        """
        try:
            self._check_file_not_loaded(filename)
            input_text = self._read_file(filename)
            return self.parse(input_text, filename, search_paths)
        except FileNotFoundError as e:
            # There is no source location for a problem with the top-level file.
            self._record_syntax_error(None, f"{e}")
            raise MetaphorParserError("parser error", self.parse_errors) from e
        except MetaphorParserFileAlreadyUsedError as e:
            # Report a repeated top-level file the same way as every other
            # failure instead of leaking the internal exception type.  Any
            # token on the exception would be left over from earlier parsing,
            # so no location is attached.
            self._record_syntax_error(None, f"The file '{e.filename}' has already been used")
            raise MetaphorParserError("parser error", self.parse_errors) from e
        except MetaphorParserError as e:
            raise MetaphorParserError("parser error", self.parse_errors) from e

    def get_next_token(self) -> Token:
        """
        Get the next token from the active lexer.

        'Include' and 'Embed' tokens are handled here by pushing new lexers
        onto the stack, and a lexer is popped when it reports end of file, so
        callers only see the tokens that remain.

        Returns:
            Token: The next token, or an END_OF_FILE token once the lexer stack
                is empty.
        """
        while self.lexers:
            lexer = self.lexers[-1]
            token = lexer.get_next_token()
            self.current_token = token

            if token.type == TokenType.INCLUDE:
                self._parse_include()
            elif token.type == TokenType.EMBED:
                self._parse_embed()
            elif token.type == TokenType.END_OF_FILE:
                self.lexers.pop()
            else:
                return token

        return Token(TokenType.END_OF_FILE, "", "", "", 0, 0)

    def _record_syntax_error(self, token: Optional[Token], message: str) -> None:
        """
        Record a syntax error in the error list (nothing is raised here).

        Args:
            token (Optional[Token]): The token that locates the error, or None
                when no location is available, in which case an empty file
                name, line 0, column 0 and empty input text are recorded.
            message (str): Description of the error.
        """
        if token is None:
            error = MetaphorParserSyntaxError(message, "", 0, 0, "")
        else:
            error = MetaphorParserSyntaxError(
                message, token.filename, token.line, token.column, token.input
            )

        self.parse_errors.append(error)

    def _find_file_path(self, filename: str) -> str:
        """
        Try to find a valid path for a file, given all the search path options.

        Args:
            filename (str): The file name as written in the source.

        Returns:
            str: ``filename`` itself if it exists, otherwise the first existing
                combination of a search path and ``filename``.

        Raises:
            FileNotFoundError: If no existing path can be found.
        """
        if Path(filename).exists():
            return filename

        # If we don't have an absolute path then we can try search paths.
        if not os.path.isabs(filename):
            for path in self.search_paths:
                try_name = os.path.join(path, filename)
                if Path(try_name).exists():
                    return try_name

        raise FileNotFoundError(f"File not found: {filename}")

    def _read_file(self, filename: str) -> str:
        """
        Read file content into memory.

        Args:
            filename (str): Path of the file to read (decoded as UTF-8).

        Returns:
            str: The full content of the file.

        Raises:
            FileNotFoundError: For any OS-level failure to open or read the
                file; the message says which kind of failure occurred.
        """
        try:
            with open(filename, 'r', encoding='utf-8') as file:
                return file.read()
        except FileNotFoundError as e:
            raise FileNotFoundError(f"File not found: {filename}") from e
        except PermissionError as e:
            raise FileNotFoundError(f"You do not have permission to access: {filename}") from e
        except IsADirectoryError as e:
            raise FileNotFoundError(f"Is a directory: {filename}") from e
        except OSError as e:
            raise FileNotFoundError(f"OS error: {e}") from e

    def _check_file_not_loaded(self, filename: str) -> None:
        """
        Check we have not already loaded a file, and remember it if not.

        Args:
            filename (str): Path of the file about to be loaded.

        Raises:
            MetaphorParserFileAlreadyUsedError: If the canonical form of
                ``filename`` has been seen before.
        """
        canonical_filename = os.path.realpath(filename)
        if canonical_filename in self.previously_seen_files:
            raise MetaphorParserFileAlreadyUsedError(filename, self.current_token)

        self.previously_seen_files.add(canonical_filename)

    def _parse_text(self, token: Token) -> MetaphorASTNode:
        """Parse a text block."""
        return MetaphorASTNode(MetaphorASTNodeType.TEXT, token.value)

    def _parse_block_label(self, token: Token, block_name: str) -> str:
        """
        Consume the optional description and the indent that open a block.

        Args:
            token (Token): The keyword token that started the block; errors are
                reported against it.
            block_name (str): Block name used in error messages.

        Returns:
            str: The description text, or "" if the block has none.
        """
        label_name = ""

        init_token = self.get_next_token()
        if init_token.type == TokenType.KEYWORD_TEXT:
            label_name = init_token.value
            indent_token = self.get_next_token()
            if indent_token.type != TokenType.INDENT:
                self._record_syntax_error(
                    token,
                    f"Expected indent after keyword description for '{block_name}' block"
                )
        elif init_token.type != TokenType.INDENT:
            self._record_syntax_error(token, f"Expected description or indent for '{block_name}' block")

        return label_name

    def _parse_text_block(
        self,
        token: Token,
        node_type: MetaphorASTNodeType,
        block_name: str
    ) -> MetaphorASTNode:
        """
        Parse a block whose body may only contain text.

        Args:
            token (Token): The keyword token that started the block.
            node_type (MetaphorASTNodeType): Type of the AST node to create.
            block_name (str): Block name used in error messages.

        Returns:
            MetaphorASTNode: The node for the block with its text children attached.
        """
        block_node = MetaphorASTNode(node_type, self._parse_block_label(token, block_name))

        while True:
            token = self.get_next_token()
            if token.type == TokenType.TEXT:
                block_node.attach_child(self._parse_text(token))
            elif token.type in (TokenType.OUTDENT, TokenType.END_OF_FILE):
                return block_node
            else:
                self._record_syntax_error(
                    token,
                    f"Unexpected token: {token.value} in '{block_name}' block"
                )

    def _parse_action(self, token: Token) -> MetaphorASTNode:
        """Parse an action block and construct its AST node."""
        return self._parse_text_block(token, MetaphorASTNodeType.ACTION, "Action")

    def _parse_context(self, token: Token) -> MetaphorASTNode:
        """
        Parse a Context block.

        A Context block may hold text and nested Context blocks; text that
        appears after a nested Context block is reported as an error but is
        still attached.
        """
        context_node = MetaphorASTNode(
            MetaphorASTNodeType.CONTEXT,
            self._parse_block_label(token, "Context")
        )

        seen_nested_context = False

        while True:
            token = self.get_next_token()
            if token.type == TokenType.TEXT:
                if seen_nested_context:
                    self._record_syntax_error(token, "Text must come first in a 'Context' block")

                context_node.attach_child(self._parse_text(token))
            elif token.type == TokenType.CONTEXT:
                context_node.attach_child(self._parse_context(token))
                seen_nested_context = True
            elif token.type in (TokenType.OUTDENT, TokenType.END_OF_FILE):
                return context_node
            else:
                self._record_syntax_error(token, f"Unexpected token: {token.value} in 'Context' block")

    def _parse_role(self, token: Token) -> MetaphorASTNode:
        """Parse a Role block."""
        return self._parse_text_block(token, MetaphorASTNodeType.ROLE, "Role")

    def _parse_include(self) -> None:
        """
        Parse an Include block and load the included file.

        Raises:
            FileNotFoundError: If the file cannot be found or read.
            MetaphorParserFileAlreadyUsedError: If the file was already loaded.
        """
        token_next = self.get_next_token()
        if token_next.type != TokenType.KEYWORD_TEXT:
            self._record_syntax_error(token_next, "Expected file name for 'Include'")
            return

        filename = token_next.value

        # Resolve against the search paths before the duplicate check so the
        # canonical name describes the file that is actually opened, not the
        # name as written relative to the current directory.
        try_file = self._find_file_path(filename)
        self._check_file_not_loaded(try_file)
        input_text = self._read_file(try_file)
        self.lexers.append(MetaphorLexer(input_text, try_file))

    def _parse_embed(self) -> None:
        """
        Parse an Embed block and load the embedded file.

        The file name may be a glob pattern; a pattern containing "**/" is
        matched recursively.

        Raises:
            FileNotFoundError: If a matched file cannot be read.
        """
        token_next = self.get_next_token()
        if token_next.type != TokenType.KEYWORD_TEXT:
            self._record_syntax_error(token_next, "Expected file name or wildcard match for 'Embed'")
            return

        match = token_next.value
        recurse = "**/" in match

        files = glob.glob(match, recursive=recurse)
        if not files:
            self._record_syntax_error(token_next, f"{match} does not match any files for 'Embed'")
            return

        # The lexers form a stack, so the last one pushed is read first.  Push
        # in reverse sorted order so the files are read in sorted order rather
        # than in whatever order the file system listed them.
        for file in sorted(files, reverse=True):
            input_text = self._read_file(file)
            self.lexers.append(EmbedLexer(input_text, file))