Coverage for src/m6rclib/metaphor_parser.py: 100%

209 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-11-12 12:16 +0000

1# Copyright 2024 M6R Ltd. 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14 

15import glob 

16import os 

17from pathlib import Path 

18 

19from typing import List, Set, Optional, Union 

20 

21from .metaphor_token import Token, TokenType 

22from .embed_lexer import EmbedLexer 

23from .metaphor_lexer import MetaphorLexer 

24from .metaphor_ast_node import MetaphorASTNode, MetaphorASTNodeType 

25 

class MetaphorParserFileAlreadyUsedError(Exception):
    """
    Signal that a file has been requested a second time by the same parser.

    Attributes:
        filename (str): The file name as it was supplied to the constructor.
        token (Token): The token supplied by the raiser, used to report
            where the repeated use was detected.
    """
    def __init__(self, filename: str, token: Token) -> None:
        # Keep the offending name and token so handlers can build a
        # location-aware report from them.
        self.filename: str = filename
        self.token: Token = token
        description = f"The file '{filename}' has already been used."
        super().__init__(description)

32 

33 

class MetaphorParserSyntaxError(Exception):
    """
    Exception describing a single syntax error and where it was found.

    Attributes:
        message (str): Description of the problem.
        filename (str): Name of the file in which the problem was found.
        line (int): Line number of the problem.
        column (int): Column number of the problem.
        input_text (str): The input text associated with the problem.
    """
    def __init__(self, message: str, filename: str, line: int, column: int, input_text: str) -> None:
        # The summary used to end with a dangling ", " separator; it now
        # stops after the column.  The input text stays available through
        # the `input_text` attribute rather than the summary string.
        super().__init__(f"{message}: file: {filename}, line {line}, column {column}")
        self.message: str = message
        self.filename: str = filename
        self.line: int = line
        self.column: int = column
        self.input_text: str = input_text

43 

44 

class MetaphorParserError(Exception):
    """
    Wrapper exception carrying a list of syntax errors.

    Attributes:
        errors (List[MetaphorParserSyntaxError]): The errors supplied by the
            raiser, in the order given.
    """
    def __init__(self, message: str, errors: List[MetaphorParserSyntaxError]) -> None:
        # The list is stored as given (not copied).
        self.errors: List[MetaphorParserSyntaxError] = errors
        super().__init__(message)

50 

51 

class MetaphorParser:
    """
    Parser class to process tokens and build an Abstract Syntax Tree (AST).

    Attributes:
        syntax_tree (MetaphorASTNode): The root node of the AST.
        parse_errors (List[MetaphorParserSyntaxError]): List of syntax errors encountered during parsing.
        lexers (List[Union[MetaphorLexer, EmbedLexer]]): Stack of lexers used for parsing multiple
            files; the last entry is the one currently being read.
        previously_seen_files (Set[str]): Set of canonical filenames already processed.
        search_paths (List[str]): List of paths to search for included files.
        current_token (Optional[Token]): The most recent token read from a lexer.
    """
    def __init__(self) -> None:
        self.syntax_tree: MetaphorASTNode = MetaphorASTNode(MetaphorASTNodeType.ROOT, "")
        self.parse_errors: List[MetaphorParserSyntaxError] = []
        self.lexers: List[Union[MetaphorLexer, EmbedLexer]] = []
        self.previously_seen_files: Set[str] = set()
        self.search_paths: List[str] = []
        self.current_token: Optional[Token] = None

    def parse(self, input_text: str, filename: str, search_paths: List[str]) -> MetaphorASTNode:
        """
        Parse an input string and construct the AST.

        A repeated top-level 'Action', 'Context' or 'Role' block is recorded
        as a syntax error, but the block is still parsed and attached so that
        any further errors inside it are reported as well.

        Args:
            input_text (str): The text to be parsed.
            filename (str): The name of the file being parsed.
            search_paths (List[str]): List of paths to search for included files.

        Returns:
            MetaphorASTNode: The root node of the AST, with each top-level block
                attached as a child in the order it was encountered.

        Raises:
            MetaphorParserError: If any syntax error was recorded, if an included or
                embedded file could not be read, or if a file was included more than
                once.  The individual errors are available in its `errors` list.
        """
        self.search_paths = search_paths

        try:
            self.lexers.append(MetaphorLexer(input_text, filename))

            seen_action_tree: bool = False
            seen_context_tree: bool = False
            seen_role_tree: bool = False

            while True:
                token = self.get_next_token()
                if token.type == TokenType.ACTION:
                    if seen_action_tree:
                        self._record_syntax_error(token, "'Action' already defined")

                    self.syntax_tree.attach_child(self._parse_action(token))
                    seen_action_tree = True
                elif token.type == TokenType.CONTEXT:
                    if seen_context_tree:
                        self._record_syntax_error(token, "'Context' already defined")

                    self.syntax_tree.attach_child(self._parse_context(token))
                    seen_context_tree = True
                elif token.type == TokenType.ROLE:
                    if seen_role_tree:
                        self._record_syntax_error(token, "'Role' already defined")

                    self.syntax_tree.attach_child(self._parse_role(token))
                    seen_role_tree = True
                elif token.type == TokenType.END_OF_FILE:
                    if self.parse_errors:
                        raise MetaphorParserError("parser error", self.parse_errors)

                    return self.syntax_tree
                else:
                    self._record_syntax_error(token, f"Unexpected token: {token.value} at top level")
        except FileNotFoundError as e:
            # Raised by the Include/Embed handling; report it at the position
            # of the last token that was read from a lexer.
            err_token = self.current_token
            self.parse_errors.append(MetaphorParserSyntaxError(
                f"{e}", err_token.filename, err_token.line, err_token.column, err_token.input
            ))
            raise MetaphorParserError("parser error", self.parse_errors) from e
        except MetaphorParserFileAlreadyUsedError as e:
            self.parse_errors.append(MetaphorParserSyntaxError(
                f"The file '{e.filename}' has already been used",
                e.token.filename,
                e.token.line,
                e.token.column,
                e.token.input
            ))
            raise MetaphorParserError("parser error", self.parse_errors) from e

    def parse_file(self, filename: str, search_paths: List[str]) -> MetaphorASTNode:
        """
        Parse a file and construct the AST.

        Args:
            filename (str): The path to the file to be parsed.
            search_paths (List[str]): List of paths to search for included files.

        Returns:
            MetaphorASTNode: The root node of the AST.

        Raises:
            MetaphorParserError: If the file cannot be read or there are syntax
                errors during parsing.
            MetaphorParserFileAlreadyUsedError: If `filename` has already been loaded
                by this parser instance.  This one is not wrapped in a
                MetaphorParserError.
        """
        try:
            self._check_file_not_loaded(filename)
            input_text = self._read_file(filename)
            return self.parse(input_text, filename, search_paths)
        except FileNotFoundError as e:
            # No token exists yet for the top-level file, so the error carries
            # an empty location.
            self.parse_errors.append(MetaphorParserSyntaxError(
                f"{e}", "", 0, 0, ""
            ))
            raise MetaphorParserError("parser error", self.parse_errors) from e
        except MetaphorParserError as e:
            raise MetaphorParserError("parser error", self.parse_errors) from e

    def get_next_token(self) -> Token:
        """
        Get the next token from the active lexer.

        'Include' and 'Embed' tokens are handled here by pushing new lexers, and
        a lexer that reports end-of-file is popped, so callers only ever see
        ordinary tokens followed by a single end-of-file stream.

        Returns:
            Token: The next token, or a synthetic END_OF_FILE token once every
                lexer has been exhausted.
        """
        while self.lexers:
            lexer = self.lexers[-1]
            token = lexer.get_next_token()
            self.current_token = token

            if token.type == TokenType.INCLUDE:
                self._parse_include()
            elif token.type == TokenType.EMBED:
                self._parse_embed()
            elif token.type == TokenType.END_OF_FILE:
                self.lexers.pop()
            else:
                return token

        return Token(TokenType.END_OF_FILE, "", "", "", 0, 0)

    def _record_syntax_error(self, token: Token, message: str) -> None:
        """Record a syntax error at the token's position; parsing continues."""
        error = MetaphorParserSyntaxError(
            message, token.filename, token.line, token.column, token.input
        )
        self.parse_errors.append(error)

    def _find_file_path(self, filename: str) -> str:
        """
        Try to find a valid path for a file, given all the search path options.

        Returns:
            str: `filename` itself if it exists, otherwise the first
                `search_path/filename` that exists.

        Raises:
            FileNotFoundError: If no candidate path exists.
        """
        if Path(filename).exists():
            return filename

        # If we don't have an absolute path then we can try search paths.
        if not os.path.isabs(filename):
            for path in self.search_paths:
                try_name = os.path.join(path, filename)
                if Path(try_name).exists():
                    return try_name

        raise FileNotFoundError(f"File not found: {filename}")

    def _read_file(self, filename: str) -> str:
        """
        Read file content into memory.

        Every OS-level failure is re-raised as FileNotFoundError with a
        descriptive message, so that callers need only one handler.

        Raises:
            FileNotFoundError: If the file cannot be opened or read.
        """
        try:
            with open(filename, 'r', encoding='utf-8') as file:
                return file.read()
        except FileNotFoundError as e:
            raise FileNotFoundError(f"File not found: {filename}") from e
        except PermissionError as e:
            raise FileNotFoundError(f"You do not have permission to access: {filename}") from e
        except IsADirectoryError as e:
            raise FileNotFoundError(f"Is a directory: {filename}") from e
        except OSError as e:
            raise FileNotFoundError(f"OS error: {e}") from e

    def _check_file_not_loaded(self, filename: str) -> None:
        """
        Check we have not already loaded a file, and remember it if not.

        Raises:
            MetaphorParserFileAlreadyUsedError: If the canonical form of
                `filename` has been seen before.
        """
        canonical_filename = os.path.realpath(filename)
        if canonical_filename in self.previously_seen_files:
            raise MetaphorParserFileAlreadyUsedError(filename, self.current_token)

        self.previously_seen_files.add(canonical_filename)

    def _parse_text(self, token: Token) -> MetaphorASTNode:
        """Parse a text block."""
        return MetaphorASTNode(MetaphorASTNodeType.TEXT, token.value)

    def _parse_block_label(self, token: Token, block_name: str) -> str:
        """Consume a block's optional description and its indent; return the description."""
        init_token = self.get_next_token()
        if init_token.type == TokenType.KEYWORD_TEXT:
            indent_token = self.get_next_token()
            if indent_token.type != TokenType.INDENT:
                # Errors are reported against the block keyword token.
                self._record_syntax_error(
                    token,
                    f"Expected indent after keyword description for '{block_name}' block"
                )

            return init_token.value

        if init_token.type != TokenType.INDENT:
            self._record_syntax_error(
                token,
                f"Expected description or indent for '{block_name}' block"
            )

        return ""

    def _parse_text_only_block(
        self, token: Token, node_type: MetaphorASTNodeType, block_name: str
    ) -> MetaphorASTNode:
        """Parse a block that may contain only text, returning its AST node."""
        label_name = self._parse_block_label(token, block_name)
        block_node = MetaphorASTNode(node_type, label_name)

        while True:
            token = self.get_next_token()
            if token.type == TokenType.TEXT:
                block_node.attach_child(self._parse_text(token))
            elif token.type == TokenType.OUTDENT or token.type == TokenType.END_OF_FILE:
                return block_node
            else:
                self._record_syntax_error(
                    token,
                    f"Unexpected token: {token.value} in '{block_name}' block"
                )

    def _parse_action(self, token: Token) -> MetaphorASTNode:
        """Parse an action block and construct its AST node."""
        return self._parse_text_only_block(token, MetaphorASTNodeType.ACTION, "Action")

    def _parse_context(self, token: Token) -> MetaphorASTNode:
        """
        Parse a Context block.

        A Context block may contain text and nested Context blocks; text that
        appears after a nested Context block is recorded as a syntax error but
        is still attached.
        """
        label_name = self._parse_block_label(token, "Context")
        context_node = MetaphorASTNode(MetaphorASTNodeType.CONTEXT, label_name)

        seen_token_type = TokenType.NONE

        while True:
            token = self.get_next_token()
            if token.type == TokenType.TEXT:
                if seen_token_type != TokenType.NONE:
                    self._record_syntax_error(token, "Text must come first in a 'Context' block")

                context_node.attach_child(self._parse_text(token))
            elif token.type == TokenType.CONTEXT:
                context_node.attach_child(self._parse_context(token))
                seen_token_type = TokenType.CONTEXT
            elif token.type == TokenType.OUTDENT or token.type == TokenType.END_OF_FILE:
                return context_node
            else:
                self._record_syntax_error(token, f"Unexpected token: {token.value} in 'Context' block")

    def _parse_role(self, token: Token) -> MetaphorASTNode:
        """Parse a Role block."""
        return self._parse_text_only_block(token, MetaphorASTNodeType.ROLE, "Role")

    def _parse_include(self) -> None:
        """
        Parse an Include block and load the included file.

        Raises:
            FileNotFoundError: If the file cannot be found or read.
            MetaphorParserFileAlreadyUsedError: If the file was already loaded.
        """
        token_next = self.get_next_token()
        if token_next.type != TokenType.KEYWORD_TEXT:
            self._record_syntax_error(token_next, "Expected file name for 'Include'")
            return

        # Resolve against the search paths *before* the duplicate check, so
        # that the canonical name recorded is that of the file actually
        # loaded and a missing file is never remembered as "seen".
        try_file = self._find_file_path(token_next.value)
        self._check_file_not_loaded(try_file)
        input_text = self._read_file(try_file)
        self.lexers.append(MetaphorLexer(input_text, try_file))

    def _parse_embed(self) -> None:
        """
        Parse an Embed block and load the embedded file(s).

        The file name may be a glob pattern; a pattern containing "**/" is
        matched recursively.  Matching files are embedded in sorted order.

        Raises:
            FileNotFoundError: If a matching file cannot be read.
        """
        token_next = self.get_next_token()
        if token_next.type != TokenType.KEYWORD_TEXT:
            self._record_syntax_error(token_next, "Expected file name or wildcard match for 'Embed'")
            return

        match = token_next.value
        recurse = "**/" in match

        files = glob.glob(match, recursive=recurse)
        if not files:
            self._record_syntax_error(token_next, f"{match} does not match any files for 'Embed'")
            return

        # glob gives no ordering guarantee, and `lexers` is a stack (the last
        # lexer pushed is read first), so push in descending order to have the
        # files consumed in ascending, reproducible order.
        for file in sorted(files, reverse=True):
            input_text = self._read_file(file)
            self.lexers.append(EmbedLexer(input_text, file))

358 input_text = self._read_file(file) 

359 self.lexers.append(EmbedLexer(input_text, file))