phml.core.nodes.nodes
1from __future__ import annotations 2 3from functools import cached_property 4from typing import Any, Optional, overload 5 6__all__ = [ 7 "Element", 8 "Root", 9 "Node", 10 "DocType", 11 "Parent", 12 "PI", 13 "Comment", 14 "Literal", 15 "Point", 16 "Position", 17 "Text", 18 "NODE", 19] 20 21 22def leading_spaces(content: str | list[str]) -> int: 23 """Get the leading offset of the first line of the string.""" 24 content = content.split("\n") if isinstance(content, str) else content 25 return len(content[0]) - len(content[0].lstrip()) 26 27 28def strip_blank_lines(data_lines: list[str]) -> list[str]: 29 """Strip the blank lines at the start and end of a list.""" 30 data_lines = [line.replace("\r\n", "\n") for line in data_lines] 31 # remove leading blank lines 32 for idx in range(0, len(data_lines)): # pylint: disable=consider-using-enumerate 33 if data_lines[idx].strip() != "": 34 data_lines = data_lines[idx:] 35 break 36 if idx == len(data_lines) - 1: 37 data_lines = [] 38 break 39 40 # Remove trailing blank lines 41 if len(data_lines) > 0: 42 for idx in range(len(data_lines) - 1, -1, -1): 43 if data_lines[idx].replace("\n", " ").strip() != "": 44 data_lines = data_lines[: idx + 1] 45 break 46 47 return data_lines 48 49 50def normalize_indent(content: str, indent: int = 0) -> str: 51 """Normalize the indent between all lines. 52 53 Args: 54 content (str): The content to normalize the indent for 55 indent (bool): The amount of offset to add to each line after normalization. 56 57 Returns: 58 str: The normalized string 59 """ 60 61 content = strip_blank_lines(str(content).split("\n")) 62 if len(content) > 0: 63 offset = len(content[0]) - len(content[0].lstrip()) 64 lines = [] 65 for line in content: 66 if len(line) > 0 and leading_spaces(line) >= offset: 67 lines.append(" " * indent + line[offset:]) 68 else: 69 lines.append(line) 70 return "\n".join(lines) 71 return "" 72 73 74class Point: 75 """Represents one place in a source file. 
76 77 The line field (1-indexed integer) represents a line in a source file. The column field 78 (1-indexed integer) represents a column in a source file. The offset field (0-indexed integer) 79 represents a character in a source file. 80 """ 81 82 def __init__(self, line: int, column: int, offset: Optional[int] = None): 83 if line is None or line < 0: 84 raise IndexError(f"Point.line must be >= 0 but was {line}") 85 86 self.line = line 87 88 if column is None or column < 0: 89 raise IndexError(f"Point.column must be >= 0 but was {column}") 90 91 self.column = column 92 93 if offset is not None and offset < 0: 94 raise IndexError(f"Point.offset must be >= 0 or None but was {line}") 95 96 self.offset = offset 97 98 def __eq__(self, obj) -> bool: 99 return bool( 100 obj is not None 101 and isinstance(obj, self.__class__) 102 and self.line == obj.line 103 and self.column == obj.column 104 ) 105 106 def __repr__(self) -> str: 107 return f"point(line: {self.line}, column: {self.column}, offset: {self.offset})" 108 109 def __str__(self) -> str: 110 return f"\x1b[38;5;244m{self.line}:{self.column}\x1b[39m" 111 112 113class Position: 114 """Position represents the location of a node in a source file. 115 116 The `start` field of `Position` represents the place of the first character 117 of the parsed source region. The `end` field of Position represents the place 118 of the first character after the parsed source region, whether it exists or not. 119 The value of the `start` and `end` fields implement the `Point` interface. 120 121 The `indent` field of `Position` represents the start column at each index 122 (plus start line) in the source region, for elements that span multiple lines. 123 124 If the syntactic unit represented by a node is not present in the source file at 125 the time of parsing, the node is said to be `generated` and it must not have positional 126 information. 
127 """ 128 129 @overload 130 def __init__( 131 self, 132 start: tuple[int, int, int | None], 133 end: tuple[int, int, int | None], 134 indent: Optional[int] = None, 135 ): 136 """ 137 Args: 138 start (tuple[int, int, int | None]): Tuple representing the line, column, and optional 139 offset of the start point. 140 end (tuple[int, int, int | None]): Tuple representing the line, column, and optional 141 offset of the end point. 142 indent (Optional[int], optional): The indent amount for the start of the position. 143 """ 144 ... 145 146 def __init__(self, start: Point, end: Point, indent: Optional[int] = None): 147 """ 148 Args: 149 start (Point): Starting point of the position. 150 end (Point): End point of the position. 151 indent (int | None): The indent amount for the start of the position. 152 """ 153 154 self.start = ( 155 Point(start[0], start[1], start[2] if len(start) == 3 else None) 156 if isinstance(start, tuple) 157 else start 158 ) 159 self.end = ( 160 Point(end[0], end[1], end[2] if len(end) == 3 else None) 161 if isinstance(end, tuple) 162 else end 163 ) 164 165 if indent is not None and indent < 0: 166 raise IndexError( 167 f"Position.indent value must be >= 0 or None but was {indent}" 168 ) 169 170 self.indent = indent 171 172 def __eq__(self, obj) -> bool: 173 return bool( 174 obj is not None 175 and isinstance(obj, Position) 176 and self.start == obj.start 177 and self.end == obj.end 178 ) 179 180 def as_dict(self) -> dict: 181 """Convert the position object to a dict.""" 182 return { 183 "start": { 184 "line": self.start.line, 185 "column": self.start.column, 186 "offset": self.start.offset, 187 }, 188 "end": { 189 "line": self.end.line, 190 "column": self.end.column, 191 "offset": self.end.offset, 192 }, 193 "indent": self.indent, 194 } 195 196 def __repr__(self) -> str: 197 indent = f" ~ {self.indent}" if self.indent is not None else "" 198 return f"\x1b[38;5;8m<\x1b[39m{self.start}\x1b[38;5;8m-\x1b[39m{self.end}{indent}\x1b[38;5;8m>\x1b[39m" 
199 200 def __str__(self) -> str: 201 return repr(self) 202 203 204class Node: # pylint: disable=too-few-public-methods 205 """All node values can be expressed in JSON as: string, number, 206 object, array, true, false, or null. This means that the syntax tree should 207 be able to be converted to and from JSON and produce the same tree. 208 For example, in JavaScript, a tree can be passed through JSON.parse(JSON.phml(tree)) 209 and result in the same tree. 210 """ 211 212 position: Position 213 """The location of a node in a source document. 214 The value of the position field implements the Position interface. 215 The position field must not be present if a node is generated. 216 """ 217 218 def __init__( 219 self, 220 position: Optional[Position] = None, 221 ): 222 self.position = position 223 224 @property 225 def type(self) -> str: 226 """Non-empty string representing the variant of a node. 227 This field can be used to determine the type a node implements.""" 228 return self.__class__.__name__.lower() 229 230 231class Parent(Node): # pylint: disable=too-few-public-methods 232 """Parent (UnistParent) represents a node in hast containing other nodes (said to be children). 233 234 Its content is limited to only other hast content. 
235 """ 236 237 def __init__( 238 self, position: Optional[Position] = None, children: Optional[list] = None 239 ): 240 super().__init__(position) 241 242 if children is not None: 243 for child in children: 244 if hasattr(child, "type") and child.type in [ 245 "element", 246 "text", 247 "comment", 248 "doctype", 249 "root", 250 ]: 251 child.parent = self 252 253 self.children: list[Element | DocType | Comment | Text] = children or [] 254 255 def append(self, node: NODE): 256 """Add a node to the nested children of the current parent node.""" 257 node.parent = self 258 self.children.append(node) 259 260 def extend(self, nodes: list[NODE]): 261 """Add a node to the nested children of the current parent node.""" 262 for node in nodes: 263 self.append(node) 264 265 def insert(self, index: int, node: NODE): 266 """Insert a node into a specific position in the current parent node's children.""" 267 node.parent = self 268 self.children.insert(index, node) 269 270 def remove(self, node: NODE): 271 """Remove a specific node from the current parent node's children.""" 272 self.children.remove(node) 273 274 275class Root(Parent): 276 """Root (Parent) represents a document. 277 278 Root can be used as the root of a tree, or as a value 279 of the content field on a 'template' Element, never as a child. 280 """ 281 282 def __init__( 283 self, 284 position: Optional[Position] = None, 285 children: Optional[list] = None, 286 ): 287 super().__init__(position, children) 288 self.parent = None 289 290 def __eq__(self, obj) -> bool: 291 return bool( 292 obj is not None 293 and isinstance(obj, Root) 294 and len(self.children) == len(obj.children) 295 and all( 296 child == obj_child 297 for child, obj_child in zip(self.children, obj.children) 298 ) 299 ) 300 301 def __repr__(self) -> str: 302 return f"root [{len(self.children)}]" 303 304 305class Element(Parent): 306 """Element (Parent) represents an Element ([DOM]). 307 308 A tagName field must be present. 
It represents the element's local name ([DOM]). 309 310 The properties field represents information associated with the element. 311 The value of the properties field implements the Properties interface. 312 313 If the tagName field is 'template', a content field can be present. The value 314 of the content field implements the Root interface. 315 316 If the tagName field is 'template', the element must be a leaf. 317 318 If the tagName field is 'noscript', its children should be represented as if 319 scripting is disabled ([HTML]). 320 321 322 For example, the following HTML: 323 324 ```html 325 <a href="https://alpha.com" class="bravo" download></a> 326 ``` 327 328 Yields: 329 330 ```javascript 331 { 332 type: 'element', 333 tagName: 'a', 334 properties: { 335 href: 'https://alpha.com', 336 className: ['bravo'], 337 download: true 338 }, 339 children: [] 340 } 341 ``` 342 """ 343 344 def __init__( 345 self, 346 tag: str = "element", 347 properties: Optional[dict[str, str]] = None, 348 parent: Optional[Element | Root] = None, 349 startend: bool = False, 350 **kwargs, 351 ): 352 super().__init__(**kwargs) 353 self.properties = properties or {} 354 self.tag = tag 355 self.startend = startend 356 self.parent = parent 357 self.context = {} 358 359 def __contains__(self, index: str) -> bool: 360 return index in self.properties 361 362 def __getitem__(self, index: str) -> str: 363 return self.properties[index] 364 365 def __setitem__(self, index: str, value: str): 366 if not isinstance(index, str) or not isinstance(value, (str, bool)): 367 raise TypeError("Index must be a str and value must be either str or bool.") 368 369 self.properties[index] = value 370 371 def __delitem__(self, index: str): 372 if index in self.properties: 373 self.properties.pop(index, None) 374 375 def __eq__(self, obj) -> bool: 376 return bool( 377 obj is not None 378 and isinstance(obj, Element) 379 and self.tag == obj.tag 380 and self.startend == obj.startend 381 and self.properties == 
obj.properties 382 and len(self.children) == len(obj.children) 383 and all( 384 child == obj_child 385 for child, obj_child in zip(self.children, obj.children) 386 ) 387 ) 388 389 def get(self, attr: str, _default: Any = None) -> str | bool | Any | None: 390 """Get a specific attribute from an element. If no default return value 391 is provided then none is returned if no value is found. 392 """ 393 if attr in self: 394 return self[attr] 395 else: 396 return _default 397 398 def start_tag(self, indent: int = 4) -> list[str]: 399 """Builds the open/start tag for the element. 400 401 Note: 402 It will return `/>` if the tag is self closing. 403 404 Returns: 405 str: Built element start tag. 406 """ 407 opening = f"<{self.tag}" 408 409 attributes = [] 410 for prop in self.properties: 411 if isinstance(self[prop], bool) or self[prop] in ["yes", "no"]: 412 if self[prop] == "yes" or self[prop]: 413 attributes.append(prop) 414 else: 415 attributes.append(f'{prop}="{self[prop]}"') 416 417 closing = f"{'/' if self.startend else ''}>" 418 419 if len(attributes) <= 1: 420 return ( 421 [f"{opening}{closing}"] 422 if len(attributes) == 0 423 else [f"{opening} {attributes[0]}{closing}"] 424 ) 425 return [ 426 opening, 427 *[f"{' ' * indent}{attr}" for attr in attributes], 428 closing, 429 ] 430 431 def end_tag(self) -> str: 432 """Build the elements end tag. 433 434 Returns: 435 str: Built element end tag. 436 """ 437 return f"</{self.tag}>" if not self.startend else "" 438 439 def __repr__(self) -> str: 440 out = f"{self.type}(tag: {self.tag}, properties: {self.properties}, \ 441startend: {self.startend}, children: {len(self.children)})" 442 return out 443 444 445class PI(Node): 446 """A processing instruction node. 
Mainly used for XML.""" 447 448 def __init__( 449 self, tag: str, properties: dict, position: Optional[Position] = None 450 ) -> None: 451 super().__init__(position) 452 self.tag = tag 453 self.properties = properties 454 455 def stringify(self, indent: int = 0): # pylint: disable=unused-argument 456 """Construct the string representation of the processing instruction node.""" 457 attributes = " ".join( 458 f'{key}="{value}"' for key, value in self.properties.items() 459 ) 460 return f"<?{self.tag} {attributes}?>" 461 462 463class DocType(Node): 464 """Doctype (Node) represents a DocumentType ([DOM]). 465 466 Example: 467 468 ```html 469 <!doctype html> 470 ``` 471 472 Yields: 473 474 ```javascript 475 {type: 'doctype'} 476 ``` 477 """ 478 479 def __init__( 480 self, 481 lang: Optional[str] = None, 482 parent: Optional[Element | Root] = None, 483 position: Optional[Position] = None, 484 ): 485 super().__init__(position) 486 self.parent = parent 487 self.lang = lang or "html" 488 489 def __eq__(self, obj) -> bool: 490 if obj is None: 491 return False 492 493 if hasattr(obj, "type") and obj.type == self.type: 494 if self.lang == obj.lang: 495 return True 496 return False 497 498 def stringify(self, indent: int = 0) -> str: # pylint: disable=unused-argument 499 """Build indented html string of html doctype element. 500 501 Returns: 502 str: Built html of doctype element 503 """ 504 return f"<!DOCTYPE {self.lang or 'html'}>" 505 506 def __repr__(self) -> str: 507 return f"node.doctype({self.lang or 'html'})" 508 509 510class Literal(Node): 511 """Literal (UnistLiteral) represents a node in hast containing a value.""" 512 513 position: Position 514 """The location of a node in a source document. 515 The value of the position field implements the Position interface. 516 The position field must not be present if a node is generated. 517 """ 518 519 value: str 520 """The Literal nodes value. 
All literal values must be strings""" 521 522 def __init__( 523 self, 524 value: str = "", 525 parent: Optional[Element | Root] = None, 526 position: Optional[Position] = None, 527 ): 528 super().__init__(position) 529 self.value = str(value) 530 self.parent = parent 531 532 def __eq__(self, obj) -> bool: 533 return bool( 534 obj is not None and self.type == obj.type and self.value == obj.value 535 ) 536 537 def normalized(self, indent: int = 0) -> str: 538 """Get the normalized indented value with leading and trailing blank lines stripped.""" 539 return normalize_indent(self.value, indent) 540 541 def stringify(self, indent: int = 0) -> str: 542 if "pre" in self.get_ancestry(): 543 return self.value 544 return self.normalized(indent).rstrip() 545 546 def get_ancestry(self) -> list[str]: 547 """Get the ancestry of the literal node. 548 549 Used to validate whether there is a `pre` element in the ancestry. 550 """ 551 552 def get_parent(parent) -> list[str]: 553 result = [] 554 555 if parent is not None and hasattr(parent, "tag"): 556 result.append(parent.tag) 557 558 if parent.parent is not None: 559 result.extend(get_parent(parent.parent)) 560 561 return result 562 563 return get_parent(self.parent) 564 565 566class Text(Literal): 567 """Text (Literal) represents a Text ([DOM]). 568 569 Example: 570 571 ```html 572 <span>Foxtrot</span> 573 ``` 574 575 Yields: 576 577 ```javascript 578 { 579 type: 'element', 580 tagName: 'span', 581 properties: {}, 582 children: [{type: 'text', value: 'Foxtrot'}] 583 } 584 ``` 585 """ 586 587 @cached_property 588 def num_lines(self) -> int: 589 """Determine the number of lines the text has.""" 590 return len([line for line in str(self.value).split("\n") if line.strip() != ""]) 591 592 def __repr__(self) -> str: 593 return f"literal.text('{self.value}')" 594 595 596class Comment(Literal): 597 """Comment (Literal) represents a Comment ([DOM]). 
598 599 Example: 600 ```html 601 <!--Charlie--> 602 ``` 603 """ 604 605 def stringify(self, indent: int = 0) -> str: 606 """Build indented html string of html comment. 607 608 Returns: 609 str: Built html of comment 610 """ 611 return " " * indent + f"<!--{self.value}-->" 612 613 def __repr__(self) -> str: 614 return f"literal.comment(value: {self.value})" 615 616 617NODE = Root | Element | Text | Comment | DocType | Parent | Node | Literal
class Element(Parent):
    """Element (Parent) represents an Element ([DOM]).

    A tagName field must be present. It represents the element's local name ([DOM]).

    The properties field represents information associated with the element.
    The value of the properties field implements the Properties interface.

    If the tagName field is 'template', a content field can be present. The value
    of the content field implements the Root interface.

    If the tagName field is 'template', the element must be a leaf.

    If the tagName field is 'noscript', its children should be represented as if
    scripting is disabled ([HTML]).


    For example, the following HTML:

    ```html
    <a href="https://alpha.com" class="bravo" download></a>
    ```

    Yields:

    ```javascript
    {
        type: 'element',
        tagName: 'a',
        properties: {
            href: 'https://alpha.com',
            className: ['bravo'],
            download: true
        },
        children: []
    }
    ```
    """

    def __init__(
        self,
        tag: str = "element",
        properties: Optional[dict[str, str]] = None,
        parent: Optional[Element | Root] = None,
        startend: bool = False,
        **kwargs,
    ):
        """
        Args:
            tag (str): The element's tag name.
            properties (dict | None): The element's attributes.
            parent (Element | Root | None): The node that contains this element.
            startend (bool): Whether the element is self closing.
            **kwargs: Forwarded unchanged to the parent class initializer.
        """
        super().__init__(**kwargs)
        self.properties = properties or {}
        self.tag = tag
        self.startend = startend
        self.parent = parent
        self.context = {}

    def __contains__(self, index: str) -> bool:
        return index in self.properties

    def __getitem__(self, index: str) -> str:
        return self.properties[index]

    def __setitem__(self, index: str, value: str):
        if not isinstance(index, str) or not isinstance(value, (str, bool)):
            raise TypeError("Index must be a str and value must be either str or bool.")

        self.properties[index] = value

    def __delitem__(self, index: str):
        # Deleting a missing property is a no-op.
        self.properties.pop(index, None)

    def __eq__(self, obj) -> bool:
        return bool(
            obj is not None
            and isinstance(obj, Element)
            and self.tag == obj.tag
            and self.startend == obj.startend
            and self.properties == obj.properties
            and len(self.children) == len(obj.children)
            and all(
                child == obj_child
                for child, obj_child in zip(self.children, obj.children)
            )
        )

    def get(self, attr: str, _default: Any = None) -> str | bool | Any | None:
        """Get a specific attribute from an element. If no default return value
        is provided then none is returned if no value is found.
        """
        return self[attr] if attr in self else _default

    def start_tag(self, indent: int = 4) -> list[str]:
        """Builds the open/start tag for the element.

        Boolean properties, and the strings `yes`/`no`, are rendered as bare
        attribute names when they are `True`/`yes` and omitted otherwise.

        Note:
            The tag is closed with `/>` if the element is self closing.

        Args:
            indent (int): Number of spaces put before each attribute when the
                tag is spread over multiple lines.

        Returns:
            list[str]: The lines of the start tag. A single line for zero or one
                attribute, otherwise the opening, one line per attribute, and the closing.
        """
        opening = f"<{self.tag}"

        attributes = []
        for prop in self.properties:
            value = self[prop]
            if isinstance(value, bool) or value in ["yes", "no"]:
                # "no" is a truthy string, so it must be compared explicitly.
                if value is True or value == "yes":
                    attributes.append(prop)
            else:
                attributes.append(f'{prop}="{value}"')

        closing = f"{'/' if self.startend else ''}>"

        if len(attributes) <= 1:
            return (
                [f"{opening}{closing}"]
                if len(attributes) == 0
                else [f"{opening} {attributes[0]}{closing}"]
            )
        return [
            opening,
            *[f"{' ' * indent}{attr}" for attr in attributes],
            closing,
        ]

    def end_tag(self) -> str:
        """Build the elements end tag.

        Returns:
            str: Built element end tag, or an empty string for self closing elements.
        """
        return f"</{self.tag}>" if not self.startend else ""

    def __repr__(self) -> str:
        return (
            f"{self.type}(tag: {self.tag}, properties: {self.properties}, "
            f"startend: {self.startend}, children: {len(self.children)})"
        )
Element (Parent) represents an Element ([DOM]).
A tagName field must be present. It represents the element's local name ([DOM]).
The properties field represents information associated with the element. The value of the properties field implements the Properties interface.
If the tagName field is 'template', a content field can be present. The value of the content field implements the Root interface.
If the tagName field is 'template', the element must be a leaf.
If the tagName field is 'noscript', its children should be represented as if scripting is disabled ([HTML]).
For example, the following HTML:
<a href="https://alpha.com" class="bravo" download></a>
Yields:
{
type: 'element',
tagName: 'a',
properties: {
href: 'https://alpha.com',
className: ['bravo'],
download: true
},
children: []
}
def __init__(
    self,
    tag: str = "element",
    properties: Optional[dict[str, str]] = None,
    parent: Optional[Element | Root] = None,
    startend: bool = False,
    **kwargs,
):
    """Create an element.

    Args:
        tag (str): The element's tag name.
        properties (dict | None): The element's attributes. A falsy value
            results in a new empty dict.
        parent (Element | Root | None): The node that contains this element.
        startend (bool): Whether the element is self closing.
        **kwargs: Forwarded unchanged to the parent class initializer.
    """
    super().__init__(**kwargs)

    # Identity of the element.
    self.tag = tag
    self.startend = startend
    self.properties = properties if properties else {}

    # Tree link; assigned after the base initializer has run.
    self.parent = parent
    self.context = {}
def get(self, attr: str, _default: Any = None) -> str | bool | Any | None:
    """Look up an attribute on the element.

    Args:
        attr (str): Name of the attribute to look up.
        _default (Any): Value returned when the attribute is not present.

    Returns:
        The attribute's value when present, otherwise `_default` (None
        unless specified).
    """
    return self[attr] if attr in self else _default
Get a specific attribute from an element. If no default return value is provided then none is returned if no value is found.
def start_tag(self, indent: int = 4) -> list[str]:
    """Builds the open/start tag for the element.

    Boolean properties, and the strings `yes`/`no`, are rendered as bare
    attribute names when they are `True`/`yes` and omitted otherwise. Every
    other property is rendered as `name="value"`.

    Note:
        The tag is closed with `/>` if the element is self closing.

    Args:
        indent (int): Number of spaces put before each attribute when the
            tag is spread over multiple lines.

    Returns:
        list[str]: The lines of the start tag. A single line for zero or one
            attribute, otherwise the opening, one line per attribute, and the closing.
    """
    opening = f"<{self.tag}"

    attributes = []
    for prop, value in self.properties.items():
        if isinstance(value, bool) or value in ["yes", "no"]:
            # "no" is a truthy string, so a plain truthiness check would
            # wrongly render it; compare against the enabled values instead.
            if value is True or value == "yes":
                attributes.append(prop)
        else:
            attributes.append(f'{prop}="{value}"')

    closing = f"{'/' if self.startend else ''}>"

    if len(attributes) <= 1:
        return (
            [f"{opening}{closing}"]
            if len(attributes) == 0
            else [f"{opening} {attributes[0]}{closing}"]
        )
    return [
        opening,
        *[f"{' ' * indent}{attr}" for attr in attributes],
        closing,
    ]
Builds the open/start tag for the element.
Note
The tag is closed with `/>` if the element is self closing.
Returns
list[str]: The lines that make up the built element start tag.
class Root(Parent):
    """Root (Parent) represents a document.

    Root can be used as the root of a tree, or as a value
    of the content field on a 'template' Element, never as a child.
    """

    def __init__(
        self,
        position: Optional[Position] = None,
        children: Optional[list] = None,
    ):
        """
        Args:
            position (Position | None): Location of the root in the source document.
            children (list | None): Initial children of the root.
        """
        super().__init__(position, children)
        # A root is the top of a tree, so it never has a parent.
        self.parent = None

    def __eq__(self, obj) -> bool:
        # Roots are equal when their children are pairwise equal.
        if not isinstance(obj, Root):
            return False
        if len(self.children) != len(obj.children):
            return False
        return all(
            mine == theirs for mine, theirs in zip(self.children, obj.children)
        )

    def __repr__(self) -> str:
        count = len(self.children)
        return f"root [{count}]"
Root (Parent) represents a document.
Root can be used as the root of a tree, or as a value of the content field on a 'template' Element, never as a child.
class Node:  # pylint: disable=too-few-public-methods
    """All node values can be expressed in JSON as: string, number,
    object, array, true, false, or null. This means that the syntax tree should
    be able to be converted to and from JSON and produce the same tree.
    For example, in JavaScript, a tree can be passed through JSON.parse(JSON.stringify(tree))
    and result in the same tree.
    """

    position: Optional[Position]
    """The location of a node in a source document.
    The value of the position field implements the Position interface.
    The position field must not be present if a node is generated.
    """

    def __init__(
        self,
        position: Optional[Position] = None,
    ):
        """
        Args:
            position (Position | None): Location of the node in the source document.
        """
        self.position = position

    @property
    def type(self) -> str:
        """Non-empty string representing the variant of a node.
        This field can be used to determine the type a node implements.

        It is the lowercased name of the node's class.
        """
        return self.__class__.__name__.lower()
All node values can be expressed in JSON as: string, number, object, array, true, false, or null. This means that the syntax tree should be able to be converted to and from JSON and produce the same tree. For example, in JavaScript, a tree can be passed through JSON.parse(JSON.stringify(tree)) and result in the same tree.
The location of a node in a source document. The value of the position field implements the Position interface. The position field must not be present if a node is generated.
class DocType(Node):
    """Doctype (Node) represents a DocumentType ([DOM]).

    Example:

    ```html
    <!doctype html>
    ```

    Yields:

    ```javascript
    {type: 'doctype'}
    ```
    """

    def __init__(
        self,
        lang: Optional[str] = None,
        parent: Optional[Element | Root] = None,
        position: Optional[Position] = None,
    ):
        """
        Args:
            lang (str | None): The doctype's language; falls back to `html`.
            parent (Element | Root | None): The node that contains the doctype.
            position (Position | None): Location of the node in the source document.
        """
        super().__init__(position)
        self.lang = lang if lang else "html"
        self.parent = parent

    def __eq__(self, obj) -> bool:
        # Any object reporting the same node type and the same lang is equal.
        if obj is None or not hasattr(obj, "type"):
            return False
        if obj.type != self.type:
            return False
        return bool(self.lang == obj.lang)

    def stringify(self, indent: int = 0) -> str:  # pylint: disable=unused-argument
        """Build the html string of the doctype element.

        Args:
            indent (int): Unused.

        Returns:
            str: Built html of doctype element
        """
        lang = self.lang or "html"
        return f"<!DOCTYPE {lang}>"

    def __repr__(self) -> str:
        lang = self.lang or "html"
        return f"node.doctype({lang})"
Doctype (Node) represents a DocumentType ([DOM]).
Example:
<!doctype html>
Yields:
{type: 'doctype'}
def stringify(self, indent: int = 0) -> str:  # pylint: disable=unused-argument
    """Build the html string of the doctype element.

    Args:
        indent (int): Unused.

    Returns:
        str: Built html of doctype element. `html` is used when `lang` is falsy.
    """
    lang = self.lang or "html"
    return f"<!DOCTYPE {lang}>"
Build indented html string of html doctype element.
Returns
str: Built html of doctype element
class Parent(Node):  # pylint: disable=too-few-public-methods
    """Parent (UnistParent) represents a node in hast containing other nodes (said to be children).

    Its content is limited to only other hast content.
    """

    def __init__(
        self, position: Optional[Position] = None, children: Optional[list] = None
    ):
        """
        Args:
            position (Position | None): Location of the node in the source document.
            children (list | None): Initial children. The passed list object itself is
                stored when it is non-empty.
        """
        super().__init__(position)

        # Only these node variants get their parent link set on construction.
        adoptable = ("element", "text", "comment", "doctype", "root")
        for child in children if children is not None else ():
            if getattr(child, "type", None) in adoptable:
                child.parent = self

        self.children: list[Element | DocType | Comment | Text] = (
            children if children else []
        )

    def append(self, node: NODE):
        """Add a node to the end of the current parent node's children."""
        node.parent = self
        self.children.append(node)

    def extend(self, nodes: list[NODE]):
        """Add every node in a list to the end of the current parent node's children."""
        add = self.append
        for child in nodes:
            add(child)

    def insert(self, index: int, node: NODE):
        """Insert a node into a specific position in the current parent node's children."""
        node.parent = self
        self.children.insert(index, node)

    def remove(self, node: NODE):
        """Remove a specific node from the current parent node's children."""
        siblings = self.children
        siblings.remove(node)
Parent (UnistParent) represents a node in hast containing other nodes (said to be children).
Its content is limited to only other hast content.
def __init__(
    self, position: Optional[Position] = None, children: Optional[list] = None
):
    """Create a parent node and link the given children to it.

    Args:
        position (Position | None): Location of the node in the source document.
        children (list | None): Initial children. The passed list object itself is
            stored when it is non-empty.
    """
    super().__init__(position)

    # Only these node variants get their parent link set on construction.
    adoptable = ("element", "text", "comment", "doctype", "root")
    for child in children if children is not None else ():
        if getattr(child, "type", None) in adoptable:
            child.parent = self

    self.children: list[Element | DocType | Comment | Text] = (
        children if children else []
    )
def append(self, node: NODE):
    """Add a node to the end of the current parent node's children.

    The node's `parent` attribute is set to this parent node.
    """
    self.children.append(node)
    node.parent = self
Add a node to the nested children of the current parent node.
def extend(self, nodes: list[NODE]):
    """Add every node in a list to the current parent node's children.

    Each node is added through `append`, in the order given.
    """
    add = self.append
    for child in nodes:
        add(child)
Add each node in the given list to the children of the current parent node.
def insert(self, index: int, node: NODE):
    """Insert a node into a specific position in the current parent node's children.

    The node's `parent` attribute is set to this parent node.
    """
    self.children.insert(index, node)
    node.parent = self
Insert a node into a specific position in the current parent node's children.
def remove(self, node: NODE):
    """Remove a specific node from the current parent node's children.

    The removal is delegated to the children list's own `remove` method.
    """
    siblings = self.children
    siblings.remove(node)
Remove a specific node from the current parent node's children.
class PI(Node):
    """A processing instruction node. Mainly used for XML."""

    def __init__(
        self, tag: str, properties: dict, position: Optional[Position] = None
    ) -> None:
        """
        Args:
            tag (str): Name written directly after `<?`.
            properties (dict): Attributes of the processing instruction.
            position (Position | None): Location of the node in the source document.
        """
        super().__init__(position)
        self.properties = properties
        self.tag = tag

    def stringify(self, indent: int = 0):  # pylint: disable=unused-argument
        """Construct the string representation of the processing instruction node."""
        pairs = [f'{name}="{val}"' for name, val in self.properties.items()]
        rendered = " ".join(pairs)
        return f"<?{self.tag} {rendered}?>"
A processing instruction node. Mainly used for XML.
def stringify(self, indent: int = 0):  # pylint: disable=unused-argument
    """Construct the string representation of the processing instruction node.

    Args:
        indent (int): Unused.

    Returns:
        str: `<?tag key="value" ...?>` with the properties in dict order.
    """
    pairs = [f'{name}="{val}"' for name, val in self.properties.items()]
    rendered = " ".join(pairs)
    return f"<?{self.tag} {rendered}?>"
Construct the string representation of the processing instruction node.
class Comment(Literal):
    """Comment (Literal) represents a Comment ([DOM]).

    Example:
    ```html
    <!--Charlie-->
    ```
    """

    def stringify(self, indent: int = 0) -> str:
        """Build indented html string of html comment.

        Args:
            indent (int): Number of spaces put before the comment.

        Returns:
            str: Built html of comment
        """
        padding = " " * indent
        return f"{padding}<!--{self.value}-->"

    def __repr__(self) -> str:
        text = self.value
        return f"literal.comment(value: {text})"
Comment (Literal) represents a Comment ([DOM]).
Example:
<!--Charlie-->
def stringify(self, indent: int = 0) -> str:
    """Build indented html string of html comment.

    Args:
        indent (int): Number of spaces put before the comment.

    Returns:
        str: Built html of comment
    """
    padding = " " * indent
    return f"{padding}<!--{self.value}-->"
Build indented html string of html comment.
Returns
str: Built html of comment
Inherited Members
class Literal(Node):
    """Literal (UnistLiteral) represents a node in hast containing a value."""

    position: Optional[Position]
    """The location of a node in a source document.
    The value of the position field implements the Position interface.
    The position field must not be present if a node is generated.
    """

    value: str
    """The Literal nodes value. All literal values must be strings"""

    def __init__(
        self,
        value: str = "",
        parent: Optional[Element | Root] = None,
        position: Optional[Position] = None,
    ):
        """
        Args:
            value (str): The literal's value; it is converted with `str()`.
            parent (Element | Root | None): The node that contains the literal.
            position (Position | None): Location of the node in the source document.
        """
        super().__init__(position)
        self.value = str(value)
        self.parent = parent

    def __eq__(self, obj) -> bool:
        # Duck-typed comparison; objects without `type`/`value` are simply
        # unequal instead of raising AttributeError.
        return bool(
            obj is not None
            and hasattr(obj, "type")
            and hasattr(obj, "value")
            and self.type == obj.type
            and self.value == obj.value
        )

    def normalized(self, indent: int = 0) -> str:
        """Get the normalized indented value with leading and trailing blank lines stripped."""
        return normalize_indent(self.value, indent)

    def stringify(self, indent: int = 0) -> str:
        """Build the string form of the literal.

        The raw value is returned when a `pre` element is in the ancestry. Otherwise
        the value is indent-normalized and trailing whitespace is stripped.
        """
        if "pre" in self.get_ancestry():
            return self.value
        return self.normalized(indent).rstrip()

    def get_ancestry(self) -> list[str]:
        """Get the ancestry of the literal node.

        Used to validate whether there is a `pre` element in the ancestry.

        Returns:
            list[str]: Tags of the ancestors, nearest first. Ancestors without a
                `tag` attribute are skipped. Empty when the node has no parent.
        """
        ancestry: list[str] = []
        node = self.parent
        # Walk upward until the chain ends; a missing parent ends the walk
        # instead of raising.
        while node is not None:
            if hasattr(node, "tag"):
                ancestry.append(node.tag)
            node = getattr(node, "parent", None)
        return ancestry
Literal (UnistLiteral) represents a node in hast containing a value.
The location of a node in a source document. The value of the position field implements the Position interface. The position field must not be present if a node is generated.
def normalized(self, indent: int = 0) -> str:
    """Return the literal's value after passing it through `normalize_indent`.

    Args:
        indent (int): Forwarded unchanged as the second argument of
            `normalize_indent`.

    Returns:
        str: The normalized indented value with leading and trailing blank
            lines stripped.
    """
    raw_value = self.value
    return normalize_indent(raw_value, indent)
Get the normalized indented value with leading and trailing blank lines stripped.
def get_ancestry(self) -> list[str]:
    """Get the ancestry of the literal node.

    Used to validate whether there is a `pre` element in the ancestry.

    Returns:
        list[str]: Tags of every ancestor that has a `tag` attribute,
            nearest ancestor first. Empty when the literal has no parent.
    """
    tags = []
    node = self.parent
    # Walk up until the chain ends; a literal created without a parent
    # simply yields an empty ancestry instead of failing on `None.parent`.
    while node is not None:
        if hasattr(node, "tag"):
            tags.append(node.tag)
        node = getattr(node, "parent", None)
    return tags
Get the ancestry of the literal node.
Used to validate whether there is a `pre` element in the ancestry.
class Point:
    """Represents one place in a source file.

    The line field (1-indexed integer) represents a line in a source file. The column field
    (1-indexed integer) represents a column in a source file. The offset field (0-indexed integer)
    represents a character in a source file.
    """

    def __init__(self, line: int, column: int, offset: Optional[int] = None):
        """
        Args:
            line (int): Line of the point; must not be None or negative.
            column (int): Column of the point; must not be None or negative.
            offset (int | None): Character offset of the point; may be None
                but must not be negative.

        Raises:
            IndexError: If any of the values fails the checks above.
        """
        # NOTE(review): the class docstring calls line/column 1-indexed, yet 0
        # is accepted here; the check is left as-is so existing callers keep working.
        if line is None or line < 0:
            raise IndexError(f"Point.line must be >= 0 but was {line}")

        self.line = line

        if column is None or column < 0:
            raise IndexError(f"Point.column must be >= 0 but was {column}")

        self.column = column

        if offset is not None and offset < 0:
            # Report the offending offset (the message used to show `line`).
            raise IndexError(f"Point.offset must be >= 0 or None but was {offset}")

        self.offset = offset

    def __eq__(self, obj) -> bool:
        """Points are equal when line and column match; offset is not compared."""
        return bool(
            obj is not None
            and isinstance(obj, self.__class__)
            and self.line == obj.line
            and self.column == obj.column
        )

    def __repr__(self) -> str:
        return f"point(line: {self.line}, column: {self.column}, offset: {self.offset})"

    def __str__(self) -> str:
        # `line:column` wrapped in ANSI color escape sequences.
        return f"\x1b[38;5;244m{self.line}:{self.column}\x1b[39m"
Represents one place in a source file.
The line field (1-indexed integer) represents a line in a source file. The column field (1-indexed integer) represents a column in a source file. The offset field (0-indexed integer) represents a character in a source file.
def __init__(self, line: int, column: int, offset: Optional[int] = None):
    """
    Args:
        line (int): Line of the point; must not be None or negative.
        column (int): Column of the point; must not be None or negative.
        offset (int | None): Character offset of the point; may be None
            but must not be negative.

    Raises:
        IndexError: If any of the values fails the checks above.
    """
    if line is None or line < 0:
        raise IndexError(f"Point.line must be >= 0 but was {line}")

    self.line = line

    if column is None or column < 0:
        raise IndexError(f"Point.column must be >= 0 but was {column}")

    self.column = column

    if offset is not None and offset < 0:
        # Report the offending offset (the message used to show `line`).
        raise IndexError(f"Point.offset must be >= 0 or None but was {offset}")

    self.offset = offset
class Position:
    """Position represents the location of a node in a source file.

    The `start` field of `Position` represents the place of the first character
    of the parsed source region. The `end` field of Position represents the place
    of the first character after the parsed source region, whether it exists or not.
    The value of the `start` and `end` fields implement the `Point` interface.

    The `indent` field of `Position` represents the start column at each index
    (plus start line) in the source region, for elements that span multiple lines.

    If the syntactic unit represented by a node is not present in the source file at
    the time of parsing, the node is said to be `generated` and it must not have positional
    information.
    """

    @overload
    def __init__(
        self,
        start: tuple[int, int, int | None],
        end: tuple[int, int, int | None],
        indent: Optional[int] = None,
    ):
        """
        Args:
            start (tuple[int, int, int | None]): Tuple representing the line, column, and optional
                offset of the start point.
            end (tuple[int, int, int | None]): Tuple representing the line, column, and optional
                offset of the end point.
            indent (Optional[int], optional): The indent amount for the start of the position.
        """
        ...

    # Type checkers reject a lone overload, so the `Point` form is declared too.
    @overload
    def __init__(self, start: Point, end: Point, indent: Optional[int] = None):
        """
        Args:
            start (Point): Starting point of the position.
            end (Point): End point of the position.
            indent (int | None): The indent amount for the start of the position.
        """
        ...

    def __init__(
        self,
        start: Point | tuple[int, int, int | None],
        end: Point | tuple[int, int, int | None],
        indent: Optional[int] = None,
    ):
        """
        Args:
            start (Point | tuple): Starting point of the position. A tuple is
                converted to a `Point`; anything else is stored as given.
            end (Point | tuple): End point of the position, handled like `start`.
            indent (int | None): The indent amount for the start of the position.

        Raises:
            IndexError: If `indent` is negative.
        """

        # A tuple that does not have exactly three items gets no offset.
        self.start = (
            Point(start[0], start[1], start[2] if len(start) == 3 else None)
            if isinstance(start, tuple)
            else start
        )
        self.end = (
            Point(end[0], end[1], end[2] if len(end) == 3 else None)
            if isinstance(end, tuple)
            else end
        )

        if indent is not None and indent < 0:
            raise IndexError(
                f"Position.indent value must be >= 0 or None but was {indent}"
            )

        self.indent = indent

    def __eq__(self, obj) -> bool:
        """Positions are equal when start and end match; indent is not compared."""
        return bool(
            obj is not None
            and isinstance(obj, Position)
            and self.start == obj.start
            and self.end == obj.end
        )

    def as_dict(self) -> dict:
        """Convert the position object to a dict."""
        return {
            "start": {
                "line": self.start.line,
                "column": self.start.column,
                "offset": self.start.offset,
            },
            "end": {
                "line": self.end.line,
                "column": self.end.column,
                "offset": self.end.offset,
            },
            "indent": self.indent,
        }

    def __repr__(self) -> str:
        # `<start-end ~ indent>` wrapped in ANSI color escape sequences; the
        # indent suffix is omitted when no indent is set.
        indent = f" ~ {self.indent}" if self.indent is not None else ""
        return f"\x1b[38;5;8m<\x1b[39m{self.start}\x1b[38;5;8m-\x1b[39m{self.end}{indent}\x1b[38;5;8m>\x1b[39m"

    def __str__(self) -> str:
        return repr(self)
Position represents the location of a node in a source file.
The `start` field of `Position` represents the place of the first character of the parsed source region. The `end` field of `Position` represents the place of the first character after the parsed source region, whether it exists or not. The values of the `start` and `end` fields implement the `Point` interface.
The `indent` field of `Position` represents the start column at each index (plus start line) in the source region, for elements that span multiple lines.
If the syntactic unit represented by a node is not present in the source file at the time of parsing, the node is said to be `generated` and it must not have positional information.
def __init__(self, start: Point, end: Point, indent: Optional[int] = None):
    """
    Args:
        start (Point): Starting point of the position. A tuple is converted
            to a `Point`; anything else is stored as given.
        end (Point): End point of the position, handled like `start`.
        indent (int | None): The indent amount for the start of the position.

    Raises:
        IndexError: If `indent` is negative.
    """

    def as_point(value):
        # Non-tuples are stored untouched.
        if not isinstance(value, tuple):
            return value
        # A tuple that does not have exactly three items gets no offset.
        return Point(value[0], value[1], value[2] if len(value) == 3 else None)

    self.start = as_point(start)
    self.end = as_point(end)

    negative_indent = indent is not None and indent < 0
    if negative_indent:
        raise IndexError(
            f"Position.indent value must be >= 0 or None but was {indent}"
        )

    self.indent = indent
Args
- start (Point): Starting point of the position.
- end (Point): End point of the position.
- indent (int | None): The indent amount for the start of the position.
def as_dict(self) -> dict:
    """Build a plain dictionary describing this position.

    Returns:
        dict: `start` and `end` entries, each holding that point's `line`,
            `column` and `offset`, followed by the `indent` entry.
    """
    point_fields = ("line", "column", "offset")

    def point_to_dict(point) -> dict:
        return {name: getattr(point, name) for name in point_fields}

    result = {}
    result["start"] = point_to_dict(self.start)
    result["end"] = point_to_dict(self.end)
    result["indent"] = self.indent
    return result
Convert the position object to a dict.
class Text(Literal):
    """Text (Literal) represents a Text ([DOM]).

    Example:

    ```html
    <span>Foxtrot</span>
    ```

    Yields:

    ```javascript
    {
        type: 'element',
        tagName: 'span',
        properties: {},
        children: [{type: 'text', value: 'Foxtrot'}]
    }
    ```
    """

    @cached_property
    def num_lines(self) -> int:
        """Count the lines of the text that hold more than whitespace.

        The result is cached on first access.
        """
        count = 0
        for line in str(self.value).split("\n"):
            if line.strip() != "":
                count += 1
        return count

    def __repr__(self) -> str:
        """Short debug representation that shows the text's value."""
        description = f"literal.text('{self.value}')"
        return description
Text (Literal) represents a Text ([DOM]).
Example:
<span>Foxtrot</span>
Yields:
{
type: 'element',
tagName: 'span',
properties: {},
children: [{type: 'text', value: 'Foxtrot'}]
}