phml.utilities.validate.validate
"""Validation helpers and element predicates for phml nodes."""

from re import match, split, sub
from typing import Any

from phml.core.nodes import NODE, Comment, Element, Literal, Parent, Root, Text

__all__ = [
    "validate",
    "parent",
    "literal",
    "generated",
    "has_property",
    "is_heading",
    "is_css_link",
    "is_css_style",
    "is_javascript",
    "is_element",
    "is_embedded",
    "is_interactive",
    "is_phrasing",
    "is_event_handler",
    "blank",
]


def validate(node: NODE) -> bool:
    """Validate a node based on the attributes it carries and its type.

    Only attributes that are present on ``node`` are checked:

    * ``children`` requires a ``type`` of ``'root'`` or ``'element'`` and
      every child must be an instance of ``NODE``.
    * ``properties`` requires a ``type`` of ``'element'`` (when a type is set)
      and every property value must be an ``int`` or a ``str``.
    * ``value`` must be a ``str``.

    Args:
        node (NODE): Node to validate.

    Returns:
        bool: ``True`` when none of the checks fail.

    Raises:
        AssertionError: If any of the checks above fails.
    """
    if hasattr(node, "children"):
        if not hasattr(node, "type"):
            raise AssertionError("Node should have a type")

        if node.type != "root" and node.type != "element":
            raise AssertionError(
                "Node should have a type of 'root' or 'element' to contain the 'children' attribute"
            )

        for child in node.children:
            if not isinstance(child, NODE):
                raise AssertionError("Children must be a node type")

    if hasattr(node, "properties"):
        # A node without a type is given the benefit of the doubt here.
        if getattr(node, "type", "element") != "element":
            raise AssertionError("Node must be of type 'element' to contain 'properties'")

        for prop in node.properties:
            if not isinstance(node[prop], (int, str)):
                raise AssertionError("Node 'properties' must be of type 'int' or 'str'")

    if not isinstance(getattr(node, "value", ""), str):
        raise AssertionError("Node 'value' must be of type 'str'")

    return True


def parent(node: Root | Element) -> bool:
    """Validate a parent node based on its attributes and type.

    Args:
        node (Root | Element): Node expected to inherit from ``Parent``.

    Returns:
        bool: ``True`` when the node passes every check.

    Raises:
        AssertionError: If the node does not inherit from ``Parent``, has no
            ``children``, or is an element without ``properties``.
    """
    if not isinstance(node, Parent):
        raise AssertionError(
            "Node must inherit from 'Parent'. 'Root' and 'Element' are most common."
        )

    if getattr(node, "children", None) is None:
        raise AssertionError("Parent nodes should have the 'children' attribute")

    if node.type == "element" and getattr(node, "properties", None) is None:
        raise AssertionError("Parent element node should have the 'properties' element.")

    # The signature promises a bool; mirror `validate` and report success.
    return True


def literal(node: Text | Comment) -> bool:
    """Validate a literal node based on its attributes.

    Args:
        node (Text | Comment): Node expected to inherit from ``Literal``.

    Returns:
        bool: ``True`` when the node passes every check.

    Raises:
        AssertionError: If the node does not inherit from ``Literal`` or its
            ``value`` is missing or not a ``str``.
    """
    if not isinstance(node, Literal):
        raise AssertionError(
            "Node must inherit from 'Literal'. 'Text' and 'Comment' are most common."
        )

    if not isinstance(getattr(node, "value", None), str):
        raise AssertionError("Literal nodes 'value' type should be 'str'")

    # The signature promises a bool; mirror `validate` and report success.
    return True


def generated(node: NODE) -> bool:
    """Check whether a node has been generated.

    A node is considered generated when it carries no position information.

    Args:
        node (NODE): Node to inspect.

    Returns:
        bool: ``True`` if ``node`` has no ``position`` attribute or the
        attribute is ``None``; ``False`` otherwise.
    """
    return getattr(node, "position", None) is None


def is_heading(node) -> bool:
    """Check if an element is a heading (``h1`` through ``h6``).

    Args:
        node: Node exposing ``type`` and, for elements, ``tag``.

    Returns:
        bool: ``True`` only when the tag is exactly ``h1`` ... ``h6``.

    Raises:
        TypeError: If ``node`` is not an element.
    """
    if node.type != "element":
        raise TypeError("Node must be an element.")

    # `match` anchors only the start; `\Z` pins the end so that tags such as
    # `h1-card` are not mistaken for headings.
    return match(r"h[1-6]\Z", node.tag) is not None


def is_css_link(node) -> bool:
    """Check if an element is a ``link`` to a css file.

    Returns ``True`` if ``node`` is a ``<link>`` element whose ``rel`` token
    list contains ``'stylesheet'`` and that has no ``type``, an empty
    ``type``, or ``'text/css'`` as its ``type``.
    """
    if not (is_element(node, "link") and has_property(node, "rel")):
        return False

    rel = node["rel"]
    # `str.split()` separates on any run of whitespace (spaces, tabs,
    # newlines); a non-string `rel` cannot name a stylesheet.
    if not isinstance(rel, str) or "stylesheet" not in rel.split():
        return False

    return not has_property(node, "type") or node["type"] in ("", "text/css")


def is_css_style(node) -> bool:
    """Check if an element is a css ``style`` element.

    Returns ``True`` if ``node`` is a ``<style>`` element that has no
    ``type``, an empty ``type``, or ``'text/css'`` as its ``type``.
    """
    if not is_element(node, "style"):
        return False

    if not has_property(node, "type"):
        return True

    kind = node["type"]
    return kind == "" or kind == "text/css"


def is_javascript(node) -> bool:
    """Check if an element is a javascript ``script`` element.

    Returns ``True`` if ``node`` is a ``<script>`` element that has a valid
    JavaScript ``type``, has no ``type`` and a valid JavaScript ``language``,
    or has neither attribute.
    """
    if not is_element(node, "script"):
        return False

    # When `type` is present it alone decides; `language` is only a fallback.
    if has_property(node, "type"):
        return node["type"] in ("text/ecmascript", "text/javascript")

    if has_property(node, "language"):
        return node["language"] in ("ecmascript", "javascript")

    return True


def is_element(node, *conditions: str | list) -> bool:
    """Check if the given node is an element, optionally with a certain tag.

    Args:
        node: Node exposing ``type`` and, for elements, ``tag``.
        *conditions: Tag names to accept. A ``str`` must equal the tag; a
            ``list``, ``tuple``, ``set`` or ``frozenset`` passes when it
            contains the tag. With no conditions any element is accepted.

    Returns:
        bool: ``True`` if ``node`` is an element and at least one condition
        passes (or no conditions were given).
    """
    if node.type != "element":
        return False

    if not conditions:
        return True

    tag = node.tag
    for condition in conditions:
        if isinstance(condition, str):
            if tag == condition:
                return True
        elif isinstance(condition, (list, tuple, set, frozenset)) and tag in condition:
            return True

    return False


def is_event_handler(attribute: str) -> bool:
    """Check if an attribute name looks like an event handler.

    Returns ``True`` if ``attribute`` starts with ``on`` and is at least
    ``5`` characters long.
    """
    has_prefix = attribute.startswith("on")
    return has_prefix and len(attribute) > 4


def has_property(node, attribute: str) -> bool:
    """Check whether an element has a certain property in its properties.

    Raises:
        TypeError: If ``node`` is not an element.
    """
    if node.type != "element":
        raise TypeError("Node must be an element.")

    return attribute in node.properties


def is_embedded(node: Element) -> bool:
    """Check whether an element is an embedded element.

    Embedded elements are: audio, canvas, embed, iframe, img, MathML math,
    object, picture, SVG svg and video.

    Returns:
        True if embedded
    """
    embedded_tags = [
        "audio",
        "canvas",
        "embed",
        "iframe",
        "img",
        "math",
        "object",
        "picture",
        "svg",
        "video",
    ]
    return is_element(node, embedded_tags)


def is_interactive(node: Element) -> bool:
    """Check if the element is intended for user interaction.

    Conditions:

    * a (if the href attribute is present)
    * audio, video (if the controls attribute is present)
    * img (if the usemap attribute is present and not empty)
    * input (if the type attribute is not in the Hidden state)
    * button, details, embed, iframe, label, select, textarea (always)

    Returns:
        True if element is interactive
    """
    if is_element(node, "a"):
        return has_property(node, "href")

    if is_element(node, "audio", "video"):
        return has_property(node, "controls")

    if is_element(node, "img"):
        # Coerce so the raw attribute value never leaks out as the result.
        return has_property(node, "usemap") and bool(node["usemap"])

    if is_element(node, "input"):
        # An input without `type` is a text field, which is interactive.
        return not has_property(node, "type") or str(node["type"]).lower() != "hidden"

    return is_element(
        node, "button", "details", "embed", "iframe", "label", "select", "textarea"
    )


def is_phrasing(node: Element) -> bool:
    """Check if a node is phrasing content according to
    https://html.spec.whatwg.org/#phrasing-content-2.

    Phrasing content is the text of the document, as well as elements that
    mark up that text at the intra-paragraph level.

    * text nodes (always)
    * area (if it is a descendant of a map element)
    * link (if it has itemprop, or every rel token is allowed in the body)
    * meta (if the itemprop attribute is present)
    * the remaining phrasing elements listed in the body (always)

    Returns:
        True if the node is phrasing content
    """
    if isinstance(node, Text):
        return True

    if is_element(node, "area"):
        # Walk up the ancestor chain; any enclosing `map` qualifies.
        ancestor = node.parent
        while ancestor is not None:
            if is_element(ancestor, "map"):
                return True
            ancestor = getattr(ancestor, "parent", None)
        return False

    if is_element(node, "meta"):
        return has_property(node, "itemprop")

    if is_element(node, "link"):
        if has_property(node, "itemprop"):
            return True
        if not has_property(node, "rel"):
            return False

        body_ok = (
            "dns-prefetch",
            "modulepreload",
            "pingback",
            "preconnect",
            "prefetch",
            "preload",
            "prerender",
            "stylesheet",
        )
        rel = node["rel"]
        # `split()` drops the empty tokens that repeated spaces would create.
        tokens = rel.split() if isinstance(rel, str) else []
        return bool(tokens) and all(token in body_ok for token in tokens)

    phrasing_tags = (
        "a", "abbr", "audio", "b", "bdi", "bdo", "br", "button", "canvas",
        "cite", "code", "data", "datalist", "del", "dfn", "em", "embed", "i",
        "iframe", "img", "input", "ins", "kbd", "label", "map", "mark", "math",
        "meter", "noscript", "object", "output", "picture", "progress", "q",
        "ruby", "s", "samp", "script", "select", "slot", "small", "span",
        "strong", "sub", "sup", "svg", "template", "textarea", "time", "u",
        "var", "video", "wbr",
    )
    return is_element(node, *phrasing_tags)


def blank(value: Any) -> bool:
    """Report whether a value is blank or ``None``.

    Strings are stripped first, so whitespace-only strings are blank. Any
    other value with a length is blank when that length is zero. Values
    without a length are never blank unless they are ``None``.

    Args:
        value (Any): The value to check.

    Returns:
        bool: True if value is blank
    """
    if value is None:
        return True

    candidate = value.strip() if isinstance(value, str) else value
    if not hasattr(candidate, "__len__"):
        return False

    return len(candidate) == 0
def validate(node: NODE) -> bool:
    """Validate a node based on the attributes it carries and its type.

    Only attributes that are present on ``node`` are checked:

    * ``children`` requires a ``type`` of ``'root'`` or ``'element'`` and
      every child must be an instance of ``NODE``.
    * ``properties`` requires a ``type`` of ``'element'`` (when a type is set)
      and every property value must be an ``int`` or a ``str``.
    * ``value`` must be a ``str``.

    Args:
        node (NODE): Node to validate.

    Returns:
        bool: ``True`` when none of the checks fail.

    Raises:
        AssertionError: If any of the checks above fails.
    """
    if hasattr(node, "children"):
        if not hasattr(node, "type"):
            raise AssertionError("Node should have a type")

        if node.type != "root" and node.type != "element":
            raise AssertionError(
                "Node should have a type of 'root' or 'element' to contain the 'children' attribute"
            )

        for child in node.children:
            if not isinstance(child, NODE):
                raise AssertionError("Children must be a node type")

    if hasattr(node, "properties"):
        # A node without a type is given the benefit of the doubt here.
        if getattr(node, "type", "element") != "element":
            raise AssertionError("Node must be of type 'element' to contain 'properties'")

        for prop in node.properties:
            if not isinstance(node[prop], (int, str)):
                raise AssertionError("Node 'properties' must be of type 'int' or 'str'")

    if not isinstance(getattr(node, "value", ""), str):
        raise AssertionError("Node 'value' must be of type 'str'")

    return True
Validate a node based on attributes and type.
def parent(node: Root | Element) -> bool:
    """Validate a parent node based on its attributes and type.

    Args:
        node (Root | Element): Node expected to inherit from ``Parent``.

    Returns:
        bool: ``True`` when the node passes every check.

    Raises:
        AssertionError: If the node does not inherit from ``Parent``, has no
            ``children``, or is an element without ``properties``.
    """
    if not isinstance(node, Parent):
        raise AssertionError(
            "Node must inherit from 'Parent'. 'Root' and 'Element' are most common."
        )

    if getattr(node, "children", None) is None:
        raise AssertionError("Parent nodes should have the 'children' attribute")

    if node.type == "element" and getattr(node, "properties", None) is None:
        raise AssertionError("Parent element node should have the 'properties' element.")

    # The signature promises a bool; mirror `validate` and report success
    # instead of falling off the end with None.
    return True
Validate a parent node based on attributes and type.
def literal(node: Text | Comment) -> bool:
    """Validate a literal node based on its attributes.

    Args:
        node (Text | Comment): Node expected to inherit from ``Literal``.

    Returns:
        bool: ``True`` when the node passes every check.

    Raises:
        AssertionError: If the node does not inherit from ``Literal`` or its
            ``value`` is missing or not a ``str``.
    """
    if not isinstance(node, Literal):
        raise AssertionError(
            "Node must inherit from 'Literal'. 'Text' and 'Comment' are most common."
        )

    if not isinstance(getattr(node, "value", None), str):
        raise AssertionError("Literal nodes 'value' type should be 'str'")

    # The signature promises a bool; mirror `validate` and report success
    # instead of falling off the end with None.
    return True
Validate a literal node based on attributes.
def generated(node: NODE) -> bool:
    """Check whether a node has been generated.

    A node is considered generated when it carries no position information.

    Args:
        node (NODE): Node to inspect.

    Returns:
        bool: ``True`` if ``node`` has no ``position`` attribute or the
        attribute is ``None``; ``False`` otherwise.
    """
    return getattr(node, "position", None) is None
Checks if a node has been generated. A node is considered generated if it does not have a position.
Args
- node (NODE): Node to check for position with.
Returns
bool: True if the node has no position (it was generated), False if it has one.
def has_property(node, attribute: str) -> bool:
    """Check whether an element has a certain property in its properties.

    Args:
        node: Node exposing ``type`` and, for elements, ``properties``.
        attribute (str): Property name to look up.

    Returns:
        bool: ``True`` if ``attribute`` is in ``node.properties``.

    Raises:
        TypeError: If ``node`` is not an element.
    """
    if node.type != "element":
        raise TypeError("Node must be an element.")

    return attribute in node.properties
Check to see if an element has a certain property in properties.
def is_heading(node) -> bool:
    """Check if an element is a heading (``h1`` through ``h6``).

    Args:
        node: Node exposing ``type`` and, for elements, ``tag``.

    Returns:
        bool: ``True`` only when the tag is exactly ``h1`` ... ``h6``.

    Raises:
        TypeError: If ``node`` is not an element.
    """
    if node.type != "element":
        raise TypeError("Node must be an element.")

    # `match` anchors only the start of the string; `\Z` pins the end so that
    # tags such as `h1-card` or `h2x` are not mistaken for headings.
    return match(r"h[1-6]\Z", node.tag) is not None
Check if an element is a heading.
def is_css_link(node) -> bool:
    """Check if an element is a ``link`` to a css file.

    Returns ``True`` if ``node`` is a ``<link>`` element whose ``rel`` token
    list contains ``'stylesheet'`` and that has no ``type``, an empty
    ``type``, or ``'text/css'`` as its ``type``.

    Args:
        node: Node to inspect.

    Returns:
        bool: Whether ``node`` links a css stylesheet.
    """
    if not (is_element(node, "link") and has_property(node, "rel")):
        return False

    rel = node["rel"]
    # `str.split()` separates on any run of whitespace (spaces, tabs,
    # newlines); a non-string `rel` cannot name a stylesheet, so it yields
    # False rather than an exception.
    if not isinstance(rel, str) or "stylesheet" not in rel.split():
        return False

    return not has_property(node, "type") or node["type"] in ("", "text/css")
Check if an element is a `link` to a css file.
Returns `True` if `node` is a `<link>` element with a `rel` list that contains `'stylesheet'` and has no `type`, an empty `type`, or `'text/css'` as its `type`.
def is_css_style(node) -> bool:
    """Check if an element is a css ``style`` element.

    Returns ``True`` if ``node`` is a ``<style>`` element that has no
    ``type``, an empty ``type``, or ``'text/css'`` as its ``type``.
    """
    if not is_element(node, "style"):
        return False

    # A missing `type` is accepted as css.
    if not has_property(node, "type"):
        return True

    return node["type"] == "" or node["type"] == "text/css"
Check if an element is a css `style` element.
Returns `True` if `node` is a `<style>` element that has no `type`, an empty `type`, or `'text/css'` as its `type`.
def is_javascript(node) -> bool:
    """Check if an element is a javascript ``script`` element.

    Returns ``True`` if ``node`` is a ``<script>`` element that has a valid
    JavaScript ``type``, has no ``type`` and a valid JavaScript ``language``,
    or has neither attribute.

    Args:
        node: Node to inspect.

    Returns:
        bool: Whether ``node`` is a javascript ``<script>`` element.
    """
    if not is_element(node, "script"):
        return False

    # When `type` is present it alone decides the outcome; a `language`
    # attribute next to a valid `type` must not turn the result into False.
    if has_property(node, "type"):
        return node["type"] in ("text/ecmascript", "text/javascript")

    # `language` is only consulted as a fallback when `type` is absent.
    if has_property(node, "language"):
        return node["language"] in ("ecmascript", "javascript")

    return True
Check if an element is a javascript `script` element.
Returns `True` if `node` is a `<script>` element that has a valid JavaScript `type`, has no `type` and a valid JavaScript `language`, or has neither.
159def is_element(node, *conditions: str | list) -> bool: 160 """Checks if the given node is a certain element. 161 162 When providing a str it will check that the elements tag matches. 163 If a list is provided it checks that one of the conditions in the list 164 passes. 165 """ 166 167 return bool( 168 node.type == "element" 169 and any( 170 bool( 171 (isinstance(condition, str) and node.tag == condition) 172 or (isinstance(condition, list) and any(node.tag == nested for nested in condition)) 173 ) 174 for condition in conditions 175 ) 176 )
Checks if the given node is a certain element.
When providing a str it will check that the elements tag matches. If a list is provided it checks that one of the conditions in the list passes.
def is_embedded(node: Element) -> bool:
    """Check whether an element is an embedded element.

    Embedded elements are: audio, canvas, embed, iframe, img, MathML math,
    object, picture, SVG svg and video.

    Args:
        node (Element): Node to inspect.

    Returns:
        True if embedded
    """
    embedded_tags = [
        "audio",
        "canvas",
        "embed",
        "iframe",
        "img",
        "math",
        "object",
        "picture",
        "svg",
        "video",
    ]
    # A list condition passes when the node's tag equals any entry.
    return is_element(node, embedded_tags)
Check to see if an element is an embedded element.
Embedded Elements:
- audio
- canvas
- embed
- iframe
- img
- MathML math
- object
- picture
- SVG svg
- video
Returns
True if embedded
def is_interactive(node: Element) -> bool:
    """Check if the element is intended for user interaction.

    Conditions:

    * a (if the href attribute is present)
    * audio, video (if the controls attribute is present)
    * img (if the usemap attribute is present and not empty)
    * input (if the type attribute is not in the Hidden state)
    * button, details, embed, iframe, label, select, textarea (always)

    Args:
        node (Element): Node to inspect.

    Returns:
        True if element is interactive
    """
    if is_element(node, "a"):
        return has_property(node, "href")

    # `audio` follows the same rule as `video`.
    if is_element(node, "audio", "video"):
        return has_property(node, "controls")

    if is_element(node, "img"):
        # Coerce so the raw attribute value never leaks out as the result.
        return has_property(node, "usemap") and bool(node["usemap"])

    if is_element(node, "input"):
        # An input without `type` is a text field, which is interactive;
        # `str()` keeps a non-string value from raising on `.lower()`.
        return not has_property(node, "type") or str(node["type"]).lower() != "hidden"

    return is_element(
        node, "button", "details", "embed", "iframe", "label", "select", "textarea"
    )
Check if the element is intended for user interaction.
Conditions:
- a (if the href attribute is present)
- audio (if the controls attribute is present)
- button, details, embed, iframe; img (if the usemap attribute is present)
- input (if the type attribute is not in the Hidden state)
- label, select, textarea; video (if the controls attribute is present)
Returns
True if element is interactive
def is_phrasing(node: Element) -> bool:
    """Check if a node is phrasing content according to
    https://html.spec.whatwg.org/#phrasing-content-2.

    Phrasing content is the text of the document, as well as elements that
    mark up that text at the intra-paragraph level. Runs of phrasing content
    form paragraphs.

    * text nodes (always)
    * area (if it is a descendant of a map element)
    * link (if it has itemprop, or every rel token is allowed in the body)
    * meta (if the itemprop attribute is present)
    * a, abbr, audio, b, bdi, bdo, br, button, canvas, cite, code, data,
      datalist, del, dfn, em, embed, i, iframe, img, input, ins, kbd, label,
      map, mark, math, meter, noscript, object, output, picture, progress, q,
      ruby, s, samp, script, select, slot, small, span, strong, sub, sup,
      svg, template, textarea, time, u, var, video, wbr (always)

    Args:
        node (Element): Node to inspect.

    Returns:
        True if the node is phrasing content
    """
    if isinstance(node, Text):
        return True

    if is_element(node, "area"):
        # Walk up the ancestor chain; any enclosing `map` qualifies, not just
        # the direct parent.
        ancestor = node.parent
        while ancestor is not None:
            if is_element(ancestor, "map"):
                return True
            ancestor = getattr(ancestor, "parent", None)
        return False

    if is_element(node, "meta"):
        return has_property(node, "itemprop")

    if is_element(node, "link"):
        if has_property(node, "itemprop"):
            return True
        if not has_property(node, "rel"):
            return False

        body_ok = (
            "dns-prefetch",
            "modulepreload",
            "pingback",
            "preconnect",
            "prefetch",
            "preload",
            "prerender",
            "stylesheet",
        )
        rel = node["rel"]
        # `split()` drops the empty tokens that repeated spaces would create;
        # an empty or non-string `rel` names no body-ok link type.
        tokens = rel.split() if isinstance(rel, str) else []
        return bool(tokens) and all(token in body_ok for token in tokens)

    phrasing_tags = (
        "a", "abbr", "audio", "b", "bdi", "bdo", "br", "button", "canvas",
        "cite", "code", "data", "datalist", "del", "dfn", "em", "embed", "i",
        "iframe", "img", "input", "ins", "kbd", "label", "map", "mark", "math",
        "meter", "noscript", "object", "output", "picture", "progress", "q",
        "ruby", "s", "samp", "script", "select", "slot", "small", "span",
        "strong", "sub", "sup", "svg", "template", "textarea", "time", "u",
        "var", "video", "wbr",
    )
    return is_element(node, *phrasing_tags)
Check if a node is phrasing text according to https://html.spec.whatwg.org/#phrasing-content-2.
Phrasing content is the text of the document, as well as elements that mark up that text at the intra-paragraph level. Runs of phrasing content form paragraphs.
- area (if it is a descendant of a map element)
- link (if it is allowed in the body)
- meta (if the itemprop attribute is present)
- map, mark, math, audio, b, bdi, bdo, br, button, canvas, cite, code, data, datalist, del, dfn, em, embed, i, iframe, img, input, ins, kbd, label, a, abbr, meter, noscript, object, output, picture, progress, q, ruby, s, samp, script, select, slot, small, span, strong, sub, sup, svg, template, textarea, time, u, var, video, wbr, text (true)
Returns
True if the element is phrasing text
def is_event_handler(attribute: str) -> bool:
    """Check if an attribute name looks like an event handler.

    Args:
        attribute (str): Attribute name to inspect.

    Returns:
        bool: ``True`` if ``attribute`` starts with ``on`` and is at least
        ``5`` characters long.
    """
    has_prefix = attribute.startswith("on")
    return has_prefix and len(attribute) > 4
Takes an attribute name and returns true if it starts with `on` and its length is `5` or more.
def blank(value: Any) -> bool:
    """Report whether a value is blank or ``None``.

    Strings are stripped first, so whitespace-only strings are blank. Any
    other value with a length is blank when that length is zero. Values
    without a length are never blank unless they are ``None``.

    Args:
        value (Any): The value to check.

    Returns:
        bool: True if value is blank
    """
    if value is None:
        return True

    candidate = value.strip() if isinstance(value, str) else value
    if not hasattr(candidate, "__len__"):
        return False

    return len(candidate) == 0
Takes any value type and returns whether it is blank/None. For strings if the value is stripped and is equal to '' then it is blank. Otherwise if len > 0 and is not None then not blank.
Args
- value (Any): The value to check if it is blank.
Returns
bool: True if value is blank