phml.utilities.locate.select
utilities.select
A collection of utilities around querying for specific types of data.
"""utilities.select

A collection of utilities around querying for specific
types of data.
"""

import re
from typing import Callable

from phml.core.nodes import AST, Element, Root
from phml.utilities.travel.travel import visit_children, walk

__all__ = ["query", "query_all", "matches", "parse_specifiers"]

# One attribute selector: `[name]` or `[name<op>value]`. The comparator and the
# value are optional *together*; if the value alone were mandatory, `[checked]`
# would backtrack into name=`checke`, value=`d`.
_ATTRIBUTE_RE = re.compile(
    r"\[([a-zA-Z0-9_:@-]+)"
    r"(?:([~|^$*]?=)(\"[^\"\]\[]+\"|'[^'\]\[]+'|[^\]\[]+))?\]"
)

# One piece of a compound selector: a tag name, `.class` or `#id`.
_ELEMENT_PART_RE = re.compile(r"(#|\.)?([a-zA-Z0-9_-]+)")

# One token of a specifier: a combinator, a compound selector with optional
# attribute selectors, or a run of attribute selectors on their own. The
# repetitions sit inside non-capturing groups so that each named group holds
# the whole run; a repeated capturing group would keep only its last match.
_SPECIFIER_RE = re.compile(
    r"(?P<combinator>[~>*+])"
    r"|(?P<item>(?:[.#]?[a-zA-Z0-9_-]+)+)(?P<attributes>(?:\[[^\[\]]+\])*)"
    r"|(?P<attr_only>(?:\[[^\[\]]+\])+)"
)


def query(tree: AST | Root | Element, specifier: str) -> Element | None:
    """Return the first element matching a specifier, like JavaScript's querySelector.

    `#` indicates an id and `.` indicates a class. If they are used alone they
    match anything. Any tag can be used by itself or with `#` and/or `.`. You
    may use any number of class specifiers, but may only use one id specifier
    per tag name. Complex specifiers are allowed, meaning you can have space
    separated specifiers indicating nesting or a parent child relationship.

    Examples:
    * `.some-example` matches the first element with the class `some-example`
    * `#some-example` matches the first element with the id `some-example`
    * `li` matches the first `li` element
    * `li.red` matches the first `li` with the class `red`
    * `li#red` matches the first `li` with the id `red`
    * `input[type="checkbox"]` matches the first `input` with the attribute `type="checkbox"`
    * `div.form-control input[type="checkbox"]` matches the first `input` with the
    attribute `type="checkbox"` that has a parent `div` with the class `form-control`.

    Args:
        tree (AST | Root | Element): Where to search. An `AST` is replaced by its
            `tree` attribute before searching.
        specifier (str): The specifier, parsed with `parse_specifiers`.

    Returns:
        Element | None: The first element matching the specifier or None if no element was
        found.
    """

    def all_nodes(current: Element, rules: list, include_self: bool = True):
        """Try `rules` on every element yielded by `walk(current)`; first hit wins."""
        for node in walk(current):
            if node.type == "element" and (include_self or node != current):
                found = branch(node, rules)
                if found is not None:
                    return found
        return None

    def all_children(current: Element, rules: list):
        """Try `rules` on every element yielded by `visit_children(current)`."""
        for node in visit_children(current):
            if node.type == "element":
                found = branch(node, rules)
                if found is not None:
                    return found
        return None

    def first_sibling(node: Element, rules: list):
        """Try `rules` on the node directly after `node`, if it is an element."""
        if node.parent is None:
            return None

        siblings = node.parent.children
        idx = siblings.index(node)
        if idx + 1 < len(siblings) and siblings[idx + 1].type == "element":
            return branch(siblings[idx + 1], rules)
        return None

    def all_siblings(current: Element, rules: list):
        """Try `rules` on every element that follows `current` in its parent."""
        if current.parent is None:
            return None

        siblings = current.parent.children
        for sibling in siblings[siblings.index(current) + 1 :]:
            if sibling.type == "element":
                found = branch(sibling, rules)
                if found is not None:
                    return found
        return None

    def process_dict(rules: list, node: Element):
        """Check `node` against the compound rule `rules[0]`, then continue."""
        if not is_equal(rules[0], node):
            return None

        if len(rules) == 1:
            return node

        if isinstance(rules[1], dict):
            # Two compound rules in a row are a descendant relationship. The
            # node that matched the ancestor rule is excluded so one element
            # cannot satisfy both sides (e.g. `div div` on a single div).
            return all_nodes(node, rules[1:], False)

        if rules[1] == "*":
            # `*` stands for any descendant, so skip it and the node itself.
            return all_nodes(node, rules[2:], False)

        return branch(node, rules[1:])

    def branch(node: Element, rules: list):  # pylint: disable=too-many-return-statements
        """Based on the current rule, recursively check the nodes.
        If on the last rule then return the current valid node.
        """

        if len(rules) == 0:
            return node

        if isinstance(rules[0], dict):
            return process_dict(rules, node)

        if rules[0] == "*":
            return all_nodes(node, rules[1:])

        if rules[0] == ">":
            return all_children(node, rules[1:])

        if rules[0] == "+":
            return first_sibling(node, rules[1:])

        if rules[0] == "~":
            return all_siblings(node, rules[1:])

        return None

    if isinstance(tree, AST):
        tree = tree.tree

    rules = parse_specifiers(specifier)
    return all_nodes(tree, rules)


def query_all(tree: AST | Root | Element, specifier: str) -> list[Element]:
    """Return every element matching a specifier, like JavaScript's querySelectorAll.

    `#` indicates an id and `.` indicates a class. If they are used alone they
    match anything. Any tag can be used by itself or with `#` and/or `.`. You
    may use any number of class specifiers, but may only use one id specifier
    per tag name. Complex specifiers are allowed, meaning you can have space
    separated specifiers indicating nesting or a parent child relationship.

    Examples:
    * `.some-example` matches every element with the class `some-example`
    * `#some-example` matches every element with the id `some-example`
    * `li` matches every `li` element
    * `li.red` matches every `li` with the class `red`
    * `li#red` matches every `li` with the id `red`
    * `input[type="checkbox"]` matches every `input` with the attribute `type="checkbox"`
    * `div.form-control input[type="checkbox"]` matches every `input` with the
    attribute `type="checkbox"` that has a parent `div` with the class `form-control`.

    Args:
        tree (AST | Root | Element): Where to search. An `AST` is replaced by its
            `tree` attribute before searching.
        specifier (str): The specifier, parsed with `parse_specifiers`.

    Returns:
        list[Element]: All elements matching the specifier, without repeats, or an
        empty list if no elements were found.
    """

    def all_nodes(current: Element, rules: list, include_self: bool = True):
        """Collect matches of `rules` over every element yielded by `walk(current)`."""
        results = []
        for node in walk(current):
            if node.type == "element" and (include_self or node != current):
                results.extend(branch(node, rules))
        return results

    def all_children(current: Element, rules: list):
        """Collect matches of `rules` over the elements from `visit_children(current)`."""
        results = []
        for node in visit_children(current):
            if node.type == "element":
                results.extend(branch(node, rules))
        return results

    def first_sibling(node: Element, rules: list):
        """Collect matches of `rules` on the node directly after `node`."""
        if node.parent is None:
            return []

        siblings = node.parent.children
        idx = siblings.index(node)
        if idx + 1 < len(siblings) and siblings[idx + 1].type == "element":
            return branch(siblings[idx + 1], rules)
        return []

    def all_siblings(current: Element, rules: list):
        """Collect matches of `rules` over every element following `current`."""
        if current.parent is None:
            return []

        results = []
        siblings = current.parent.children
        for sibling in siblings[siblings.index(current) + 1 :]:
            if sibling.type == "element":
                results.extend(branch(sibling, rules))
        return results

    def process_dict(rules: list, node: Element):
        """Check `node` against the compound rule `rules[0]`, then continue."""
        if not is_equal(rules[0], node):
            return []

        if len(rules) == 1:
            return [node]

        if isinstance(rules[1], dict):
            # Descendant relationship: the node that matched the ancestor rule
            # is excluded so one element cannot satisfy both sides.
            return all_nodes(node, rules[1:], False)

        if rules[1] == "*":
            # `*` stands for any descendant, so skip it and the node itself.
            return all_nodes(node, rules[2:], False)

        return branch(node, rules[1:])

    def branch(node: Element, rules: list):  # pylint: disable=too-many-return-statements
        """Based on the current rule, recursively check the nodes.
        If on the last rule then return the current valid node in a list.
        """

        if len(rules) == 0:
            return [node]

        if isinstance(rules[0], dict):
            return process_dict(rules, node)

        if rules[0] == "*":
            return all_nodes(node, rules[1:])

        if rules[0] == ">":
            return all_children(node, rules[1:])

        if rules[0] == "+":
            return first_sibling(node, rules[1:])

        if rules[0] == "~":
            return all_siblings(node, rules[1:])

        # Always a list: every caller feeds this result to `list.extend`.
        return []

    if isinstance(tree, AST):
        tree = tree.tree

    rules = parse_specifiers(specifier)

    # The same element can be reached through several ancestors; keep only the
    # first occurrence of each, preserving discovery order.
    unique = []
    for element in all_nodes(tree, rules):
        if element not in unique:
            unique.append(element)
    return unique


def matches(node: Element, specifier: str) -> bool:
    """Works the same as the Javascript matches. `#` indicates an id and `.`
    indicates a class. If they are used alone they match anything.
    Any tag can be used by itself or with `#` and/or `.`. You may use
    any number of class specifiers, but may only use one id specifier per
    tag name. Complex specifiers are not supported. Everything in the specifier
    must relate to one element/tag.

    Examples:
    * `.some-example` matches an element with the class `some-example`
    * `#some-example` matches an element with the id `some-example`
    * `li` matches an `li` element
    * `li.red` matches an `li` with the class `red`
    * `li#red` matches an `li` with the id `red`
    * `input[type="checkbox"]` matches an `input` with the attribute `type="checkbox"`

    Args:
        node (Element): The element to test.
        specifier (str): A specifier describing exactly one element.

    Returns:
        bool: Whether `node` satisfies the specifier.

    Raises:
        Exception: If the specifier parses to more than one rule, to no rule at
            all, or to a combinator instead of an element rule.
    """

    rules = parse_specifiers(specifier)

    if len(rules) > 1:
        raise Exception(f"Complex specifier detected and is not allowed.\n{specifier}")
    # `not rules` covers a specifier with nothing parseable in it, which would
    # otherwise surface as an IndexError on `rules[0]`.
    if not rules or not isinstance(rules[0], dict):
        raise Exception(
            "Specifier must only include tag name, classes, id, and or attribute specfiers.\n"
            "Example: `li.red#sample[class^='form-'][title~='sample']`"
        )

    return is_equal(rules[0], node)


def is_equal(rule: dict, node: Element) -> bool:
    """Checks if a rule is valid on a node.
    A rule is a dictionary of possible values and each value must
    be valid on the node.

    A rule may have a tag, id, classList, and attribute list:
    * If the `tag` is provided, the nodes `tag` must match the rules `tag`
    * If the `id` is provided, the nodes `id` must match the rules `id`
    * If the `classList` is not empty, each class in the `classList` must exist in the nodes
    class attribute
    * If the `attribute` list is not empty, each attribute in the attribute list will be compared
    against the nodes attributes given the `attribute` lists comparators. Below is the list of
    possible comparisons.
        1. Exists: `[checked]` yields any element that has the attribute `checked` no matter its
        value.
        2. Equals: `[checked='no']` yields any element with `checked='no'`
        3. Contains: `[class~=sample]` or `[class*=sample]` yields any element with a class
        containing `sample`
        4. Equal to or startswith value-: `[class|=sample]` yields elements that either have
        a class that equals `sample` or a class that starts with `sample-`
        5. Starts with: `[class^=sample]` yields elements with a class that starts with `sample`
        6. Ends with: `[class$="sample"]` yields elements with a class that ends with `sample`

    Args:
        rule (dict): The rule to apply to the node.
        node (Element): The node to validate.

    Returns:
        bool: Whether the node passes all the rules in the dictionary.
    """

    # Validate tag
    if rule["tag"] != "*" and rule["tag"] != node.tag:
        return False

    # Validate id
    if rule["id"] is not None and ("id" not in node.properties or rule["id"] != node["id"]):
        return False

    # Validate class list
    if rule["classList"]:
        if "class" not in node.properties:
            return False
        node_classes = node["class"].split(" ")
        if any(klass not in node_classes for klass in rule["classList"]):
            return False

    # Validate all attributes; `all` over an empty list is True.
    return all(
        attr["name"] in node.properties and __validate_attr(attr, node)
        for attr in rule["attributes"]
    )


def __validate_attr(attr: dict, node: Element) -> bool:
    """Compare one parsed attribute selector against the node's attribute.

    The caller (`is_equal`) has already checked that the attribute exists on
    the node.

    Args:
        attr (dict): Parsed selector with the keys `name`, `compare`, `value`.
        node (Element): The node whose attribute is inspected.

    Returns:
        bool: Whether the node's attribute satisfies the selector.
    """
    name = attr["name"]
    compare = attr["compare"]

    if not compare:
        # `[name]` is a pure existence test and holds whatever the value is,
        # including non-string values.
        return True

    if compare == "=":
        return is_valid_attr(
            attr=node[name],
            sub=attr["value"],
            name=name,
            validator=lambda x, y: x == y,
        )

    # The remaining comparators are string operations.
    if not isinstance(node[name], str):
        return False

    validators = {
        "|=": lambda x, y: x == y or x.startswith(f"{y}-"),
        "^=": lambda x, y: x.startswith(y),
        "$=": lambda x, y: x.endswith(y),
        "*=": lambda x, y: y in x,
        "~=": lambda x, y: y in x,
    }
    validator = validators.get(compare)
    if validator is None:
        # Unknown comparator on a string attribute: accepted, as before.
        return True

    return is_valid_attr(attr=node[name], sub=attr["value"], name=name, validator=validator)


def is_valid_attr(attr: str, sub: str, name: str, validator: Callable) -> bool:
    """Validate an attribute value with a given string and a validator callable.
    If the attribute is `class`, the value is split on spaces and each class is
    checked on its own. Otherwise the whole attribute value is checked once.

    Args:
        attr (str): The attribute value found on the node.
        sub (str): The value from the selector to compare against.
        name (str): The attribute name; decides whether `attr` is split.
        validator (Callable): Called as `validator(item, sub)` for each candidate.

    Returns:
        True if the validator accepts at least one candidate.
    """
    list_attributes = ["class"]

    compare_values = attr.split(" ") if name in list_attributes else [attr]
    return any(validator(item, sub) for item in compare_values)


def _parse_attribute_list(text: str) -> list:
    """Parse every `[name]` / `[name<op>value]` selector found in `text`.

    Returns one dict per selector with the keys `name`, `compare` and `value`.
    `compare` and `value` are None for a bare `[name]`. Surrounding quotes are
    stripped from the value.
    """
    parsed = []
    for found in _ATTRIBUTE_RE.finditer(text):
        name, compare, value = found.groups()
        if value is not None:
            value = value.strip("'\"")
        parsed.append({"name": name, "compare": compare, "value": value})
    return parsed


def __parse_el_with_attribute(item: str | None, attributes: str | None) -> dict:
    """Build a rule dict from a compound selector and its attribute selectors.

    Args:
        item (str | None): The tag/class/id part, e.g. `li.red#sample`.
        attributes (str | None): The attribute selectors that follow it, e.g.
            `[type="checkbox"][checked]`.

    Returns:
        dict: A rule with the keys `tag`, `classList`, `id` and `attributes`.

    Raises:
        Exception: If `item` contains more than one id.
    """
    element = {
        "tag": "*",
        "classList": [],
        "id": None,
        "attributes": [],
    }

    if attributes is not None:
        element["attributes"].extend(_parse_attribute_list(attributes))

    if item is not None:
        for part in _ELEMENT_PART_RE.finditer(item):
            prefix, name = part.groups()
            if prefix == ".":
                if name not in element["classList"]:
                    element["classList"].append(name)
            elif prefix == "#":
                if element["id"] is not None:
                    raise Exception(
                        f"There may only be one id per element specifier.\n{item}{attributes}"
                    )
                element["id"] = name
            else:
                element["tag"] = name or "*"

    return element


def __parse_attr_only_element(token: str) -> dict:
    """Build a rule dict for a selector made only of attribute selectors.

    Args:
        token (str): One or more attribute selectors, e.g. `[a=b][c]`.

    Returns:
        dict: A rule matching any tag, with the parsed `attributes`.
    """
    element = {
        "tag": "*",
        "classList": [],
        "id": None,
        "attributes": [],
    }

    if token not in ["", None]:
        element["attributes"].extend(_parse_attribute_list(token))

    return element


def parse_specifiers(specifier: str) -> list:
    """Split a specifier into a flat list of rules and combinators.

    Rules:
    * `*` = any element
    * `>` = Everything with certain parent child relationship
    * `+` = first sibling
    * `~` = All after
    * `.` = class
    * `#` = id
    * `[attribute]` = all elements with attribute
    * `[attribute=value]` = all elements with attribute=value
    * `[attribute~=value]` = all elements with attribute containing value
    * `[attribute|=value]` = all elements with attribute=value or attribute starting with value-
    * `node[attribute^=value]` = all elements with attribute starting with value
    * `node[attribute$=value]` = all elements with attribute ending with value
    * `node[attribute*=value]` = all elements with attribute containing value

    Args:
        specifier (str): The specifier to tokenize.

    Returns:
        list: The tokens in order. Combinators are the strings `*`, `>`, `+`
        and `~`; every element rule is a dict with the keys `tag`,
        `classList`, `id` and `attributes`.

    Raises:
        Exception: If one compound selector contains more than one id.
    """
    tokens = []
    for token in _SPECIFIER_RE.finditer(specifier):
        combinator = token.group("combinator")
        item = token.group("item")
        attr_only = token.group("attr_only")

        if combinator is not None:
            tokens.append(combinator)
        elif item is not None:
            # `item` is the whole compound selector (`li.red#x`), not just its
            # last piece, so the tag and every class reach the rule.
            tokens.append(__parse_el_with_attribute(item, token.group("attributes")))
        elif attr_only is not None:
            tokens.append(__parse_attr_only_element(attr_only))
    return tokens
def query(tree: AST | Root | Element, specifier: str) -> Element | None:
    """Return the first element matching a specifier, like JavaScript's querySelector.

    `#` indicates an id and `.` indicates a class. If they are used alone they
    match anything. Any tag can be used by itself or with `#` and/or `.`. You
    may use any number of class specifiers, but may only use one id specifier
    per tag name. Complex specifiers are allowed, meaning you can have space
    separated specifiers indicating nesting or a parent child relationship.

    Examples:
    * `.some-example` matches the first element with the class `some-example`
    * `#some-example` matches the first element with the id `some-example`
    * `li` matches the first `li` element
    * `li.red` matches the first `li` with the class `red`
    * `li#red` matches the first `li` with the id `red`
    * `input[type="checkbox"]` matches the first `input` with the attribute `type="checkbox"`
    * `div.form-control input[type="checkbox"]` matches the first `input` with the
    attribute `type="checkbox"` that has a parent `div` with the class `form-control`.

    Args:
        tree (AST | Root | Element): Where to search. An `AST` is replaced by its
            `tree` attribute before searching.
        specifier (str): The specifier, parsed with `parse_specifiers`.

    Returns:
        Element | None: The first element matching the specifier or None if no element was
        found.
    """

    def all_nodes(current: Element, rules: list, include_self: bool = True):
        """Try `rules` on every element yielded by `walk(current)`; first hit wins."""
        for node in walk(current):
            if node.type == "element" and (include_self or node != current):
                found = branch(node, rules)
                if found is not None:
                    return found
        return None

    def all_children(current: Element, rules: list):
        """Try `rules` on every element yielded by `visit_children(current)`."""
        for node in visit_children(current):
            if node.type == "element":
                found = branch(node, rules)
                if found is not None:
                    return found
        return None

    def first_sibling(node: Element, rules: list):
        """Try `rules` on the node directly after `node`, if it is an element."""
        if node.parent is None:
            return None

        siblings = node.parent.children
        idx = siblings.index(node)
        if idx + 1 < len(siblings) and siblings[idx + 1].type == "element":
            return branch(siblings[idx + 1], rules)
        return None

    def all_siblings(current: Element, rules: list):
        """Try `rules` on every element that follows `current` in its parent."""
        if current.parent is None:
            return None

        siblings = current.parent.children
        for sibling in siblings[siblings.index(current) + 1 :]:
            if sibling.type == "element":
                found = branch(sibling, rules)
                if found is not None:
                    return found
        return None

    def process_dict(rules: list, node: Element):
        """Check `node` against the compound rule `rules[0]`, then continue."""
        if not is_equal(rules[0], node):
            return None

        if len(rules) == 1:
            return node

        if isinstance(rules[1], dict):
            # Two compound rules in a row are a descendant relationship. The
            # node that matched the ancestor rule is excluded so one element
            # cannot satisfy both sides (e.g. `div div` on a single div).
            return all_nodes(node, rules[1:], False)

        if rules[1] == "*":
            # `*` stands for any descendant, so skip it and the node itself.
            return all_nodes(node, rules[2:], False)

        return branch(node, rules[1:])

    def branch(node: Element, rules: list):  # pylint: disable=too-many-return-statements
        """Based on the current rule, recursively check the nodes.
        If on the last rule then return the current valid node.
        """

        if len(rules) == 0:
            return node

        if isinstance(rules[0], dict):
            return process_dict(rules, node)

        if rules[0] == "*":
            return all_nodes(node, rules[1:])

        if rules[0] == ">":
            return all_children(node, rules[1:])

        if rules[0] == "+":
            return first_sibling(node, rules[1:])

        if rules[0] == "~":
            return all_siblings(node, rules[1:])

        return None

    if isinstance(tree, AST):
        tree = tree.tree

    rules = parse_specifiers(specifier)
    return all_nodes(tree, rules)
Same as JavaScript's querySelector. `#` indicates an id and `.` indicates a class. If they are used alone they match anything. Any tag can be used by itself or with `#` and/or `.`. You may use any number of class specifiers, but may only use one id specifier per tag name. Complex specifiers are allowed, meaning you can have space separated specifiers indicating nesting or a parent child relationship.
Examples:
* `.some-example` matches the first element with the class `some-example`
* `#some-example` matches the first element with the id `some-example`
* `li` matches the first `li` element
* `li.red` matches the first `li` with the class `red`
* `li#red` matches the first `li` with the id `red`
* `input[type="checkbox"]` matches the first `input` with the attribute `type="checkbox"`
* `div.form-control input[type="checkbox"]` matches the first `input` with the attribute `type="checkbox"` that has a parent `div` with the class `form-control`.
Return:
Element | None: The first element matching the specifier, or None if no element was found.
def query_all(tree: AST | Root | Element, specifier: str) -> list[Element]:
    """Return every element matching a specifier, like JavaScript's querySelectorAll.

    `#` indicates an id and `.` indicates a class. If they are used alone they
    match anything. Any tag can be used by itself or with `#` and/or `.`. You
    may use any number of class specifiers, but may only use one id specifier
    per tag name. Complex specifiers are allowed, meaning you can have space
    separated specifiers indicating nesting or a parent child relationship.

    Examples:
    * `.some-example` matches every element with the class `some-example`
    * `#some-example` matches every element with the id `some-example`
    * `li` matches every `li` element
    * `li.red` matches every `li` with the class `red`
    * `li#red` matches every `li` with the id `red`
    * `input[type="checkbox"]` matches every `input` with the attribute `type="checkbox"`
    * `div.form-control input[type="checkbox"]` matches every `input` with the
    attribute `type="checkbox"` that has a parent `div` with the class `form-control`.

    Args:
        tree (AST | Root | Element): Where to search. An `AST` is replaced by its
            `tree` attribute before searching.
        specifier (str): The specifier, parsed with `parse_specifiers`.

    Returns:
        list[Element]: All elements matching the specifier, without repeats, or an
        empty list if no elements were found.
    """

    def all_nodes(current: Element, rules: list, include_self: bool = True):
        """Collect matches of `rules` over every element yielded by `walk(current)`."""
        results = []
        for node in walk(current):
            if node.type == "element" and (include_self or node != current):
                results.extend(branch(node, rules))
        return results

    def all_children(current: Element, rules: list):
        """Collect matches of `rules` over the elements from `visit_children(current)`."""
        results = []
        for node in visit_children(current):
            if node.type == "element":
                results.extend(branch(node, rules))
        return results

    def first_sibling(node: Element, rules: list):
        """Collect matches of `rules` on the node directly after `node`."""
        if node.parent is None:
            return []

        siblings = node.parent.children
        idx = siblings.index(node)
        if idx + 1 < len(siblings) and siblings[idx + 1].type == "element":
            return branch(siblings[idx + 1], rules)
        return []

    def all_siblings(current: Element, rules: list):
        """Collect matches of `rules` over every element following `current`."""
        if current.parent is None:
            return []

        results = []
        siblings = current.parent.children
        for sibling in siblings[siblings.index(current) + 1 :]:
            if sibling.type == "element":
                results.extend(branch(sibling, rules))
        return results

    def process_dict(rules: list, node: Element):
        """Check `node` against the compound rule `rules[0]`, then continue."""
        if not is_equal(rules[0], node):
            return []

        if len(rules) == 1:
            return [node]

        if isinstance(rules[1], dict):
            # Descendant relationship: the node that matched the ancestor rule
            # is excluded so one element cannot satisfy both sides.
            return all_nodes(node, rules[1:], False)

        if rules[1] == "*":
            # `*` stands for any descendant, so skip it and the node itself.
            return all_nodes(node, rules[2:], False)

        return branch(node, rules[1:])

    def branch(node: Element, rules: list):  # pylint: disable=too-many-return-statements
        """Based on the current rule, recursively check the nodes.
        If on the last rule then return the current valid node in a list.
        """

        if len(rules) == 0:
            return [node]

        if isinstance(rules[0], dict):
            return process_dict(rules, node)

        if rules[0] == "*":
            return all_nodes(node, rules[1:])

        if rules[0] == ">":
            return all_children(node, rules[1:])

        if rules[0] == "+":
            return first_sibling(node, rules[1:])

        if rules[0] == "~":
            return all_siblings(node, rules[1:])

        # Always a list: every caller feeds this result to `list.extend`.
        return []

    if isinstance(tree, AST):
        tree = tree.tree

    rules = parse_specifiers(specifier)

    # The same element can be reached through several ancestors; keep only the
    # first occurrence of each, preserving discovery order.
    unique = []
    for element in all_nodes(tree, rules):
        if element not in unique:
            unique.append(element)
    return unique
Same as JavaScript's querySelectorAll. `#` indicates an id and `.` indicates a class. If they are used alone they match anything. Any tag can be used by itself or with `#` and/or `.`. You may use any number of class specifiers, but may only use one id specifier per tag name. Complex specifiers are allowed, meaning you can have space separated specifiers indicating nesting or a parent child relationship.
Examples:
* `.some-example` matches every element with the class `some-example`
* `#some-example` matches every element with the id `some-example`
* `li` matches every `li` element
* `li.red` matches every `li` with the class `red`
* `li#red` matches every `li` with the id `red`
* `input[type="checkbox"]` matches every `input` with the attribute `type="checkbox"`
* `div.form-control input[type="checkbox"]` matches every `input` with the attribute `type="checkbox"` that has a parent `div` with the class `form-control`.
Return:
list[Element]: All elements matching the specifier, or an empty list if no elements were found.
def matches(node: Element, specifier: str) -> bool:
    """Works the same as the Javascript matches. `#` indicates an id and `.`
    indicates a class. If they are used alone they match anything.
    Any tag can be used by itself or with `#` and/or `.`. You may use
    any number of class specifiers, but may only use one id specifier per
    tag name. Complex specifiers are not supported. Everything in the specifier
    must relate to one element/tag.

    Examples:
    * `.some-example` matches an element with the class `some-example`
    * `#some-example` matches an element with the id `some-example`
    * `li` matches an `li` element
    * `li.red` matches an `li` with the class `red`
    * `li#red` matches an `li` with the id `red`
    * `input[type="checkbox"]` matches an `input` with the attribute `type="checkbox"`

    Args:
        node (Element): The element to test.
        specifier (str): A specifier describing exactly one element.

    Returns:
        bool: Whether `node` satisfies the specifier.

    Raises:
        Exception: If the specifier parses to more than one rule, to no rule at
            all, or to a combinator instead of an element rule.
    """

    rules = parse_specifiers(specifier)

    if len(rules) > 1:
        raise Exception(f"Complex specifier detected and is not allowed.\n{specifier}")
    # `not rules` covers a specifier with nothing parseable in it, which would
    # otherwise surface as an IndexError on `rules[0]`.
    if not rules or not isinstance(rules[0], dict):
        raise Exception(
            "Specifier must only include tag name, classes, id, and or attribute specfiers.\n"
            "Example: `li.red#sample[class^='form-'][title~='sample']`"
        )

    return is_equal(rules[0], node)
Works the same as the JavaScript matches. `#` indicates an id and `.` indicates a class. If they are used alone they match anything. Any tag can be used by itself or with `#` and/or `.`. You may use any number of class specifiers, but may only use one id specifier per tag name. Complex specifiers are not supported. Everything in the specifier must relate to one element/tag.
Examples:
* `.some-example` matches an element with the class `some-example`
* `#some-example` matches an element with the id `some-example`
* `li` matches an `li` element
* `li.red` matches an `li` with the class `red`
* `li#red` matches an `li` with the id `red`
* `input[type="checkbox"]` matches an `input` with the attribute `type="checkbox"`
def parse_specifiers(specifier: str) -> list:
    """Split a specifier into a flat list of rules and combinators.

    Rules:
    * `*` = any element
    * `>` = Everything with certain parent child relationship
    * `+` = first sibling
    * `~` = All after
    * `.` = class
    * `#` = id
    * `[attribute]` = all elements with attribute
    * `[attribute=value]` = all elements with attribute=value
    * `[attribute~=value]` = all elements with attribute containing value
    * `[attribute|=value]` = all elements with attribute=value or attribute starting with value-
    * `node[attribute^=value]` = all elements with attribute starting with value
    * `node[attribute$=value]` = all elements with attribute ending with value
    * `node[attribute*=value]` = all elements with attribute containing value

    Args:
        specifier (str): The specifier to tokenize.

    Returns:
        list: The tokens in order. Combinators are the strings `*`, `>`, `+`
        and `~`; every other token is the dict built by
        `__parse_el_with_attribute` or `__parse_attr_only_element`.
    """
    # The repetitions sit inside non-capturing groups so that each named group
    # holds the whole run. A repeated capturing group keeps only its last
    # match, which would reduce `li.red` to `.red` and `[a=b][c=d]` to `[c=d]`.
    splitter = re.compile(
        r"(?P<combinator>[~>*+])"
        r"|(?P<item>(?:[.#]?[a-zA-Z0-9_-]+)+)(?P<attributes>(?:\[[^\[\]]+\])*)"
        r"|(?P<attr_only>(?:\[[^\[\]]+\])+)"
    )
    single_attribute = re.compile(r"\[[^\[\]]+\]")

    tokens = []
    for token in splitter.finditer(specifier):
        combinator = token.group("combinator")
        item = token.group("item")
        attr_only = token.group("attr_only")

        if combinator is not None:
            tokens.append(combinator)
        elif item is not None:
            tokens.append(__parse_el_with_attribute(item, token.group("attributes")))
        elif attr_only is not None:
            # Hand the attribute selectors over one at a time and merge them
            # into a single rule, so none of a `[a][b]` run is dropped.
            rule = None
            for bracket in single_attribute.findall(attr_only):
                parsed = __parse_attr_only_element(bracket)
                if rule is None:
                    rule = parsed
                else:
                    rule["attributes"].extend(parsed["attributes"])
            tokens.append(rule)
    return tokens
Rules:
* `*` = any element
* `>` = Everything with certain parent child relationship
* `+` = first sibling
* `~` = All after
* `.` = class
* `#` = id
* `[attribute]` = all elements with attribute
* `[attribute=value]` = all elements with attribute=value
* `[attribute~=value]` = all elements with attribute containing value
* `[attribute|=value]` = all elements with attribute=value or attribute starting with value-
* `node[attribute^=value]` = all elements with attribute starting with value
* `node[attribute$=value]` = all elements with attribute ending with value
* `node[attribute*=value]` = all elements with attribute containing value