verbex.verbex
Generate regular expressions from an easier fluent verbal form.
View Source
0"""Generate regular expressions from an easier fluent verbal form.""" 1from __future__ import annotations 2 3import re 4from enum import Enum 5from functools import wraps 6 7try: 8 from typing import ( # <--------------- if Python ≥ 3.9.0 9 Annotated, 10 ParamSpec, 11 Protocol, 12 TypeAlias, 13 runtime_checkable, 14 ) 15except ImportError: 16 from typing_extensions import TypeAlias, Protocol, Annotated, ParamSpec, runtime_checkable # type: ignore # <--- if Python < 3.9.0 # noqa E501 17 18from typing import Pattern, TypeVar 19 20from beartype import beartype # type: ignore 21from beartype.typing import ( # type: ignore 22 Any, 23 Callable, 24 Dict, 25 Iterator, 26 List, 27 Optional, 28 Tuple, 29 Union, 30 cast, 31) 32from beartype.vale import Is # type: ignore 33 34 35def _string_len_is_1(text: object) -> bool: 36 return isinstance(text, str) and len(text) == 1 37 38 39Char = Annotated[str, Is[_string_len_is_1]] 40 41 42P = ParamSpec("P") # noqa VNE001 43R = TypeVar("R") # noqa VNE001 44 45 46# work around for bug https://github.com/python/mypy/issues/12660 47# fixed in next version of mypy. 48@runtime_checkable 49class HasIter(Protocol): 50 """Workaround for mypy P.args.""" 51 52 def __iter__(self) -> Iterator[Any]: 53 """Object can be iterated. 54 55 Yields: 56 Next object. 57 """ 58 ... 59 60 61# work around for bug https://github.com/python/mypy/issues/12660 62# fixed in next version of mypy 63@runtime_checkable 64class HasItems(Protocol): 65 """Workaround for mypy P.kwargs.""" 66 67 def items(self) -> Tuple[str, Any]: 68 """Object has items method. 69 70 Returns: 71 The dict of items. 72 """ 73 ... 74 75 76class EscapedText(str): 77 """Text that has been escaped for regex. 78 79 Arguments: 80 str -- Extend the string class. 81 """ 82 83 def __new__(cls, value: str) -> EscapedText: 84 """Return a escaped regex string. 
85 86 Arguments: 87 value -- the string to escape 88 89 Returns: 90 _description_ 91 """ 92 return str.__new__(cls, re.escape(value)) 93 94 95def re_escape(func: Callable[P, R]) -> Callable[P, R]: 96 """Automatically escape any string parameters as EscapedText. 97 98 Arguments: 99 func -- The function to decorate. 100 101 Returns: 102 The decorated function. 103 """ 104 105 @wraps(func) 106 def inner(*args: P.args, **kwargs: P.kwargs) -> R: # type: ignore 107 escaped_args: List[Any] = [] 108 escaped_kwargs: Dict[str, Any] = {} 109 for arg in cast(HasIter, args): 110 if not isinstance(arg, EscapedText) and isinstance(arg, str): 111 escaped_args.append(EscapedText(arg)) 112 else: 113 escaped_args.append(arg) 114 arg_k: str 115 arg_v: Any 116 for arg_k, arg_v in cast(HasItems, kwargs).items(): 117 if not isinstance(arg_v, EscapedText) and isinstance(arg_v, str): 118 escaped_kwargs[arg_k] = EscapedText(str(arg_v)) 119 else: 120 escaped_kwargs[arg_k] = arg_v 121 return func(*escaped_args, **escaped_kwargs) # type: ignore 122 123 return inner 124 125 126class CharClass(Enum): 127 """Enum of character classes in regex. 128 129 Arguments: 130 Enum -- Extends the Enum class. 131 """ 132 133 DIGIT = "\\d" 134 LETTER = "\\w" 135 UPPERCASE_LETTER = "\\u" 136 LOWERCASE_LETTER = "\\l" 137 WHITESPACE = "\\s" 138 TAB = "\\t" 139 140 def __str__(self) -> str: 141 """To string method based on Enum value. 142 143 Returns: 144 value of Enum 145 """ 146 return self.value 147 148 149class SpecialChar(Enum): 150 """Enum of special charaters, shorthand. 151 152 Arguments: 153 Enum -- Extends the Enum class. 154 """ 155 156 # does not work / should not be used in [ ] 157 LINEBREAK = "(\\n|(\\r\\n))" 158 START_OF_LINE = "^" 159 END_OF_LINE = "$" 160 TAB = "\t" 161 162 def __str__(self) -> str: 163 """To string for special chars enum. 164 165 Returns: 166 Return value of enum as string. 
167 """ 168 return self.value 169 170 171CharClassOrChars: TypeAlias = Union[str, CharClass] 172EscapedCharClassOrSpecial: TypeAlias = Union[str, CharClass, SpecialChar] 173VerbexEscapedCharClassOrSpecial: TypeAlias = Union["Verbex", EscapedCharClassOrSpecial] 174 175 176def _poseur_decorator(*poseur: Any) -> Any: 177 """Positional-only arguments runtime checker.""" 178 import functools 179 180 def caller(func: Callable[P, R]) -> Callable[P, R]: # type: ignore 181 @functools.wraps(func) 182 def wrapper(*args: P.args, **kwargs: P.kwargs) -> R: 183 poseur_args = set(poseur).intersection(kwargs) # type: ignore 184 if poseur_args: 185 raise TypeError( 186 "%s() got some positional-only arguments passed as keyword" 187 " arguments: %r" % (func.__name__, ", ".join(poseur_args)), 188 ) 189 return func(*args, **kwargs) # type: ignore 190 191 return wrapper 192 193 return caller 194 195 196class Verbex: 197 """ 198 VerbalExpressions class. 199 200 the following methods do not try to match the original js lib! 201 """ 202 203 EMPTY_REGEX_FLAG = re.RegexFlag(0) 204 205 @re_escape 206 @beartype 207 def __init__(self, modifiers: re.RegexFlag = EMPTY_REGEX_FLAG): 208 """Create a Verbex object; setting any needed flags. 209 210 Keyword Arguments: 211 modifiers -- Regex modifying flags (default: {re.RegexFlag(0)}) 212 """ 213 # self._parts: List[str] = [text] 214 self._parts: List[str] = [] 215 self._modifiers = modifiers 216 217 @property 218 def modifiers(self) -> re.RegexFlag: 219 """Return the modifiers for this Verbex object. 220 221 Returns: 222 The modifiers applied to this object. 223 """ 224 return self._modifiers 225 226 def __str__(self) -> str: 227 """Return regex string representation.""" 228 return "".join(self._parts) 229 230 @beartype 231 def _add(self, value: Union[str, List[str]]) -> Verbex: 232 """ 233 Append a transformed value to internal expression to be compiled. 234 235 As possible, this method should be "private". 
236 """ 237 if isinstance(value, list): 238 self._parts.extend(value) 239 else: 240 self._parts.append(value) 241 return self 242 243 def regex(self) -> Pattern[str]: 244 """Get a regular expression object.""" 245 return re.compile( 246 str(self), 247 self._modifiers, 248 ) 249 250 # allow VerbexEscapedCharClassOrSpecial 251 252 @re_escape 253 @beartype 254 def _capture_group_with_name( 255 self, 256 name: str, 257 text: VerbexEscapedCharClassOrSpecial, 258 ) -> Verbex: 259 return self._add(f"(?<{name}>{str(text)})") 260 261 @re_escape 262 @beartype 263 def _capture_group_without_name( 264 self, 265 text: VerbexEscapedCharClassOrSpecial, 266 ) -> Verbex: 267 return self._add(f"({str(text)})") 268 269 @re_escape 270 @beartype 271 @_poseur_decorator("self") 272 def capture_group( 273 self, 274 name_or_text: Union[Optional[str], VerbexEscapedCharClassOrSpecial] = None, 275 text: Optional[VerbexEscapedCharClassOrSpecial] = None, 276 ) -> Verbex: 277 """Create a capture group. 278 279 Name is optional if not specified then the first argument is the text. 280 281 Keyword Arguments: 282 name_or_text -- The name of the group / text to search for (default: {None}) 283 text -- The text to search for (default: {None}) 284 285 Raises: 286 ValueError: If name is specified then text must be as well. 287 288 Returns: 289 Verbex with added capture group. 290 """ 291 if name_or_text is not None: 292 if text is None: 293 _text = name_or_text 294 return self._capture_group_without_name(_text) 295 if isinstance(name_or_text, str): 296 return self._capture_group_with_name(name_or_text, text) 297 raise ValueError("text must be specified with optional name") 298 299 @re_escape 300 @beartype 301 def OR(self, text: VerbexEscapedCharClassOrSpecial) -> Verbex: # noqa N802 302 """`or` is a python keyword so we use `OR` instead. 303 304 Arguments: 305 text -- Text to find or a Verbex object. 306 307 Returns: 308 Modified Verbex object. 
309 """ 310 return self._add("|").find(text) 311 312 @re_escape 313 @beartype 314 def zero_or_more(self, text: VerbexEscapedCharClassOrSpecial) -> Verbex: 315 """Find the text or Verbex object zero or more times. 316 317 Arguments: 318 text -- The text / Verbex object to look for. 319 320 Returns: 321 Modified Verbex object. 322 """ 323 return self._add(f"(?:{str(text)})*") 324 325 @re_escape 326 @beartype 327 def one_or_more(self, text: VerbexEscapedCharClassOrSpecial) -> Verbex: 328 """Find the text or Verbex object one or more times. 329 330 Arguments: 331 text -- The text / Verbex object to look for. 332 333 Returns: 334 Modified Verbex object. 335 """ 336 return self._add(f"(?:{str(text)})+") 337 338 @re_escape 339 @beartype 340 def n_times( 341 self, 342 text: VerbexEscapedCharClassOrSpecial, 343 n: int, # noqa: VNE001 344 ) -> Verbex: 345 """Find the text or Verbex object n or more times. 346 347 Arguments: 348 text -- The text / Verbex object to look for. 349 350 Returns: 351 Modified Verbex object. 352 """ 353 return self._add(f"(?:{str(text)}){{{n}}}") 354 355 @re_escape 356 @beartype 357 def n_times_or_more( 358 self, 359 text: VerbexEscapedCharClassOrSpecial, 360 n: int, # noqa: VNE001 361 ) -> Verbex: 362 """Find the text or Verbex object at least n times. 363 364 Arguments: 365 text -- The text / Verbex object to look for. 366 367 Returns: 368 Modified Verbex object. 369 """ 370 return self._add(f"(?:{str(text)}){{{n},}}") 371 372 @re_escape 373 @beartype 374 def n_to_m_times( 375 self, 376 text: VerbexEscapedCharClassOrSpecial, 377 n: int, # noqa: VNE001 378 m: int, # noqa: VNE001 379 ) -> Verbex: 380 """Find the text or Verbex object between n and m times. 381 382 Arguments: 383 text -- The text / Verbex object to look for. 384 385 Returns: 386 Modified Verbex object. 
387 """ 388 return self._add(f"(?:{str(text)}){{{n},{m}}}") 389 390 @re_escape 391 @beartype 392 def maybe(self, text: VerbexEscapedCharClassOrSpecial) -> Verbex: 393 """Possibly find the text / Verbex object. 394 395 Arguments: 396 text -- The text / Verbex object to possibly find. 397 398 Returns: 399 Modified Verbex object. 400 """ 401 return self._add(f"(?:{str(text)})?") 402 403 @re_escape 404 @beartype 405 def find(self, text: VerbexEscapedCharClassOrSpecial) -> Verbex: 406 """Find the text or Verbex object. 407 408 Arguments: 409 text -- The text / Verbex object to look for. 410 411 Returns: 412 Modified Verbex object. 413 """ 414 return self._add(str(text)) 415 416 @re_escape 417 @beartype 418 def then(self, text: VerbexEscapedCharClassOrSpecial) -> Verbex: 419 """Synonym for find. 420 421 Arguments: 422 text -- The text / Verbex object to look for. 423 424 Returns: 425 Modified Verbex object. 426 """ 427 return self.find(text) 428 429 @re_escape 430 @beartype 431 def followed_by(self, text: VerbexEscapedCharClassOrSpecial) -> Verbex: 432 """Match if string is followed by text. 433 434 Positive lookahead 435 436 Returns: 437 Modified Verbex object. 438 """ 439 return self._add(f"(?={text})") 440 441 @re_escape 442 @beartype 443 def not_followed_by(self, text: VerbexEscapedCharClassOrSpecial) -> Verbex: 444 """Match if string is not followed by text. 445 446 Negative lookahead 447 448 Returns: 449 Modified Verbex object. 450 """ 451 return self._add(f"(?!{text})") 452 453 @re_escape 454 @beartype 455 def preceded_by(self, text: VerbexEscapedCharClassOrSpecial) -> Verbex: 456 """Match if string is not preceded by text. 457 458 Positive lookbehind 459 460 Returns: 461 Modified Verbex object. 462 """ 463 return self._add(f"(?<={text})") 464 465 @re_escape 466 @beartype 467 def not_preceded_by(self, text: VerbexEscapedCharClassOrSpecial) -> Verbex: 468 """Match if string is not preceded by text. 
469 470 Negative Lookbehind 471 472 Returns: 473 Modified Verbex object. 474 """ 475 return self._add(f"(?<!{text})") 476 477 # only allow CharclassOrChars 478 479 @re_escape 480 @beartype 481 def any_of(self, chargroup: CharClassOrChars) -> Verbex: 482 """Find anything in this group of chars or char class. 483 484 Arguments: 485 text -- The characters to look for. 486 487 Returns: 488 Modified Verbex object. 489 """ 490 return self._add(f"(?:[{chargroup}])") 491 492 @re_escape 493 @beartype 494 def not_any_of(self, text: CharClassOrChars) -> Verbex: 495 """Find anything but this group of chars or char class. 496 497 Arguments: 498 text -- The characters to not look for. 499 500 Returns: 501 Modified Verbex object. 502 """ 503 return self._add(f"(?:[^{text}])") 504 505 @re_escape 506 def anything_but(self, chargroup: EscapedCharClassOrSpecial) -> Verbex: 507 """Find anything one or more times but this group of chars or char class. 508 509 Arguments: 510 text -- The characters to not look for. 511 512 Returns: 513 Modified Verbex object. 514 """ 515 return self._add(f"[^{chargroup}]+") 516 517 # no text input 518 519 def start_of_line(self) -> Verbex: 520 """Find the start of the line. 521 522 Returns: 523 Modified Verbex object. 524 """ 525 return self.find(SpecialChar.START_OF_LINE) 526 527 def end_of_line(self) -> Verbex: 528 """Find the end of the line. 529 530 Returns: 531 Modified Verbex object. 532 """ 533 return self.find(SpecialChar.END_OF_LINE) 534 535 def line_break(self) -> Verbex: 536 """Find a line break. 537 538 Returns: 539 Modified Verbex object. 540 """ 541 return self.find(SpecialChar.LINEBREAK) 542 543 def tab(self) -> Verbex: 544 """Find a tab. 545 546 Returns: 547 Modified Verbex object. 548 """ 549 return self.find(SpecialChar.TAB) 550 551 def anything(self) -> Verbex: 552 """Find anything one or more time. 553 554 Returns: 555 Modified Verbex object. 
556 """ 557 return self._add(".+") 558 559 def as_few(self) -> Verbex: 560 """Modify previous search to not be greedy. 561 562 Returns: 563 Modified Verbex object. 564 """ 565 return self._add("?") 566 567 @beartype 568 def number_range(self, start: int, end: int) -> Verbex: 569 """Generate a range of numbers. 570 571 Arguments: 572 start -- Start of the range 573 end -- End of the range 574 575 Returns: 576 Modified Verbex object. 577 """ 578 return self._add("(?:" + "|".join(str(i) for i in range(start, end + 1)) + ")") 579 580 @beartype 581 def letter_range(self, start: Char, end: Char) -> Verbex: 582 """Generate a range of letters. 583 584 Arguments: 585 start -- Start of the range 586 end -- End of the range 587 588 Returns: 589 Modified Verbex object. 590 """ 591 return self._add(f"[{start}-{end}]") 592 593 def word(self) -> Verbex: 594 """Find a word on word boundary. 595 596 Returns: 597 Modified Verbex object. 598 """ 599 return self._add("(\\b\\w+\\b)") 600 601 # # --------------- modifiers ------------------------ 602 603 def with_any_case(self) -> Verbex: 604 """Modify Verbex object to be case insensitive. 605 606 Returns: 607 Modified Verbex object. 608 """ 609 self._modifiers |= re.IGNORECASE 610 return self 611 612 def search_by_line(self) -> Verbex: 613 """Search each line, ^ and $ match begining and end of line respectively. 614 615 Returns: 616 Modified Verbex object. 617 """ 618 self._modifiers |= re.MULTILINE 619 return self 620 621 def with_ascii(self) -> Verbex: 622 """Match ascii instead of unicode. 623 624 Returns: 625 Modified Verbex object. 626 """ 627 self._modifiers |= re.ASCII 628 return self 629 630 631# left over notes from original version 632# def __getattr__(self, attr): 633# """ any other function will be sent to the regex object """ 634# regex = self.regex() 635# return getattr(regex, attr) 636 637# def replace(self, string, repl): 638# return self.sub(repl, string) 639 640 641if __name__ == "__main__": 642 pass
View Source
Workaround for mypy P.args.
View Source
def _no_init_or_replace_init(self, *args, **kwargs):
    """Placeholder ``__init__`` that resolves the real one on first use.

    Raises TypeError when the instance's class is itself a protocol.
    Otherwise it finds the first ``__init__`` in the MRO that is not this
    placeholder, installs it on the class, and calls it with the same
    arguments.
    """
    cls = type(self)

    if cls._is_protocol:
        raise TypeError('Protocols cannot be instantiated')

    # Already using a custom `__init__`. No need to calculate correct
    # `__init__` to call. This can lead to RecursionError. See bpo-45121.
    if cls.__init__ is not _no_init_or_replace_init:
        return

    # Initially, `__init__` of a protocol subclass is set to `_no_init_or_replace_init`.
    # The first instantiation of the subclass will call `_no_init_or_replace_init` which
    # searches for a proper new `__init__` in the MRO. The new `__init__`
    # replaces the subclass' old `__init__` (ie `_no_init_or_replace_init`). Subsequent
    # instantiation of the protocol subclass will thus use the new
    # `__init__` and no longer call `_no_init_or_replace_init`.
    for base in cls.__mro__:
        init = base.__dict__.get('__init__', _no_init_or_replace_init)
        if init is not _no_init_or_replace_init:
            cls.__init__ = init
            break
    else:
        # should not happen
        cls.__init__ = object.__init__

    cls.__init__(self, *args, **kwargs)
View Source
Workaround for mypy P.kwargs.
View Source
def _no_init_or_replace_init(self, *args, **kwargs):
    """Placeholder ``__init__`` that resolves the real one on first use.

    Raises TypeError when the instance's class is itself a protocol.
    Otherwise it finds the first ``__init__`` in the MRO that is not this
    placeholder, installs it on the class, and calls it with the same
    arguments.
    """
    cls = type(self)

    if cls._is_protocol:
        raise TypeError('Protocols cannot be instantiated')

    # Already using a custom `__init__`. No need to calculate correct
    # `__init__` to call. This can lead to RecursionError. See bpo-45121.
    if cls.__init__ is not _no_init_or_replace_init:
        return

    # Initially, `__init__` of a protocol subclass is set to `_no_init_or_replace_init`.
    # The first instantiation of the subclass will call `_no_init_or_replace_init` which
    # searches for a proper new `__init__` in the MRO. The new `__init__`
    # replaces the subclass' old `__init__` (ie `_no_init_or_replace_init`). Subsequent
    # instantiation of the protocol subclass will thus use the new
    # `__init__` and no longer call `_no_init_or_replace_init`.
    for base in cls.__mro__:
        init = base.__dict__.get('__init__', _no_init_or_replace_init)
        if init is not _no_init_or_replace_init:
            cls.__init__ = init
            break
    else:
        # should not happen
        cls.__init__ = object.__init__

    cls.__init__(self, *args, **kwargs)
View Source
class EscapedText(str):
    """Text that has been escaped for regex.

    Arguments:
        str -- Extend the string class.
    """

    def __new__(cls, value: str) -> EscapedText:
        """Return an escaped regex string.

        Arguments:
            value -- the string to escape

        Returns:
            A new EscapedText holding ``re.escape(value)``.
        """
        return str.__new__(cls, re.escape(value))
Text that has been escaped for regex.
Arguments: str -- Extend the string class.
View Source
Return an escaped regex string.
Arguments: value -- the string to escape
Returns: The escaped text.
Inherited Members
- builtins.str
- encode
- replace
- split
- rsplit
- join
- capitalize
- casefold
- title
- center
- count
- expandtabs
- find
- partition
- index
- ljust
- lower
- lstrip
- rfind
- rindex
- rjust
- rstrip
- rpartition
- splitlines
- strip
- swapcase
- translate
- upper
- startswith
- endswith
- removeprefix
- removesuffix
- isascii
- islower
- isupper
- istitle
- isspace
- isdecimal
- isdigit
- isnumeric
- isalpha
- isalnum
- isidentifier
- isprintable
- zfill
- format
- format_map
- maketrans
View Source
def re_escape(func: Callable[P, R]) -> Callable[P, R]:
    """Automatically escape any string parameters as EscapedText.

    Positional and keyword arguments that are plain ``str`` are wrapped in
    EscapedText; every other argument is passed through untouched.

    Arguments:
        func -- The function to decorate.

    Returns:
        The decorated function.
    """

    def _escape(value: Any) -> Any:
        # EscapedText is itself a str, so it must be excluded explicitly;
        # otherwise nested decorated calls would escape the text twice.
        if isinstance(value, str) and not isinstance(value, EscapedText):
            return EscapedText(value)
        return value

    @wraps(func)
    def inner(*args: P.args, **kwargs: P.kwargs) -> R:  # type: ignore
        # The casts only exist to satisfy mypy; they do nothing at runtime.
        escaped_args: List[Any] = [_escape(arg) for arg in cast(HasIter, args)]
        escaped_kwargs: Dict[str, Any] = {
            arg_k: _escape(arg_v) for arg_k, arg_v in cast(HasItems, kwargs).items()
        }
        return func(*escaped_args, **escaped_kwargs)  # type: ignore

    return inner
Automatically escape any string parameters as EscapedText.
Arguments: func -- The function to decorate.
Returns: The decorated function.
View Source
class CharClass(Enum):
    """Enum of character classes in regex.

    Arguments:
        Enum -- Extends the Enum class.
    """

    DIGIT = "\\d"
    LETTER = "\\w"
    # NOTE(review): Python's ``re`` has no ``\u`` / ``\l`` class escapes (they
    # raise ``re.error``); confirm the intended replacement before relying on
    # the two members below.
    UPPERCASE_LETTER = "\\u"
    LOWERCASE_LETTER = "\\l"
    WHITESPACE = "\\s"
    TAB = "\\t"

    def __str__(self) -> str:
        """To string method based on Enum value.

        Returns:
            value of Enum
        """
        return self.value
Enum of character classes in regex.
Arguments: Enum -- Extends the Enum class.
Inherited Members
- enum.Enum
- name
- value
View Source
class SpecialChar(Enum):
    """Enum of special characters, shorthand.

    Arguments:
        Enum -- Extends the Enum class.
    """

    # does not work / should not be used in [ ]
    LINEBREAK = "(\\n|(\\r\\n))"
    START_OF_LINE = "^"
    END_OF_LINE = "$"
    TAB = "\t"

    def __str__(self) -> str:
        """To string for special chars enum.

        Returns:
            Return value of enum as string.
        """
        return self.value
Enum of special characters, shorthand.
Arguments: Enum -- Extends the Enum class.
Inherited Members
- enum.Enum
- name
- value
View Source
class Verbex:
    """
    VerbalExpressions class.

    Builds a regular expression piece by piece through chained method calls.
    Plain ``str`` arguments are escaped by ``re_escape``; ``Verbex``,
    ``CharClass`` and ``SpecialChar`` arguments are inserted via ``str()``
    without escaping.

    the following methods do not try to match the original js lib!
    """

    EMPTY_REGEX_FLAG = re.RegexFlag(0)

    @re_escape
    @beartype
    def __init__(self, modifiers: re.RegexFlag = EMPTY_REGEX_FLAG):
        """Create a Verbex object; setting any needed flags.

        Keyword Arguments:
            modifiers -- Regex modifying flags (default: {re.RegexFlag(0)})
        """
        self._parts: List[str] = []
        self._modifiers = modifiers

    @property
    def modifiers(self) -> re.RegexFlag:
        """Return the modifiers for this Verbex object.

        Returns:
            The modifiers applied to this object.
        """
        return self._modifiers

    def __str__(self) -> str:
        """Return regex string representation."""
        return "".join(self._parts)

    @beartype
    def _add(self, value: Union[str, List[str]]) -> Verbex:
        """
        Append a transformed value to internal expression to be compiled.

        As possible, this method should be "private".

        Arguments:
            value -- A regex fragment, or a list of fragments, appended as-is.

        Returns:
            Modified Verbex object.
        """
        if isinstance(value, list):
            self._parts.extend(value)
        else:
            self._parts.append(value)
        return self

    def regex(self) -> Pattern[str]:
        """Get a regular expression object.

        Returns:
            The accumulated expression compiled with this object's modifiers.
        """
        return re.compile(
            str(self),
            self._modifiers,
        )

    # allow VerbexEscapedCharClassOrSpecial

    @re_escape
    @beartype
    def _capture_group_with_name(
        self,
        name: str,
        text: VerbexEscapedCharClassOrSpecial,
    ) -> Verbex:
        """Add a named capture group.

        Arguments:
            name -- The group name.
            text -- The text / Verbex object to capture.

        Returns:
            Modified Verbex object.
        """
        # Python's ``re`` spells named groups ``(?P<name>...)``; the bare
        # ``(?<name>...)`` form is rejected by ``re.compile``.
        return self._add(f"(?P<{name}>{str(text)})")

    @re_escape
    @beartype
    def _capture_group_without_name(
        self,
        text: VerbexEscapedCharClassOrSpecial,
    ) -> Verbex:
        """Add an unnamed capture group.

        Arguments:
            text -- The text / Verbex object to capture.

        Returns:
            Modified Verbex object.
        """
        return self._add(f"({str(text)})")

    @re_escape
    @beartype
    @_poseur_decorator("self")
    def capture_group(
        self,
        name_or_text: Union[Optional[str], VerbexEscapedCharClassOrSpecial] = None,
        text: Optional[VerbexEscapedCharClassOrSpecial] = None,
    ) -> Verbex:
        """Create a capture group.

        Name is optional; if it is not specified then the first argument is
        the text.

        Keyword Arguments:
            name_or_text -- The name of the group / text to search for (default: {None})
            text -- The text to search for (default: {None})

        Raises:
            ValueError: If no first argument is given, or if ``text`` is given
                and the first argument is not a string usable as the name.

        Returns:
            Verbex with added capture group.
        """
        if name_or_text is None:
            raise ValueError("text must be specified with optional name")
        if text is None:
            # Single argument: it is the text of an unnamed group.
            return self._capture_group_without_name(name_or_text)
        if isinstance(name_or_text, str):
            return self._capture_group_with_name(name_or_text, text)
        raise ValueError("text must be specified with optional name")

    @re_escape
    @beartype
    def OR(self, text: VerbexEscapedCharClassOrSpecial) -> Verbex:  # noqa N802
        """`or` is a python keyword so we use `OR` instead.

        Arguments:
            text -- Text to find or a Verbex object.

        Returns:
            Modified Verbex object.
        """
        return self._add("|").find(text)

    @re_escape
    @beartype
    def zero_or_more(self, text: VerbexEscapedCharClassOrSpecial) -> Verbex:
        """Find the text or Verbex object zero or more times.

        Arguments:
            text -- The text / Verbex object to look for.

        Returns:
            Modified Verbex object.
        """
        return self._add(f"(?:{str(text)})*")

    @re_escape
    @beartype
    def one_or_more(self, text: VerbexEscapedCharClassOrSpecial) -> Verbex:
        """Find the text or Verbex object one or more times.

        Arguments:
            text -- The text / Verbex object to look for.

        Returns:
            Modified Verbex object.
        """
        return self._add(f"(?:{str(text)})+")

    @re_escape
    @beartype
    def n_times(
        self,
        text: VerbexEscapedCharClassOrSpecial,
        n: int,  # noqa: VNE001
    ) -> Verbex:
        """Find the text or Verbex object exactly n times.

        Arguments:
            text -- The text / Verbex object to look for.
            n -- The number of repetitions.

        Returns:
            Modified Verbex object.
        """
        return self._add(f"(?:{str(text)}){{{n}}}")

    @re_escape
    @beartype
    def n_times_or_more(
        self,
        text: VerbexEscapedCharClassOrSpecial,
        n: int,  # noqa: VNE001
    ) -> Verbex:
        """Find the text or Verbex object at least n times.

        Arguments:
            text -- The text / Verbex object to look for.
            n -- The minimum number of repetitions.

        Returns:
            Modified Verbex object.
        """
        return self._add(f"(?:{str(text)}){{{n},}}")

    @re_escape
    @beartype
    def n_to_m_times(
        self,
        text: VerbexEscapedCharClassOrSpecial,
        n: int,  # noqa: VNE001
        m: int,  # noqa: VNE001
    ) -> Verbex:
        """Find the text or Verbex object between n and m times.

        Arguments:
            text -- The text / Verbex object to look for.
            n -- The minimum number of repetitions.
            m -- The maximum number of repetitions.

        Returns:
            Modified Verbex object.
        """
        return self._add(f"(?:{str(text)}){{{n},{m}}}")

    @re_escape
    @beartype
    def maybe(self, text: VerbexEscapedCharClassOrSpecial) -> Verbex:
        """Possibly find the text / Verbex object.

        Arguments:
            text -- The text / Verbex object to possibly find.

        Returns:
            Modified Verbex object.
        """
        return self._add(f"(?:{str(text)})?")

    @re_escape
    @beartype
    def find(self, text: VerbexEscapedCharClassOrSpecial) -> Verbex:
        """Find the text or Verbex object.

        Arguments:
            text -- The text / Verbex object to look for.

        Returns:
            Modified Verbex object.
        """
        return self._add(str(text))

    @re_escape
    @beartype
    def then(self, text: VerbexEscapedCharClassOrSpecial) -> Verbex:
        """Synonym for find.

        Arguments:
            text -- The text / Verbex object to look for.

        Returns:
            Modified Verbex object.
        """
        return self.find(text)

    @re_escape
    @beartype
    def followed_by(self, text: VerbexEscapedCharClassOrSpecial) -> Verbex:
        """Match if string is followed by text.

        Positive lookahead

        Arguments:
            text -- The text / Verbex object that must follow.

        Returns:
            Modified Verbex object.
        """
        return self._add(f"(?={text})")

    @re_escape
    @beartype
    def not_followed_by(self, text: VerbexEscapedCharClassOrSpecial) -> Verbex:
        """Match if string is not followed by text.

        Negative lookahead

        Arguments:
            text -- The text / Verbex object that must not follow.

        Returns:
            Modified Verbex object.
        """
        return self._add(f"(?!{text})")

    @re_escape
    @beartype
    def preceded_by(self, text: VerbexEscapedCharClassOrSpecial) -> Verbex:
        """Match if string is preceded by text.

        Positive lookbehind

        Arguments:
            text -- The text / Verbex object that must precede.

        Returns:
            Modified Verbex object.
        """
        return self._add(f"(?<={text})")

    @re_escape
    @beartype
    def not_preceded_by(self, text: VerbexEscapedCharClassOrSpecial) -> Verbex:
        """Match if string is not preceded by text.

        Negative Lookbehind

        Arguments:
            text -- The text / Verbex object that must not precede.

        Returns:
            Modified Verbex object.
        """
        return self._add(f"(?<!{text})")

    # only allow CharclassOrChars

    @re_escape
    @beartype
    def any_of(self, chargroup: CharClassOrChars) -> Verbex:
        """Find anything in this group of chars or char class.

        Arguments:
            chargroup -- The characters to look for.

        Returns:
            Modified Verbex object.
        """
        return self._add(f"(?:[{chargroup}])")

    @re_escape
    @beartype
    def not_any_of(self, text: CharClassOrChars) -> Verbex:
        """Find anything but this group of chars or char class.

        Arguments:
            text -- The characters to not look for.

        Returns:
            Modified Verbex object.
        """
        return self._add(f"(?:[^{text}])")

    @re_escape
    def anything_but(self, chargroup: EscapedCharClassOrSpecial) -> Verbex:
        """Find anything one or more times but this group of chars or char class.

        Arguments:
            chargroup -- The characters to not look for.

        Returns:
            Modified Verbex object.
        """
        return self._add(f"[^{chargroup}]+")

    # no text input

    def start_of_line(self) -> Verbex:
        """Find the start of the line.

        Returns:
            Modified Verbex object.
        """
        return self.find(SpecialChar.START_OF_LINE)

    def end_of_line(self) -> Verbex:
        """Find the end of the line.

        Returns:
            Modified Verbex object.
        """
        return self.find(SpecialChar.END_OF_LINE)

    def line_break(self) -> Verbex:
        """Find a line break.

        Returns:
            Modified Verbex object.
        """
        return self.find(SpecialChar.LINEBREAK)

    def tab(self) -> Verbex:
        """Find a tab.

        Returns:
            Modified Verbex object.
        """
        return self.find(SpecialChar.TAB)

    def anything(self) -> Verbex:
        """Find anything one or more time.

        Returns:
            Modified Verbex object.
        """
        return self._add(".+")

    def as_few(self) -> Verbex:
        """Modify previous search to not be greedy.

        Returns:
            Modified Verbex object.
        """
        return self._add("?")

    @beartype
    def number_range(self, start: int, end: int) -> Verbex:
        """Generate a range of numbers.

        The range is emitted as an alternation of every integer from start
        to end, both included.

        Arguments:
            start -- Start of the range
            end -- End of the range

        Returns:
            Modified Verbex object.
        """
        return self._add("(?:" + "|".join(str(i) for i in range(start, end + 1)) + ")")

    @beartype
    def letter_range(self, start: Char, end: Char) -> Verbex:
        """Generate a range of letters.

        Arguments:
            start -- Start of the range
            end -- End of the range

        Returns:
            Modified Verbex object.
        """
        return self._add(f"[{start}-{end}]")

    def word(self) -> Verbex:
        """Find a word on word boundary.

        Returns:
            Modified Verbex object.
        """
        return self._add("(\\b\\w+\\b)")

    # # --------------- modifiers ------------------------

    def with_any_case(self) -> Verbex:
        """Modify Verbex object to be case insensitive.

        Returns:
            Modified Verbex object.
        """
        self._modifiers |= re.IGNORECASE
        return self

    def search_by_line(self) -> Verbex:
        """Search each line, ^ and $ match beginning and end of line respectively.

        Returns:
            Modified Verbex object.
        """
        self._modifiers |= re.MULTILINE
        return self

    def with_ascii(self) -> Verbex:
        """Match ascii instead of unicode.

        Returns:
            Modified Verbex object.
        """
        self._modifiers |= re.ASCII
        return self
VerbalExpressions class.
the following methods do not try to match the original js lib!
View Source
@re_escape
@beartype
def __init__(self, modifiers: re.RegexFlag = EMPTY_REGEX_FLAG):
    """Create a Verbex object; setting any needed flags.

    Starts with an empty list of regex parts and stores the given flags.

    Keyword Arguments:
        modifiers -- Regex modifying flags (default: {re.RegexFlag(0)})
    """
    self._parts: List[str] = []
    self._modifiers = modifiers
Create a Verbex object; setting any needed flags.
Keyword Arguments: modifiers -- Regex modifying flags (default: {re.RegexFlag(0)})
Return the modifiers for this Verbex object.
Returns: The modifiers applied to this object.
View Source
Get a regular expression object.
View Source
@re_escape
@beartype
@_poseur_decorator("self")
def capture_group(
    self,
    name_or_text: Union[Optional[str], VerbexEscapedCharClassOrSpecial] = None,
    text: Optional[VerbexEscapedCharClassOrSpecial] = None,
) -> Verbex:
    """Add a capture group, named or unnamed.

    With a single argument, that argument is the text to capture and the
    group is unnamed. With two arguments, the first is the group name
    (it must be a string) and the second is the text to capture.

    Keyword Arguments:
        name_or_text -- The group name, or the text to capture when `text`
            is omitted (default: {None})
        text -- The text to capture when a name is given (default: {None})

    Raises:
        ValueError: If `name_or_text` is None, or if `text` is given
            while `name_or_text` is not a string.

    Returns:
        Verbex with added capture group.
    """
    # Nothing usable as name or text was supplied.
    if name_or_text is None:
        raise ValueError("text must be specified with optional name")
    # Single-argument form: the first argument is the text itself.
    if text is None:
        return self._capture_group_without_name(name_or_text)
    # Two-argument form: only a string can serve as the group name.
    if isinstance(name_or_text, str):
        return self._capture_group_with_name(name_or_text, text)
    raise ValueError("text must be specified with optional name")
Create a capture group.
Name is optional if not specified then the first argument is the text.
Keyword Arguments: name_or_text -- The name of the group / text to search for (default: {None}) text -- The text to search for (default: {None})
Raises: ValueError: If name is specified then text must be as well.
Returns: Verbex with added capture group.
View Source
@re_escape
@beartype
def OR(self, text: VerbexEscapedCharClassOrSpecial) -> Verbex:  # noqa N802
    """Add an alternative: append `|` and then find `text`.

    `or` is a python keyword so we use `OR` instead.

    Arguments:
        text -- Text to find or a Verbex object.

    Returns:
        Modified Verbex object.
    """
    # Append the alternation bar first, then delegate to find() for the text.
    alternated = self._add("|")
    return alternated.find(text)
`or` is a python keyword so we use `OR` instead.
Arguments: text -- Text to find or a Verbex object.
Returns: Modified Verbex object.
View Source
@re_escape
@beartype
def zero_or_more(self, text: VerbexEscapedCharClassOrSpecial) -> Verbex:
    """Match the text or Verbex object any number of times, including none.

    Arguments:
        text -- The text / Verbex object to look for.

    Returns:
        Modified Verbex object.
    """
    # Wrap in a non-capturing group so `*` applies to the whole text.
    grouped = "(?:" + str(text) + ")"
    return self._add(grouped + "*")
Find the text or Verbex object zero or more times.
Arguments: text -- The text / Verbex object to look for.
Returns: Modified Verbex object.
View Source
@re_escape
@beartype
def one_or_more(self, text: VerbexEscapedCharClassOrSpecial) -> Verbex:
    """Match the text or Verbex object at least once.

    Arguments:
        text -- The text / Verbex object to look for.

    Returns:
        Modified Verbex object.
    """
    # Wrap in a non-capturing group so `+` applies to the whole text.
    grouped = "(?:" + str(text) + ")"
    return self._add(grouped + "+")
Find the text or Verbex object one or more times.
Arguments: text -- The text / Verbex object to look for.
Returns: Modified Verbex object.
View Source
@re_escape
@beartype
def n_times(
    self,
    text: VerbexEscapedCharClassOrSpecial,
    n: int,  # noqa: VNE001
) -> Verbex:
    """Match the text or Verbex object exactly n times.

    Arguments:
        text -- The text / Verbex object to look for.
        n -- The exact number of repetitions.

    Returns:
        Modified Verbex object.
    """
    # Doubled braces are literal braces in an f-string, giving `{n}`.
    quantifier = f"{{{n}}}"
    return self._add("(?:" + str(text) + ")" + quantifier)
Find the text or Verbex object exactly n times.
Arguments: text -- The text / Verbex object to look for. n -- The exact number of repetitions.
Returns: Modified Verbex object.
View Source
@re_escape
@beartype
def n_times_or_more(
    self,
    text: VerbexEscapedCharClassOrSpecial,
    n: int,  # noqa: VNE001
) -> Verbex:
    """Match the text or Verbex object n times or more.

    Arguments:
        text -- The text / Verbex object to look for.
        n -- The minimum number of repetitions.

    Returns:
        Modified Verbex object.
    """
    # Doubled braces are literal braces in an f-string, giving `{n,}`.
    quantifier = f"{{{n},}}"
    return self._add("(?:" + str(text) + ")" + quantifier)
Find the text or Verbex object at least n times.
Arguments: text -- The text / Verbex object to look for. n -- The minimum number of repetitions.
Returns: Modified Verbex object.
View Source
@re_escape
@beartype
def n_to_m_times(
    self,
    text: VerbexEscapedCharClassOrSpecial,
    n: int,  # noqa: VNE001
    m: int,  # noqa: VNE001
) -> Verbex:
    """Match the text or Verbex object between n and m times.

    Arguments:
        text -- The text / Verbex object to look for.
        n -- The lower bound placed in the quantifier.
        m -- The upper bound placed in the quantifier.

    Returns:
        Modified Verbex object.
    """
    # Doubled braces are literal braces in an f-string, giving `{n,m}`.
    quantifier = f"{{{n},{m}}}"
    return self._add("(?:" + str(text) + ")" + quantifier)
Find the text or Verbex object between n and m times.
Arguments: text -- The text / Verbex object to look for. n -- The lower bound of repetitions. m -- The upper bound of repetitions.
Returns: Modified Verbex object.
View Source
@re_escape
@beartype
def maybe(self, text: VerbexEscapedCharClassOrSpecial) -> Verbex:
    """Optionally match the text / Verbex object (zero or one time).

    Arguments:
        text -- The text / Verbex object to possibly find.

    Returns:
        Modified Verbex object.
    """
    # Wrap in a non-capturing group so `?` applies to the whole text.
    grouped = "(?:" + str(text) + ")"
    return self._add(grouped + "?")
Possibly find the text / Verbex object.
Arguments: text -- The text / Verbex object to possibly find.
Returns: Modified Verbex object.
View Source
Find the text or Verbex object.
Arguments: text -- The text / Verbex object to look for.
Returns: Modified Verbex object.
View Source
Synonym for find.
Arguments: text -- The text / Verbex object to look for.
Returns: Modified Verbex object.
View Source
Match if string is followed by text.
Positive lookahead
Returns: Modified Verbex object.
View Source
Match if string is not followed by text.
Negative lookahead
Returns: Modified Verbex object.
View Source
Match if string is preceded by text.
Positive lookbehind
Returns: Modified Verbex object.
View Source
Match if string is not preceded by text.
Negative Lookbehind
Returns: Modified Verbex object.
View Source
@re_escape
@beartype
def any_of(self, chargroup: CharClassOrChars) -> Verbex:
    """Match a single character from this group of chars or char class.

    Arguments:
        chargroup -- The characters (or character class) to look for.

    Returns:
        Modified Verbex object.
    """
    # Build the bracketed set first, then wrap it in a non-capturing group.
    char_set = f"[{chargroup}]"
    return self._add(f"(?:{char_set})")
Find anything in this group of chars or char class.
Arguments: chargroup -- The characters to look for.
Returns: Modified Verbex object.
View Source
@re_escape
@beartype
def not_any_of(self, text: CharClassOrChars) -> Verbex:
    """Match a single character outside this group of chars or char class.

    Arguments:
        text -- The characters (or character class) to exclude.

    Returns:
        Modified Verbex object.
    """
    # `^` directly after `[` negates the set.
    negated_set = f"[^{text}]"
    return self._add(f"(?:{negated_set})")
Find anything but this group of chars or char class.
Arguments: text -- The characters to not look for.
Returns: Modified Verbex object.
View Source
@re_escape
def anything_but(self, chargroup: EscapedCharClassOrSpecial) -> Verbex:
    """Match one or more characters outside this group of chars or char class.

    Arguments:
        chargroup -- The characters (or character class) to exclude.

    Returns:
        Modified Verbex object.
    """
    # Negated set followed by `+`: at least one excluded-from-set character.
    negated_set = f"[^{chargroup}]"
    return self._add(negated_set + "+")
Find anything one or more times but this group of chars or char class.
Arguments: chargroup -- The characters to not look for.
Returns: Modified Verbex object.
View Source
Find the start of the line.
Returns: Modified Verbex object.
View Source
Find the end of the line.
Returns: Modified Verbex object.
View Source
Find a line break.
Returns: Modified Verbex object.
View Source
Find a tab.
Returns: Modified Verbex object.
View Source
Find anything one or more times.
Returns: Modified Verbex object.
View Source
Modify previous search to not be greedy.
Returns: Modified Verbex object.
View Source
@beartype
def number_range(self, start: int, end: int) -> Verbex:
    """Match any one integer in the inclusive range start..end.

    The range is expanded into a non-capturing alternation of every number
    in it. Alternatives are emitted longest first, because regex alternation
    accepts the first branch that matches: in ascending order
    (``1|2|...|10``) the text "10" would be matched as just "1".

    Arguments:
        start -- Start of the range (inclusive)
        end -- End of the range (inclusive)

    Returns:
        Modified Verbex object.
    """
    numbers = [str(i) for i in range(start, end + 1)]
    # list.sort() is stable, so numbers of equal width keep ascending order.
    numbers.sort(key=len, reverse=True)
    return self._add("(?:" + "|".join(numbers) + ")")
Generate a range of numbers.
Arguments: start -- Start of the range end -- End of the range
Returns: Modified Verbex object.
View Source
Generate a range of letters.
Arguments: start -- Start of the range end -- End of the range
Returns: Modified Verbex object.
View Source
Find a word on word boundary.
Returns: Modified Verbex object.
View Source
Modify Verbex object to be case insensitive.
Returns: Modified Verbex object.
View Source
Search each line, ^ and $ match beginning and end of line respectively.
Returns: Modified Verbex object.
View Source
Match ascii instead of unicode.
Returns: Modified Verbex object.