pycnnum.pycnnum
Chinese number <=> int/float conversion
1# pylint: disable=line-too-long 2"""Chinese number <=> int/float conversion """ 3 4from __future__ import annotations 5 6import copy 7import re 8from dataclasses import dataclass 9from enum import Enum 10from functools import cached_property 11from math import log2 12from typing import Callable, List, Optional, Tuple, Union 13 14from .constants import DIGITS, UNITS 15 16 17class NumberingType(Enum): 18 r"""Numbering system types: LOW, MID, HIGH 19 20 Chinese numbering types: 21 22 For $i \in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]$: 23 24 25 - `LOW` : $10^{8 + i}$ 26 - `MID` : $10^{8 + i*4}$ 27 - `HIGH` : $10^{2^{i+3}}$ 28 29 --- 30 31 | type | 亿 | 兆 | 京 | 垓 | 秭 | 穰 | 沟 | 涧 | 正 | 载 | 32 |-------| --------|---------|---------|---------|----------|----------|----------|-----------|-----------|-----------| 33 |`low` | $10^{8}$|$10^{9}$ |$10^{10}$|$10^{11}$|$10^{12}$ |$10^{13}$ |$10^{14}$ |$10^{15}$ |$10^{16}$ |$10^{17}$ | 34 |`mid` | $10^{8}$|$10^{12}$|$10^{16}$|$10^{20}$|$10^{24}$ |$10^{28}$ |$10^{32}$ |$10^{36}$ |$10^{40}$ |$10^{44}$ | 35 |`high` | $10^{8}$|$10^{16}$|$10^{32}$|$10^{64}$|$10^{128}$|$10^{256}$|$10^{512}$|$10^{1024}$|$10^{2048}$|$10^{4096}$| 36 """ 37 38 LOW = "low" 39 """ 40 | type | 亿 | 兆 | 京 | 垓 | 秭 | 穰 | 沟 | 涧 | 正 | 载 | 41 |-------| --------|---------|---------|---------|----------|----------|----------|-----------|-----------|-----------| 42 |`low` | $10^{8}$|$10^{9}$ |$10^{10}$|$10^{11}$|$10^{12}$ |$10^{13}$ |$10^{14}$ |$10^{15}$ |$10^{16}$ |$10^{17}$ | 43 """ 44 45 MID = "mid" 46 """ 47 | type | 亿 | 兆 | 京 | 垓 | 秭 | 穰 | 沟 | 涧 | 正 | 载 | 48 |-------| --------|---------|---------|---------|----------|----------|----------|-----------|-----------|-----------| 49 |`mid` | $10^{8}$|$10^{12}$|$10^{16}$|$10^{20}$|$10^{24}$ |$10^{28}$ |$10^{32}$ |$10^{36}$ |$10^{40}$ |$10^{44}$ | 50 """ 51 52 HIGH = "high" 53 """ 54 | type | 亿 | 兆 | 京 | 垓 | 秭 | 穰 | 沟 | 涧 | 正 | 载 | 55 |-------| 
--------|---------|---------|---------|----------|----------|----------|-----------|-----------|-----------| 56 |`high` | $10^{8}$|$10^{16}$|$10^{32}$|$10^{64}$|$10^{128}$|$10^{256}$|$10^{512}$|$10^{1024}$|$10^{2048}$|$10^{4096}$| 57 """ 58 59 @property 60 def powers(self) -> List[int]: 61 """Powers of units for each numbering type""" 62 return { 63 NumberingType.LOW: [8 + i for i in range(10)], 64 NumberingType.MID: [8 + 4 * i for i in range(10)], 65 NumberingType.HIGH: [2 ** (i + 3) for i in range(10)], 66 }[self] 67 68 69# region char classes 70@dataclass 71class CNChar: 72 """Base Chinese char class. 73 74 Each object has simplified and traditional strings. 75 When converted to string, it will shows the simplified string or traditional string or space `' '`. 76 77 Example: 78 ```python 79 80 >>> negative = CNChar(simplified="负", traditional="負") 81 >>> negative.simplified 82 '负' 83 >>> negative.traditional 84 '負' 85 >>> negative.upper_simplified 86 '负' 87 >>> negative.upper_traditional 88 '負' 89 90 ``` 91 """ 92 93 simplified: Optional[str] = None 94 """Simplified Chinese character""" 95 96 traditional: Optional[str] = None 97 """Traditional Chinese character""" 98 99 upper_simplified: Optional[str] = None 100 """Capitalized character in simplified Chinese. Defaults to `None` means same as `self.simplified`.""" 101 102 upper_traditional: Optional[str] = None 103 """Capitalized character in traditional Chinese. 
Defaults to `None` means same as `self.traditional`.""" 104 105 @property 106 def all_forms(self) -> str: 107 """All forms of the character""" 108 return "".join(v for v in self.__dict__.values() if isinstance(v, str)) 109 110 def __post_init__(self): 111 """Post initialization""" 112 self.simplified = self.simplified or "" 113 self.traditional = self.traditional or self.simplified 114 self.upper_simplified = self.upper_simplified or self.simplified 115 self.upper_traditional = self.upper_traditional or self.traditional 116 117 def __str__(self) -> str: 118 return self.simplified if self.simplified else f"Empty {self.__class__.__name__}" 119 120 def __repr__(self) -> str: 121 return str(self) 122 123 124@dataclass 125class CNUnit(CNChar): 126 """Chinese number unit class 127 128 Each of it is an `CNChar` with additional upper type strings. 129 130 Example: 131 ```python 132 133 >>> wan = CNUnit(*"万萬萬萬", power=4) 134 >>> wan 135 10^4 136 137 ``` 138 """ 139 140 power: int = 0 141 r"""The power of this unit, e.g. `power` = 4 for `'万'` ( \(10^4\) )""" 142 143 def __str__(self) -> str: 144 return f"10^{self.power}" 145 146 def __repr__(self) -> str: 147 return str(self) 148 149 @classmethod 150 def create(cls, power: int, numbering_type: NumberingType = NumberingType.MID) -> CNUnit: 151 """Create one unit character based on power value from constants 152 153 - `SMALLER_CHINESE_NUMBERING_UNITS_SIMPLIFIED` 154 - `SMALLER_CHINESE_NUMBERING_UNITS_TRADITIONAL` 155 - `LARGER_CHINESE_NUMBERING_UNITS_SIMPLIFIED` 156 - `LARGER_CHINESE_NUMBERING_UNITS_TRADITIONAL` 157 158 Example: 159 ```python 160 161 >>> CNUnit.create(power=8, numbering_type=NumberingType.LOW).simplified 162 '亿' 163 >>> CNUnit.create(power=12, numbering_type=NumberingType.LOW).simplified 164 '秭' 165 >>> CNUnit.create(power=12, numbering_type=NumberingType.HIGH).simplified 166 Traceback (most recent call last): 167 ... 
168 ValueError: power = 12 is invalid for numbering_type = <NumberingType.HIGH: 'high'> 169 170 ``` 171 172 Args: 173 power (int): Unit power, starting from 1. 174 numbering_type (NumberingType, optional): Numbering type. Defaults to `NumberingType.MID`. 175 176 Raises: 177 ValueError: Raised when invalid `numbering_type` is provided 178 179 Returns: 180 CNUnit: Created unit object 181 """ 182 assert power > 0, "Power should be greater than 0." 183 184 if power < 5: 185 return cls(*UNITS[power - 1], power=power) # type: ignore[misc] 186 187 i = float(power - 8) 188 if numbering_type == NumberingType.LOW: 189 pass 190 elif numbering_type == NumberingType.MID: 191 i = i / 4 192 elif numbering_type == NumberingType.HIGH: 193 i = log2(power) - 3 194 else: 195 raise ValueError(f"Numbering type should be in {NumberingType} but {numbering_type} is provided.") 196 197 i = int(i) if i.is_integer() else -1 198 199 if i < 0: 200 raise ValueError(f"{power = } is invalid for {numbering_type = }") 201 202 return cls(*UNITS[i + 4], power=power) # type: ignore[misc] 203 204 205@dataclass 206class CNDigit(CNChar): 207 """Chinese number digit class 208 209 Example: 210 ```python 211 212 >>> CNDigit(*"三叁叁叁", int_value=3) 213 3 214 215 ``` 216 """ 217 218 int_value: int = 0 219 """Integer value of the digit, 0 to 9. Defaults to 0.""" 220 221 alt_s: Optional[str] = None 222 """Alternative simplified character, e.g. '两' for 2. Defaults to `None`. 223 """ 224 225 alt_t: Optional[str] = None 226 """Alternative traditional character, e.g. '俩' for 2. Defaults to `None`. 227 """ 228 229 def __str__(self): 230 return str(self.int_value) 231 232 def __repr__(self): 233 return str(self) 234 235 236@dataclass 237class CNMath(CNChar): 238 """ 239 Chinese math operators 240 241 Example: 242 ```python 243 244 >>> positive = CNMath(*"正正正正", symbol="+", expression=lambda x: +x) 245 >>> positive.symbol 246 '+' 247 248 ``` 249 """ 250 251 symbol: str = "" 252 """Mathematical symbol, e.g. '+'. 
Defaults to ``.""" 253 254 expression: Optional[Callable] = None 255 """Mathematical expression, e.g. `lambda x: +x`. Defaults to `None`.""" 256 257 def __str__(self): 258 return self.symbol 259 260 def __repr__(self): 261 return str(self) 262 263 264SymbolType = Union[CNUnit, CNDigit, CNMath] 265 266 267@dataclass 268class MathSymbols: 269 """Math symbols used in Chinese for both traditional and simplified Chinese 270 271 - positive = ['正', '正'] 272 - negative = ['负', '負'] 273 - point = ['点', '點'] 274 275 Used in `NumberingSystem`. 276 277 Example: 278 ```python 279 280 >>> positive = CNMath(*"正正正正", symbol="+", expression=lambda x: +x) 281 >>> negative = CNMath(*"负負负負", symbol="-", expression=lambda x: -x) 282 >>> point = CNMath(*"点點点點", symbol=".", expression=lambda integer, decimal: float(str(integer) + "." + str(decimal))) 283 >>> math = MathSymbols(positive, negative, point) 284 >>> math.positive 285 + 286 >>> list(math) 287 [+, -, .] 288 >>> for i in math: 289 ... print(i) 290 + 291 - 292 . 
293 294 ``` 295 """ 296 297 positive: CNMath 298 """Positive""" 299 300 negative: CNMath 301 """Negative""" 302 303 point: CNMath 304 """Decimal point""" 305 306 def __iter__(self): 307 for v in self.__dict__.values(): 308 yield v 309 310 311class NumberingSystem: 312 """Numbering system class 313 314 Example: 315 ```python 316 317 >>> system = NumberingSystem(NumberingType.MID) 318 >>> system.numbering_type 319 <NumberingType.MID: 'mid'> 320 >>> system.digits[0] 321 0 322 >>> system.units[0] 323 10^1 324 >>> system.units[7].simplified 325 '垓' 326 >>> system.math.positive 327 + 328 329 ``` 330 """ 331 332 # region: fields 333 _numbering_type: NumberingType 334 """Numbering type""" 335 336 _digits: List[CNDigit] 337 """Digits""" 338 339 _units: List[CNUnit] 340 """Units""" 341 342 _maths: MathSymbols 343 """Math symbols""" 344 345 @property 346 def numbering_type(self) -> NumberingType: 347 """Numbering type""" 348 return self._numbering_type 349 350 @numbering_type.setter 351 def numbering_type(self, value: NumberingType): 352 self._numbering_type = value 353 self._units[4:] = [CNUnit.create(i, self._numbering_type) for i in self._numbering_type.powers] 354 355 @property 356 def digits(self) -> List[CNDigit]: 357 """Digits""" 358 result = [CNDigit(*v, int_value=i) for i, v in enumerate(DIGITS)] # type: ignore[misc] 359 result[0].alt_s, result[0].alt_t = "〇", "〇" 360 result[2].alt_s, result[2].alt_t = "两", "兩" 361 return result 362 363 @property 364 def units(self) -> List[CNUnit]: 365 """Units""" 366 return self._units 367 368 @cached_property 369 def math(self) -> MathSymbols: 370 """Math symbols""" 371 positive_cn = CNMath(*"正正", symbol="+", expression=lambda x: x) # type: ignore[misc] 372 negative_cn = CNMath(*"负負", symbol="-", expression=lambda x: -x) # type: ignore[misc] 373 point_cn = CNMath(*"点點", symbol=".", expression=lambda i, d: float(f"{i}.{d}")) # type: ignore[misc] 374 return MathSymbols(positive_cn, negative_cn, point_cn) 375 376 # endregion: fields 
377 378 def __init__(self, numbering_type: NumberingType = NumberingType.MID) -> None: 379 """Construction""" 380 self._numbering_type = numbering_type 381 self._units = [CNUnit(*UNITS[i], power=i + 1) for i in range(4)] # type: ignore[misc] 382 self._units[4:] = [CNUnit.create(i, self._numbering_type) for i in self._numbering_type.powers] 383 384 def __getitem__(self, key: str) -> SymbolType: 385 if not isinstance(key, str): 386 raise ValueError(f"{key = } should be a string.") 387 388 for c in self.units + self.digits + list(self.math): 389 if key in c.all_forms: 390 return c 391 392 raise ValueError(f"{key} is not in {self.numbering_type.name} system.") 393 394 def cn2symbols(self, cn_str: str) -> Tuple[List[SymbolType], List[SymbolType]]: 395 """Chinese string to symbols 396 397 Example: 398 ```python 399 400 >>> system = NumberingSystem(NumberingType.MID) 401 >>> system.cn2symbols("一百八") 402 ([1, 10^2, 8], []) 403 >>> system.cn2symbols("一百八十") 404 ([1, 10^2, 8, 10^1], []) 405 >>> system.cn2symbols("一百八点五六七") 406 ([1, 10^2, 8], [5, 6, 7]) 407 >>> system.cn2symbols("两千万一百八十") 408 ([2, 10^7, 1, 10^2, 8, 10^1], []) 409 >>> system.cn2symbols("正两千万一百八十") 410 ([+, 2, 10^7, 1, 10^2, 8, 10^1], []) 411 >>> system.cn2symbols("负两千万一百八十") 412 ([-, 2, 10^7, 1, 10^2, 8, 10^1], []) 413 >>> system.cn2symbols("点负两千万一百八十") 414 Traceback (most recent call last): 415 ... 416 ValueError: First symbol in decimal part should not be a math symbol, - is provided. 417 >>> system.cn2symbols("两千万点一百点八十") 418 Traceback (most recent call last): 419 ... 420 ValueError: Multiple points in the number 两千万点一百点八十. 421 >>> system.cn2symbols("两千万点一百點八十") 422 Traceback (most recent call last): 423 ... 424 ValueError: Multiple points in the number 两千万点一百點八十. 
425 426 ``` 427 428 Args: 429 cn_str (str): Chinese number 430 431 Returns: 432 Tuple[List[SymbolType], List[SymbolType]]: Integer symbols, decimal symbols 433 """ 434 if cn_str == "": 435 return [], [] 436 437 int_part, dec_part = cn_str, "" 438 int_dec = re.split(r"\.|点|點", cn_str) 439 if len(int_dec) > 2: 440 raise ValueError(f"Multiple points in the number {cn_str}.") 441 int_part, dec_part = int_dec if len(int_dec) == 2 else (int_dec[0], "") 442 443 integer_value = [copy.deepcopy(self[c]) for c in int_part] 444 445 for i, v in enumerate(integer_value): 446 if not isinstance(v, CNUnit): 447 continue 448 if i + 1 < len(integer_value) and isinstance(integer_value[i + 1], CNUnit): 449 v.power += integer_value[i + 1].power # type: ignore[union-attr] 450 integer_value[i + 1] = None # type: ignore[union-attr] 451 452 integer_value = [v for v in integer_value if v is not None] 453 454 for i, v in enumerate(integer_value): 455 if not isinstance(v, CNUnit): 456 continue 457 for u in integer_value[i + 1 :]: 458 if isinstance(u, CNUnit) and u.power > v.power: 459 v.power += u.power 460 break 461 462 decimal_value = [copy.deepcopy(self[c]) for c in dec_part] 463 464 # if first symbol is a math symbol, e.g. '正两千万一百八十' 465 if int_part and (first_symbol := [c for c in self.math if int_part[0] in c.all_forms]): 466 integer_value[0] = first_symbol[0] 467 468 # if first symbol is a math symbol, e.g. '点负两千万一百八十' 469 if decimal_value and (decimal_value[0] in self.math): 470 raise ValueError( 471 f"First symbol in decimal part should not be a math symbol, {decimal_value[0]} is provided." 472 ) 473 474 return integer_value, decimal_value 475 476 def _refine_integer_symbols(self, integer_symbols: List[SymbolType]) -> List[SymbolType]: 477 """Refine integer symbols. Do not change math symbols. 
478 479 Example: 480 ```python 481 482 >>> s = NumberingSystem(NumberingType.MID) 483 >>> s._refine_integer_symbols(s.cn2symbols("十八")[0]) 484 [1, 10^1, 8, 10^0] 485 >>> s._refine_integer_symbols(s.cn2symbols("两千万一百八十")[0]) 486 [2, 10^7, 1, 10^2, 8, 10^1] 487 >>> s._refine_integer_symbols(s.cn2symbols("两千万零一百八十")[0]) 488 [2, 10^7, 1, 10^2, 8, 10^1] 489 >>> s._refine_integer_symbols(s.cn2symbols("两亿六")[0]) 490 [2, 10^8, 6, 10^7] 491 492 ``` 493 494 Args: 495 integer_symbols (List[SymbolType]): Raw integer symbols 496 497 Returns: 498 List[SymbolType]: Refined symbols 499 """ 500 if not integer_symbols: 501 return integer_symbols 502 503 # first symbol is unit, add 1 before it, e.g. , "十五" to "一十五" 504 if isinstance(integer_symbols[0], CNUnit) and integer_symbols[0].power == 1: 505 integer_symbols = [self.digits[1]] + integer_symbols 506 507 # last symbol is digit and the second last symbol is unit 508 # e.g. "十 五" to "十 五 10^0", "二 百 五" to "二 百 五 10^1" 509 if len(integer_symbols) > 1: 510 if isinstance(integer_symbols[-1], CNDigit) and isinstance(integer_symbols[-2], CNUnit): 511 # add a dummy unit 512 integer_symbols += [CNUnit(power=integer_symbols[-2].power - 1)] 513 514 result: List[SymbolType] = [] 515 unit_count = 0 516 for s in integer_symbols: 517 if isinstance(s, CNMath): 518 # reset unit_count, e.g. "两千万" has two units 519 unit_count = 0 520 continue 521 if isinstance(s, CNDigit): 522 # reset unit_count, e.g. "两千万" has two units 523 unit_count = 0 524 if s.int_value > 0: 525 result.append(s) 526 continue 527 if not isinstance(s, CNUnit): 528 raise ValueError(f"Invalid symbol {s} in {integer_symbols}.") 529 530 # create a dummy unit 531 current_unit = CNUnit("", "", "", "", s.power) 532 unit_count += 1 533 534 # store the first met unit 535 if unit_count == 1: 536 result.append(current_unit) 537 continue 538 539 # if there are more than one units, sum them, e.g. 
"两 千 万" to "两 10^7" 540 result[-1].power += current_unit.power # type: ignore[union-attr] 541 if integer_symbols[0] == self.math.negative: 542 result = [self.math.negative] + result 543 return result 544 545 def get_int_value(self, integer_symbols: List[SymbolType]) -> int: 546 """Compute the value from symbol 547 548 Example: 549 ```python 550 551 >>> s = NumberingSystem(NumberingType.MID) 552 >>> s.get_int_value(s.cn2symbols("十八")[0]) 553 18 554 >>> s.get_int_value(s.cn2symbols("两千万一百八十")[0]) 555 20000180 556 >>> s.get_int_value(s.cn2symbols("两亿六")[0]) 557 260000000 558 559 ``` 560 561 Args: 562 integer_symbols (List[SymbolType]): Symbols, without point 563 564 Returns: 565 int: value 566 """ 567 integer_symbols = self._refine_integer_symbols(integer_symbols) 568 569 value = [0] 570 last_power = 0 571 for s in integer_symbols: 572 if isinstance(s, CNDigit): 573 value[-1] = s.int_value 574 elif isinstance(s, CNUnit): 575 value[-1] *= pow(10, s.power) 576 if s.power > last_power: 577 value[:-1] = [v * pow(10, s.power) for v in value[:-1]] # pylint: disable=no-member 578 last_power = s.power 579 value.append(0) 580 return sum(value) 581 582 def int2symbols(self, int_value: Union[int, str]) -> List[SymbolType]: 583 """Integer to symbols 584 585 Example: 586 ```python 587 588 >>> s = NumberingSystem(NumberingType.MID) 589 >>> s.int2symbols(18) 590 [1, 10^1, 8] 591 >>> s.int2symbols(20000180) 592 [2, 10^3, 10^4, 0, 1, 10^2, 8, 10^1] 593 >>> s.int2symbols(26) 594 [2, 10^1, 6] 595 >>> s.int2symbols(320) 596 [3, 10^2, 2, 10^1] 597 >>> s.int2symbols(220) 598 [2, 10^2, 2, 10^1] 599 >>> s.int2symbols("220") 600 [2, 10^2, 2, 10^1] 601 602 ``` 603 604 Args: 605 int_value (Union[int, str]): Value string, e.g. 
"0.1", "34" 606 607 Returns: 608 List[SymbolType]: List of values 609 """ 610 value_string = str(int_value) 611 striped_string = value_string.lstrip("0") 612 613 # record nothing if all zeros 614 if not striped_string: 615 return [] 616 617 # record one digits 618 if len(striped_string) == 1: 619 result: List[SymbolType] = [self.digits[int(striped_string)]] 620 if len(value_string) != len(striped_string): 621 result = [self.digits[0] for _ in range(len(value_string) - len(striped_string))] + result 622 return result 623 624 # recursively record multiple digits 625 626 # find the unit for the first digit, e.g. 123 -> 10^2 627 result_unit = next(u for u in reversed(self.units) if u.power < len(striped_string)) 628 629 # get the first part of the number, e.g. 123 -> 1 630 result_string = value_string[: -result_unit.power] 631 632 # recursively record the first part of the number, e.g. 123 -> [1, 10^2, 2, 10^1, 3] 633 return self.int2symbols(result_string) + [result_unit] + self.int2symbols(striped_string[-result_unit.power :]) 634 635 def alt_two_symbols(self, integer_symbols: List[SymbolType]) -> List[SymbolType]: 636 """Alternative two symbols 637 e.g. "二百二" to "两百二", "二千二" to "两千二", "三亿零二万二" to "三亿零两万二 638 639 Args: 640 integer_symbols (List[SymbolType]): Symbols 641 642 Returns: 643 List[SymbolType]: Symbols 644 """ 645 liang = self.digits[2] 646 for i, v in enumerate(integer_symbols): 647 if not isinstance(v, CNDigit): 648 continue 649 if v.int_value != 2: 650 continue 651 next_symbol = integer_symbols[i + 1] if i < len(integer_symbols) - 1 else None 652 previous_symbol = integer_symbols[i - 1] if i > 0 else None 653 654 # if the next symbol is not a unit, skip 655 if not isinstance(next_symbol, CNUnit): 656 continue 657 658 # e.g. "一亿零二百" leading_zero = True 659 leading_zero = getattr(previous_symbol, "int_value", "invalid") == 0 # False == 0 in Python 660 661 # e.g. 
"二百二" to "两百二" 662 previous_is_unit = isinstance(previous_symbol, CNUnit) 663 664 if not (leading_zero or previous_is_unit or (previous_symbol is None)): 665 continue 666 667 # e.g. "二百二" to "两百二", "二千二" to "两千二" 668 if next_symbol.power > 1: 669 integer_symbols[i].simplified = liang.alt_s 670 integer_symbols[i].traditional = liang.alt_t 671 integer_symbols[i].upper_simplified = liang.alt_s 672 integer_symbols[i].upper_traditional = liang.alt_t 673 return integer_symbols 674 675 676def cn2num( 677 chinese_string: str, 678 numbering_type: Union[str, NumberingType] = NumberingType.MID, 679) -> Union[int, float]: 680 """Convert Chinese number to `int` or `float` value。 681 682 Example: 683 ```python 684 685 >>> cn2num("负零点五") 686 -0.5 687 >>> cn2num("一百八") 688 180 689 >>> cn2num("一百八十") 690 180 691 >>> cn2num("一百八点五六七") 692 180.567 693 >>> cn2num("两千万一百八十") 694 20000180 695 >>> cn2num("正两千万一百八十") 696 20000180 697 698 ``` 699 700 Args: 701 chinese_string (str): Chinese number. 702 numbering_type (Union[str, NumberingType], optional): numbering type. Defaults to `NumberingType.MID`. 703 704 Raises: 705 ValueError: Raised when a character is not in the numbering system, e.g. 
'你' is not a number nor a unit 706 707 Returns: 708 Union[int, float]: `int` or `float` value 709 """ 710 711 numbering_type = NumberingType(numbering_type) if isinstance(numbering_type, str) else numbering_type 712 713 system = NumberingSystem(numbering_type) 714 int_part, dec_part = system.cn2symbols(chinese_string) 715 int_value = system.get_int_value(int_part) 716 717 # skip unit in decimal value 718 dec_str = "".join([str(d.int_value) for d in dec_part if isinstance(d, CNDigit)]) 719 720 result = float(f"{int_value}.{dec_str}") if dec_part else int_value 721 if int_part and int_part[0] == system.math.negative: 722 result = -result 723 return result 724 725 726# pylint: disable-next=too-many-arguments 727def num2cn( 728 num: Union[int, float, str], 729 numbering_type: Union[str, NumberingType] = NumberingType.MID, 730 upper: bool = False, 731 traditional: bool = False, 732 alt_0: bool = False, 733 alt_2: bool = False, 734) -> str: 735 """Integer or float value to Chinese string 736 737 Example: 738 ```python 739 740 >>> num2cn(16) 741 '十六' 742 >>> num2cn(1) 743 '一' 744 >>> num2cn(116) 745 '一百一十六' 746 >>> num2cn(2401, alt_2=True) 747 '两千四百零一' 748 >>> num2cn(101) 749 '一百零一' 750 >>> num2cn(float("3.4"), numbering_type=NumberingType.HIGH, alt_2=True, upper=False, traditional=False) 751 '三点四' 752 >>> num2cn("3.4", numbering_type=NumberingType.HIGH, alt_2=True, upper=False, traditional=False) 753 '三点四' 754 >>> num2cn(23232.005184132423423423300, numbering_type=NumberingType.HIGH, alt_2=True, upper=False, traditional=True) 755 '兩萬叁仟兩佰叁拾贰點零零伍壹捌肆壹叁贰肆贰肆' 756 >>> num2cn("23232.005184132423423423300", numbering_type=NumberingType.HIGH, alt_2=True, upper=False, traditional=True) 757 '兩萬叁仟兩佰叁拾贰點零零伍壹捌肆壹叁贰肆贰叁肆贰叁肆贰叁叁零零' 758 >>> num2cn('023232.005184132423423423300', numbering_type=NumberingType.HIGH, alt_2=False, upper=False, traditional=False) 759 '二万三千二百三十二点零零五一八四一三二四二三四二三四二三三零零' 760 >>> num2cn(111180000) 761 '一亿一千一百一十八万' 762 >>> num2cn(1821010) 763 '一百八十二万一千零一十' 764 >>> 
num2cn(182.1) 765 '一百八十二点一' 766 >>> num2cn('3.4') 767 '三点四' 768 >>> num2cn(16) 769 '十六' 770 >>> num2cn(10600) 771 '一万零六百' 772 >>> num2cn(110) 773 '一百一' 774 >>> num2cn(1600) 775 '一千六' 776 777 ``` 778 779 Args: 780 num (Tuple[int, float, str]): `int`, `float` or `str` value 781 numbering_type (Union[str, NumberingType], optional): Numbering type. Defaults to `NumberingType.MID`. 782 upper (bool, optional): Capitalized numbers. Defaults to `False`. 783 traditional (bool, optional): Traditional Chinese. Defaults to `False`. 784 alt_0 (bool, optional): Use alternative form of zero. Defaults to `False`. 785 alt_2 (bool, optional): Use alternative form of two. Defaults to `False`. 786 787 Returns: 788 str: Chinese string 789 """ 790 791 if alt_2 and upper: 792 raise ValueError("alt_2 and upper cannot be True at the same time.") 793 794 if alt_0 and upper: 795 raise ValueError("alt_0 and upper cannot be True at the same time.") 796 797 numbering_type = NumberingType(numbering_type) if isinstance(numbering_type, str) else numbering_type 798 system = NumberingSystem(numbering_type) 799 800 num_str = str(num) 801 dec_string = "" 802 803 if "." in num_str: 804 dec_string = num_str.rsplit(".", 1)[-1] 805 # dec_string = dec_string.rstrip("0") 806 807 int_symbols = system.int2symbols(int(float(num))) 808 dec_symbols = [system.digits[int(c)] for c in dec_string] 809 810 # e.g. "二百二" to "两百二", "二千二" to "两千二", "三亿零二万二" to "三亿零两万二 811 if alt_2: 812 int_symbols = system.alt_two_symbols(int_symbols) 813 814 # attribute name for simplified or traditional with upper case or not 815 attr_name = "traditional" if traditional else "simplified" 816 if upper: 817 attr_name = "upper_" + attr_name 818 819 # remove leading '一' for '十', e.g. 
一十六 to 十六 in integer part 820 if len(int_symbols) > 1 and (int_symbols[0] == system.digits[1]) and (getattr(int_symbols[1], "power", -1) == 1): 821 int_symbols = int_symbols[1:] 822 823 # remove trailing units, 1600 -> 一千六, 10600 -> 一萬零六百, 101600 -> 十萬一千六 in integer part 824 if len(int_symbols) > 3 and isinstance(int_symbols[-1], CNUnit): 825 if getattr(int_symbols[-3], "power", None) == (int_symbols[-1].power + 1): 826 int_symbols = int_symbols[:-1] 827 828 int_string = "".join(getattr(s, attr_name) for s in int_symbols) 829 int_string = re.sub(r"零+", "零", int_string) # remove multiple zeros in integer part only 830 if not int_string: 831 int_string = "零" 832 833 dec_string = "".join(getattr(s, attr_name) for s in dec_symbols) 834 835 result = int_string 836 837 if dec_string: 838 result += getattr(system.math.point, attr_name) + dec_string 839 840 if alt_0: 841 result = result.replace(getattr(system.digits[0], attr_name), str(system.digits[0].alt_s)) 842 843 return result
class NumberingType(Enum):
    r"""The three conventions for the large Chinese units: LOW, MID and HIGH.

    The units 亿, 兆, 京, 垓, 秭, 穰, 沟, 涧, 正, 载 are indexed by
    $i \in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]$ and stand for:

    - `LOW` : $10^{8 + i}$
    - `MID` : $10^{8 + i*4}$
    - `HIGH` : $10^{2^{i+3}}$

    ---

    | type  | 亿      | 兆      | 京      | 垓      | 秭       | 穰       | 沟       | 涧        | 正        | 载        |
    |-------| --------|---------|---------|---------|----------|----------|----------|-----------|-----------|-----------|
    |`low`  | $10^{8}$|$10^{9}$ |$10^{10}$|$10^{11}$|$10^{12}$ |$10^{13}$ |$10^{14}$ |$10^{15}$  |$10^{16}$  |$10^{17}$  |
    |`mid`  | $10^{8}$|$10^{12}$|$10^{16}$|$10^{20}$|$10^{24}$ |$10^{28}$ |$10^{32}$ |$10^{36}$  |$10^{40}$  |$10^{44}$  |
    |`high` | $10^{8}$|$10^{16}$|$10^{32}$|$10^{64}$|$10^{128}$|$10^{256}$|$10^{512}$|$10^{1024}$|$10^{2048}$|$10^{4096}$|
    """

    LOW = "low"
    """
    | type  | 亿      | 兆      | 京      | 垓      | 秭       | 穰       | 沟       | 涧        | 正        | 载        |
    |-------| --------|---------|---------|---------|----------|----------|----------|-----------|-----------|-----------|
    |`low`  | $10^{8}$|$10^{9}$ |$10^{10}$|$10^{11}$|$10^{12}$ |$10^{13}$ |$10^{14}$ |$10^{15}$  |$10^{16}$  |$10^{17}$  |
    """

    MID = "mid"
    """
    | type  | 亿      | 兆      | 京      | 垓      | 秭       | 穰       | 沟       | 涧        | 正        | 载        |
    |-------| --------|---------|---------|---------|----------|----------|----------|-----------|-----------|-----------|
    |`mid`  | $10^{8}$|$10^{12}$|$10^{16}$|$10^{20}$|$10^{24}$ |$10^{28}$ |$10^{32}$ |$10^{36}$  |$10^{40}$  |$10^{44}$  |
    """

    HIGH = "high"
    """
    | type  | 亿      | 兆      | 京      | 垓      | 秭       | 穰       | 沟       | 涧        | 正        | 载        |
    |-------| --------|---------|---------|---------|----------|----------|----------|-----------|-----------|-----------|
    |`high` | $10^{8}$|$10^{16}$|$10^{32}$|$10^{64}$|$10^{128}$|$10^{256}$|$10^{512}$|$10^{1024}$|$10^{2048}$|$10^{4096}$|
    """

    @property
    def powers(self) -> List[int]:
        """Exponents of the ten large units (亿 through 载) under this numbering type."""
        unit_indices = range(10)
        if self is NumberingType.LOW:
            # consecutive powers of ten starting at 10^8
            return [8 + index for index in unit_indices]
        if self is NumberingType.MID:
            # every unit is 10^4 times the previous one
            return [8 + 4 * index for index in unit_indices]
        # HIGH: every unit is the square of the previous one
        return [2 ** (index + 3) for index in unit_indices]
Numbering system types: LOW, MID, HIGH
Chinese numbering types:
For $i \in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]$:

- `LOW` : $10^{8 + i}$
- `MID` : $10^{8 + 4i}$
- `HIGH` : $10^{2^{i+3}}$

| type | 亿 | 兆 | 京 | 垓 | 秭 | 穰 | 沟 | 涧 | 正 | 载 |
|---|---|---|---|---|---|---|---|---|---|---|
| `low` | $10^{8}$ | $10^{9}$ | $10^{10}$ | $10^{11}$ | $10^{12}$ | $10^{13}$ | $10^{14}$ | $10^{15}$ | $10^{16}$ | $10^{17}$ |
| `mid` | $10^{8}$ | $10^{12}$ | $10^{16}$ | $10^{20}$ | $10^{24}$ | $10^{28}$ | $10^{32}$ | $10^{36}$ | $10^{40}$ | $10^{44}$ |
| `high` | $10^{8}$ | $10^{16}$ | $10^{32}$ | $10^{64}$ | $10^{128}$ | $10^{256}$ | $10^{512}$ | $10^{1024}$ | $10^{2048}$ | $10^{4096}$ |
| type | 亿 | 兆 | 京 | 垓 | 秭 | 穰 | 沟 | 涧 | 正 | 载 |
|---|---|---|---|---|---|---|---|---|---|---|
| `low` | $10^{8}$ | $10^{9}$ | $10^{10}$ | $10^{11}$ | $10^{12}$ | $10^{13}$ | $10^{14}$ | $10^{15}$ | $10^{16}$ | $10^{17}$ |
| type | 亿 | 兆 | 京 | 垓 | 秭 | 穰 | 沟 | 涧 | 正 | 载 |
|---|---|---|---|---|---|---|---|---|---|---|
| `mid` | $10^{8}$ | $10^{12}$ | $10^{16}$ | $10^{20}$ | $10^{24}$ | $10^{28}$ | $10^{32}$ | $10^{36}$ | $10^{40}$ | $10^{44}$ |
type | 亿 | 兆 | 京 | 垓 | 秭 | 穰 | 沟 | 涧 | 正 | 载 |
---|---|---|---|---|---|---|---|---|---|---|
high |
$10^{8}$ | $10^{16}$ | $10^{32}$ | $10^{64}$ | $10^{128}$ | $10^{256}$ | $10^{512}$ | $10^{1024}$ | $10^{2048}$ | $10^{4096}$ |
60 @property 61 def powers(self) -> List[int]: 62 """Powers of units for each numbering type""" 63 return { 64 NumberingType.LOW: [8 + i for i in range(10)], 65 NumberingType.MID: [8 + 4 * i for i in range(10)], 66 NumberingType.HIGH: [2 ** (i + 3) for i in range(10)], 67 }[self]
Powers of units for each numbering type
Inherited Members
- enum.Enum
- name
- value
@dataclass
class CNChar:
    """Base class for every Chinese numeral character.

    Stores the simplified and traditional forms of one character together with
    their capitalized (upper) variants. Converting an instance to a string gives
    the simplified form, or `Empty <class name>` when that form is empty.

    Example:
    ```python

    >>> negative = CNChar(simplified="负", traditional="負")
    >>> negative.simplified
    '负'
    >>> negative.traditional
    '負'
    >>> negative.upper_simplified
    '负'
    >>> negative.upper_traditional
    '負'

    ```
    """

    simplified: Optional[str] = None
    """Simplified Chinese character"""

    traditional: Optional[str] = None
    """Traditional Chinese character"""

    upper_simplified: Optional[str] = None
    """Capitalized character in simplified Chinese. Defaults to `None` means same as `self.simplified`."""

    upper_traditional: Optional[str] = None
    """Capitalized character in traditional Chinese. Defaults to `None` means same as `self.traditional`."""

    @property
    def all_forms(self) -> str:
        """Every string-valued field of the character, concatenated in field order"""
        string_fields = [value for value in vars(self).values() if isinstance(value, str)]
        return "".join(string_fields)

    def __post_init__(self):
        """Fill every missing form from the next more basic one"""
        if not self.simplified:
            self.simplified = ""
        if not self.traditional:
            self.traditional = self.simplified
        if not self.upper_simplified:
            self.upper_simplified = self.simplified
        if not self.upper_traditional:
            self.upper_traditional = self.traditional

    def __str__(self) -> str:
        if self.simplified:
            return self.simplified
        return f"Empty {self.__class__.__name__}"

    def __repr__(self) -> str:
        return str(self)
Base Chinese char class.
Each object has simplified and traditional strings.
When converted to string, a CNChar shows its simplified string, or `Empty CNChar` (the class name) when the simplified string is empty.
Example:
>>> negative = CNChar(simplified="负", traditional="負")
>>> negative.simplified
'负'
>>> negative.traditional
'負'
>>> negative.upper_simplified
'负'
>>> negative.upper_traditional
'負'
Capitalized character in simplified Chinese. Defaults to None
means same as self.simplified
.
@dataclass
class CNUnit(CNChar):
    """Chinese number unit class

    Each unit is a `CNChar` that additionally carries the power of ten it stands for.

    Example:
    ```python

    >>> wan = CNUnit(*"万萬萬萬", power=4)
    >>> wan
    10^4

    ```
    """

    power: int = 0
    r"""The power of this unit, e.g. `power` = 4 for `'万'` ( \(10^4\) )"""

    def __str__(self) -> str:
        return f"10^{self.power}"

    def __repr__(self) -> str:
        return str(self)

    @classmethod
    def create(cls, power: int, numbering_type: NumberingType = NumberingType.MID) -> CNUnit:
        """Create one unit character based on power value from constants

        - `SMALLER_CHINESE_NUMBERING_UNITS_SIMPLIFIED`
        - `SMALLER_CHINESE_NUMBERING_UNITS_TRADITIONAL`
        - `LARGER_CHINESE_NUMBERING_UNITS_SIMPLIFIED`
        - `LARGER_CHINESE_NUMBERING_UNITS_TRADITIONAL`

        Example:
        ```python

        >>> CNUnit.create(power=8, numbering_type=NumberingType.LOW).simplified
        '亿'
        >>> CNUnit.create(power=12, numbering_type=NumberingType.LOW).simplified
        '秭'
        >>> CNUnit.create(power=12, numbering_type=NumberingType.HIGH).simplified
        Traceback (most recent call last):
        ...
        ValueError: power = 12 is invalid for numbering_type = <NumberingType.HIGH: 'high'>

        ```

        Args:
            power (int): Unit power, starting from 1.
            numbering_type (NumberingType, optional): Numbering type. Defaults to `NumberingType.MID`.

        Raises:
            ValueError: Raised when invalid `numbering_type` is provided, or when `power`
                does not correspond to any unit of `numbering_type` (including powers
                beyond the largest unit in the table)

        Returns:
            CNUnit: Created unit object
        """
        assert power > 0, "Power should be greater than 0."

        # powers 1-4 (the first four entries of UNITS) are shared by every numbering type
        if power < 5:
            return cls(*UNITS[power - 1], power=power)  # type: ignore[misc]

        # invert the per-type formula to recover the index of the large unit
        i = float(power - 8)
        if numbering_type == NumberingType.LOW:
            pass
        elif numbering_type == NumberingType.MID:
            i = i / 4
        elif numbering_type == NumberingType.HIGH:
            i = log2(power) - 3
        else:
            raise ValueError(f"Numbering type should be in {NumberingType} but {numbering_type} is provided.")

        # a fractional index means the power falls between two units
        i = int(i) if i.is_integer() else -1

        # reject powers below the first large unit and powers past the end of the
        # table; the latter used to surface as an IndexError from UNITS[i + 4]
        if i < 0 or i + 4 >= len(UNITS):
            raise ValueError(f"{power = } is invalid for {numbering_type = }")

        return cls(*UNITS[i + 4], power=power)  # type: ignore[misc]
Chinese number unit class
Each unit is a CNChar
with additional upper type strings.
Example:
>>> wan = CNUnit(*"万萬萬萬", power=4)
>>> wan
10^4
@classmethod
def create(cls, power: int, numbering_type: NumberingType = NumberingType.MID) -> CNUnit:
    """Create one unit character based on power value from constants

    - `SMALLER_CHINESE_NUMBERING_UNITS_SIMPLIFIED`
    - `SMALLER_CHINESE_NUMBERING_UNITS_TRADITIONAL`
    - `LARGER_CHINESE_NUMBERING_UNITS_SIMPLIFIED`
    - `LARGER_CHINESE_NUMBERING_UNITS_TRADITIONAL`

    Example:
    ```python

    >>> CNUnit.create(power=8, numbering_type=NumberingType.LOW).simplified
    '亿'
    >>> CNUnit.create(power=12, numbering_type=NumberingType.LOW).simplified
    '秭'
    >>> CNUnit.create(power=12, numbering_type=NumberingType.HIGH).simplified
    Traceback (most recent call last):
    ...
    ValueError: power = 12 is invalid for numbering_type = <NumberingType.HIGH: 'high'>

    ```

    Args:
        power (int): Unit power, starting from 1.
        numbering_type (NumberingType, optional): Numbering type. Defaults to `NumberingType.MID`.

    Raises:
        ValueError: Raised when invalid `numbering_type` is provided, or when `power`
            does not correspond to any unit of `numbering_type` (including powers
            beyond the largest unit in the table)

    Returns:
        CNUnit: Created unit object
    """
    assert power > 0, "Power should be greater than 0."

    # powers 1-4 (the first four entries of UNITS) are shared by every numbering type
    if power < 5:
        return cls(*UNITS[power - 1], power=power)  # type: ignore[misc]

    # invert the per-type formula to recover the index of the large unit
    i = float(power - 8)
    if numbering_type == NumberingType.LOW:
        pass
    elif numbering_type == NumberingType.MID:
        i = i / 4
    elif numbering_type == NumberingType.HIGH:
        i = log2(power) - 3
    else:
        raise ValueError(f"Numbering type should be in {NumberingType} but {numbering_type} is provided.")

    # a fractional index means the power falls between two units
    i = int(i) if i.is_integer() else -1

    # reject powers below the first large unit and powers past the end of the
    # table; the latter used to surface as an IndexError from UNITS[i + 4]
    if i < 0 or i + 4 >= len(UNITS):
        raise ValueError(f"{power = } is invalid for {numbering_type = }")

    return cls(*UNITS[i + 4], power=power)  # type: ignore[misc]
Create one unit character based on power value from constants
SMALLER_CHINESE_NUMBERING_UNITS_SIMPLIFIED
SMALLER_CHINESE_NUMBERING_UNITS_TRADITIONAL
LARGER_CHINESE_NUMBERING_UNITS_SIMPLIFIED
LARGER_CHINESE_NUMBERING_UNITS_TRADITIONAL
Example:
>>> CNUnit.create(power=8, numbering_type=NumberingType.LOW).simplified
'亿'
>>> CNUnit.create(power=12, numbering_type=NumberingType.LOW).simplified
'秭'
>>> CNUnit.create(power=12, numbering_type=NumberingType.HIGH).simplified
Traceback (most recent call last):
...
ValueError: power = 12 is invalid for numbering_type = <NumberingType.HIGH: 'high'>
Arguments:
- power (int): Unit power, starting from 1.
- numbering_type (NumberingType, optional): Numbering type. Defaults to
NumberingType.MID
.
Raises:
- ValueError: Raised when invalid
numbering_type
is provided
Returns:
CNUnit: Created unit object
Inherited Members
@dataclass
class CNDigit(CNChar):
    """A single Chinese digit character together with its integer value.

    Example:
    ```python

    >>> CNDigit(*"三叁叁叁", int_value=3)
    3

    ```
    """

    int_value: int = 0
    """Integer value of the digit, 0 to 9. Defaults to 0."""

    alt_s: Optional[str] = None
    """Alternative simplified character, e.g. '两' for 2. Defaults to `None`.
    """

    alt_t: Optional[str] = None
    """Alternative traditional character, e.g. '兩' for 2. Defaults to `None`.
    """

    def __str__(self):
        # a digit prints as its numeric value rather than as a Chinese character
        return f"{self.int_value}"

    def __repr__(self):
        return self.__str__()
Chinese number digit class
Example:
>>> CNDigit(*"三叁叁叁", int_value=3)
3
Alternative simplified character, e.g. '两' for 2. Defaults to None
.
Alternative traditional character, e.g. '兩' for 2. Defaults to None
.
Inherited Members
@dataclass
class CNMath(CNChar):
    """
    A Chinese math operator character (sign or decimal point).

    Example:
    ```python

    >>> positive = CNMath(*"正正正正", symbol="+", expression=lambda x: +x)
    >>> positive.symbol
    '+'

    ```
    """

    symbol: str = ""
    """Mathematical symbol, e.g. '+'. Defaults to an empty string."""

    expression: Optional[Callable] = None
    """Mathematical expression, e.g. `lambda x: +x`. Defaults to `None`."""

    def __str__(self):
        # an operator prints as its ASCII symbol
        return self.symbol

    def __repr__(self):
        return self.__str__()
Chinese math operators
Example:
>>> positive = CNMath(*"正正正正", symbol="+", expression=lambda x: +x)
>>> positive.symbol
'+'
Mathematical expression, e.g. lambda x: +x
. Defaults to None
.
Inherited Members
@dataclass
class MathSymbols:
    """Container for the math symbols shared by simplified and traditional Chinese

    - positive = ['正', '正']
    - negative = ['负', '負']
    - point = ['点', '點']

    Used in `NumberingSystem`. Iterating an instance yields its field values in
    declaration order.

    Example:
    ```python

    >>> positive = CNMath(*"正正正正", symbol="+", expression=lambda x: +x)
    >>> negative = CNMath(*"负負负負", symbol="-", expression=lambda x: -x)
    >>> point = CNMath(*"点點点點", symbol=".", expression=lambda integer, decimal: float(str(integer) + "." + str(decimal)))
    >>> math = MathSymbols(positive, negative, point)
    >>> math.positive
    +
    >>> list(math)
    [+, -, .]
    >>> for i in math:
    ...     print(i)
    +
    -
    .

    ```
    """

    positive: CNMath
    """Positive"""

    negative: CNMath
    """Negative"""

    point: CNMath
    """Decimal point"""

    def __iter__(self):
        yield from vars(self).values()
Math symbols used in Chinese for both traditional and simplified Chinese
- positive = ['正', '正']
- negative = ['负', '負']
- point = ['点', '點']
Used in NumberingSystem
.
Example:
>>> positive = CNMath(*"正正正正", symbol="+", expression=lambda x: +x)
>>> negative = CNMath(*"负負负負", symbol="-", expression=lambda x: -x)
>>> point = CNMath(*"点點点點", symbol=".", expression=lambda integer, decimal: float(str(integer) + "." + str(decimal)))
>>> math = MathSymbols(positive, negative, point)
>>> math.positive
+
>>> list(math)
[+, -, .]
>>> for i in math:
... print(i)
+
-
.
class NumberingSystem:
    """Numbering system class

    Bundles the digits, units and math symbols of one `NumberingType` and converts
    between Chinese number strings, symbol lists and integer values.

    Example:
    ```python

    >>> system = NumberingSystem(NumberingType.MID)
    >>> system.numbering_type
    <NumberingType.MID: 'mid'>
    >>> system.digits[0]
    0
    >>> system.units[0]
    10^1
    >>> system.units[7].simplified
    '垓'
    >>> system.math.positive
    +

    ```
    """

    # region: fields
    _numbering_type: NumberingType
    """Numbering type"""

    _digits: List[CNDigit]
    """Digits"""

    _units: List[CNUnit]
    """Units"""

    _maths: MathSymbols
    """Math symbols"""

    @property
    def numbering_type(self) -> NumberingType:
        """Numbering type"""
        return self._numbering_type

    @numbering_type.setter
    def numbering_type(self, value: NumberingType):
        self._numbering_type = value
        # the first four units are kept; everything after them is rebuilt for the new type
        self._units[4:] = [CNUnit.create(i, self._numbering_type) for i in self._numbering_type.powers]

    @property
    def digits(self) -> List[CNDigit]:
        """Digits

        A new list of new `CNDigit` objects is built on every access; zero and two
        additionally receive their alternative forms.
        """
        result = [CNDigit(*v, int_value=i) for i, v in enumerate(DIGITS)]  # type: ignore[misc]
        result[0].alt_s, result[0].alt_t = "〇", "〇"
        result[2].alt_s, result[2].alt_t = "两", "兩"
        return result

    @property
    def units(self) -> List[CNUnit]:
        """Units"""
        return self._units

    @cached_property
    def math(self) -> MathSymbols:
        """Math symbols"""
        # only two forms are passed; CNChar.__post_init__ fills the upper forms from them
        positive_cn = CNMath(*"正正", symbol="+", expression=lambda x: x)  # type: ignore[misc]
        negative_cn = CNMath(*"负負", symbol="-", expression=lambda x: -x)  # type: ignore[misc]
        point_cn = CNMath(*"点點", symbol=".", expression=lambda i, d: float(f"{i}.{d}"))  # type: ignore[misc]
        return MathSymbols(positive_cn, negative_cn, point_cn)

    # endregion: fields

    def __init__(self, numbering_type: NumberingType = NumberingType.MID) -> None:
        """Construction

        Args:
            numbering_type (NumberingType, optional): Numbering type. Defaults to `NumberingType.MID`.
        """
        self._numbering_type = numbering_type
        # units with powers 1-4 come straight from the first four UNITS entries
        self._units = [CNUnit(*UNITS[i], power=i + 1) for i in range(4)]  # type: ignore[misc]
        # slice-assigning past the end appends the large units of this numbering type
        self._units[4:] = [CNUnit.create(i, self._numbering_type) for i in self._numbering_type.powers]

    def __getitem__(self, key: str) -> SymbolType:
        """Look up the symbol (unit, digit or math symbol) whose forms contain `key`

        Args:
            key (str): Character to look up

        Raises:
            ValueError: Raised when `key` is not a string or matches no symbol

        Returns:
            SymbolType: The first matching symbol, searching units, then digits, then math symbols
        """
        if not isinstance(key, str):
            raise ValueError(f"{key = } should be a string.")

        for c in self.units + self.digits + list(self.math):
            # substring test against the concatenation of every string field of the symbol
            if key in c.all_forms:
                return c

        raise ValueError(f"{key} is not in {self.numbering_type.name} system.")

    def cn2symbols(self, cn_str: str) -> Tuple[List[SymbolType], List[SymbolType]]:
        """Chinese string to symbols

        Example:
        ```python

        >>> system = NumberingSystem(NumberingType.MID)
        >>> system.cn2symbols("一百八")
        ([1, 10^2, 8], [])
        >>> system.cn2symbols("一百八十")
        ([1, 10^2, 8, 10^1], [])
        >>> system.cn2symbols("一百八点五六七")
        ([1, 10^2, 8], [5, 6, 7])
        >>> system.cn2symbols("两千万一百八十")
        ([2, 10^7, 1, 10^2, 8, 10^1], [])
        >>> system.cn2symbols("正两千万一百八十")
        ([+, 2, 10^7, 1, 10^2, 8, 10^1], [])
        >>> system.cn2symbols("负两千万一百八十")
        ([-, 2, 10^7, 1, 10^2, 8, 10^1], [])
        >>> system.cn2symbols("点负两千万一百八十")
        Traceback (most recent call last):
        ...
        ValueError: First symbol in decimal part should not be a math symbol, - is provided.
        >>> system.cn2symbols("两千万点一百点八十")
        Traceback (most recent call last):
        ...
        ValueError: Multiple points in the number 两千万点一百点八十.
        >>> system.cn2symbols("两千万点一百點八十")
        Traceback (most recent call last):
        ...
        ValueError: Multiple points in the number 两千万点一百點八十.

        ```

        Args:
            cn_str (str): Chinese number

        Raises:
            ValueError: Raised when the string has more than one decimal point, when the
                decimal part starts with a math symbol, or when a character is not in
                the numbering system

        Returns:
            Tuple[List[SymbolType], List[SymbolType]]: Integer symbols, decimal symbols
        """
        if cn_str == "":
            return [], []

        int_part, dec_part = cn_str, ""
        # split on ".", "点" or "點"; more than two pieces means several decimal points
        int_dec = re.split(r"\.|点|點", cn_str)
        if len(int_dec) > 2:
            raise ValueError(f"Multiple points in the number {cn_str}.")
        int_part, dec_part = int_dec if len(int_dec) == 2 else (int_dec[0], "")

        # deep copies, because the unit powers are modified below
        integer_value = [copy.deepcopy(self[c]) for c in int_part]

        # merge each pair of adjacent units into the first one, e.g. 千 万 -> 10^7;
        # the absorbed unit is replaced by None and skipped on the next iteration
        for i, v in enumerate(integer_value):
            if not isinstance(v, CNUnit):
                continue
            if i + 1 < len(integer_value) and isinstance(integer_value[i + 1], CNUnit):
                v.power += integer_value[i + 1].power  # type: ignore[union-attr]
                integer_value[i + 1] = None  # type: ignore[union-attr]

        integer_value = [v for v in integer_value if v is not None]

        # add to each unit the power of the first larger unit found after it
        for i, v in enumerate(integer_value):
            if not isinstance(v, CNUnit):
                continue
            for u in integer_value[i + 1 :]:
                if isinstance(u, CNUnit) and u.power > v.power:
                    v.power += u.power
                    break

        decimal_value = [copy.deepcopy(self[c]) for c in dec_part]

        # if first symbol is a math symbol, e.g. '正两千万一百八十'
        # (the deep copy is replaced by the instance held in self.math)
        if int_part and (first_symbol := [c for c in self.math if int_part[0] in c.all_forms]):
            integer_value[0] = first_symbol[0]

        # if first symbol is a math symbol, e.g. '点负两千万一百八十'
        if decimal_value and (decimal_value[0] in self.math):
            raise ValueError(
                f"First symbol in decimal part should not be a math symbol, {decimal_value[0]} is provided."
            )

        return integer_value, decimal_value

    def _refine_integer_symbols(self, integer_symbols: List[SymbolType]) -> List[SymbolType]:
        """Refine integer symbols. Do not change math symbols.

        Example:
        ```python

        >>> s = NumberingSystem(NumberingType.MID)
        >>> s._refine_integer_symbols(s.cn2symbols("十八")[0])
        [1, 10^1, 8, 10^0]
        >>> s._refine_integer_symbols(s.cn2symbols("两千万一百八十")[0])
        [2, 10^7, 1, 10^2, 8, 10^1]
        >>> s._refine_integer_symbols(s.cn2symbols("两千万零一百八十")[0])
        [2, 10^7, 1, 10^2, 8, 10^1]
        >>> s._refine_integer_symbols(s.cn2symbols("两亿六")[0])
        [2, 10^8, 6, 10^7]

        ```

        Args:
            integer_symbols (List[SymbolType]): Raw integer symbols

        Raises:
            ValueError: Raised when a symbol is neither a math symbol, a digit nor a unit

        Returns:
            List[SymbolType]: Refined symbols
        """
        if not integer_symbols:
            return integer_symbols

        # first symbol is unit, add 1 before it, e.g. , "十五" to "一十五"
        if isinstance(integer_symbols[0], CNUnit) and integer_symbols[0].power == 1:
            integer_symbols = [self.digits[1]] + integer_symbols

        # last symbol is digit and the second last symbol is unit
        # e.g. "十 五" to "十 五 10^0", "二 百 五" to "二 百 五 10^1"
        if len(integer_symbols) > 1:
            if isinstance(integer_symbols[-1], CNDigit) and isinstance(integer_symbols[-2], CNUnit):
                # add a dummy unit
                # NOTE: += extends the list in place, so unless the list was rebound
                # above, the caller's list receives the dummy unit as well
                integer_symbols += [CNUnit(power=integer_symbols[-2].power - 1)]

        result: List[SymbolType] = []
        unit_count = 0
        for s in integer_symbols:
            if isinstance(s, CNMath):
                # reset unit_count, e.g. "两千万" has two units
                unit_count = 0
                continue
            if isinstance(s, CNDigit):
                # reset unit_count, e.g. "两千万" has two units
                unit_count = 0
                # zero digits are dropped from the refined list
                if s.int_value > 0:
                    result.append(s)
                continue
            if not isinstance(s, CNUnit):
                raise ValueError(f"Invalid symbol {s} in {integer_symbols}.")

            # create a dummy unit
            current_unit = CNUnit("", "", "", "", s.power)
            unit_count += 1

            # store the first met unit
            if unit_count == 1:
                result.append(current_unit)
                continue

            # if there are more than one units, sum them, e.g. "两 千 万" to "两 10^7"
            result[-1].power += current_unit.power  # type: ignore[union-attr]
        # math symbols were skipped in the loop; only a leading negative sign is restored
        if integer_symbols[0] == self.math.negative:
            result = [self.math.negative] + result
        return result

    def get_int_value(self, integer_symbols: List[SymbolType]) -> int:
        """Compute the value from symbol

        Example:
        ```python

        >>> s = NumberingSystem(NumberingType.MID)
        >>> s.get_int_value(s.cn2symbols("十八")[0])
        18
        >>> s.get_int_value(s.cn2symbols("两千万一百八十")[0])
        20000180
        >>> s.get_int_value(s.cn2symbols("两亿六")[0])
        260000000

        ```

        Args:
            integer_symbols (List[SymbolType]): Symbols, without point

        Returns:
            int: value (math symbols are ignored here, so the sign is not applied)
        """
        integer_symbols = self._refine_integer_symbols(integer_symbols)

        # value[-1] is the section being built; earlier entries are finished sections
        value = [0]
        last_power = 0
        for s in integer_symbols:
            if isinstance(s, CNDigit):
                value[-1] = s.int_value
            elif isinstance(s, CNUnit):
                value[-1] *= pow(10, s.power)
                # a unit larger than any seen so far also scales the finished sections
                if s.power > last_power:
                    value[:-1] = [v * pow(10, s.power) for v in value[:-1]]  # pylint: disable=no-member
                    last_power = s.power
                value.append(0)
        return sum(value)

    def int2symbols(self, int_value: Union[int, str]) -> List[SymbolType]:
        """Integer to symbols

        Example:
        ```python

        >>> s = NumberingSystem(NumberingType.MID)
        >>> s.int2symbols(18)
        [1, 10^1, 8]
        >>> s.int2symbols(20000180)
        [2, 10^3, 10^4, 0, 1, 10^2, 8, 10^1]
        >>> s.int2symbols(26)
        [2, 10^1, 6]
        >>> s.int2symbols(320)
        [3, 10^2, 2, 10^1]
        >>> s.int2symbols(220)
        [2, 10^2, 2, 10^1]
        >>> s.int2symbols("220")
        [2, 10^2, 2, 10^1]

        ```

        Args:
            int_value (Union[int, str]): Integer value or digit string, e.g. 34, "34"

        Returns:
            List[SymbolType]: List of values
        """
        value_string = str(int_value)
        striped_string = value_string.lstrip("0")

        # record nothing if all zeros
        if not striped_string:
            return []

        # record one digits
        if len(striped_string) == 1:
            result: List[SymbolType] = [self.digits[int(striped_string)]]
            # every stripped leading zero is kept as a zero digit in front
            if len(value_string) != len(striped_string):
                result = [self.digits[0] for _ in range(len(value_string) - len(striped_string))] + result
            return result

        # recursively record multiple digits

        # find the unit for the first digit, e.g. 123 -> 10^2
        result_unit = next(u for u in reversed(self.units) if u.power < len(striped_string))

        # get the first part of the number, e.g. 123 -> 1
        # (taken from the unstripped string, so leading zeros stay in the head)
        result_string = value_string[: -result_unit.power]

        # recursively record the first part of the number, e.g. 123 -> [1, 10^2, 2, 10^1, 3]
        return self.int2symbols(result_string) + [result_unit] + self.int2symbols(striped_string[-result_unit.power :])

    def alt_two_symbols(self, integer_symbols: List[SymbolType]) -> List[SymbolType]:
        """Alternative two symbols
        e.g. "二百二" to "两百二", "二千二" to "两千二", "三亿零二万二" to "三亿零两万二"

        The digit objects in `integer_symbols` are modified in place.

        Args:
            integer_symbols (List[SymbolType]): Symbols

        Returns:
            List[SymbolType]: Symbols
        """
        liang = self.digits[2]
        for i, v in enumerate(integer_symbols):
            if not isinstance(v, CNDigit):
                continue
            if v.int_value != 2:
                continue
            next_symbol = integer_symbols[i + 1] if i < len(integer_symbols) - 1 else None
            previous_symbol = integer_symbols[i - 1] if i > 0 else None

            # if the next symbol is not a unit, skip
            if not isinstance(next_symbol, CNUnit):
                continue

            # e.g. "一亿零二百" leading_zero = True
            leading_zero = getattr(previous_symbol, "int_value", "invalid") == 0  # False == 0 in Python

            # e.g. "二百二" to "两百二"
            previous_is_unit = isinstance(previous_symbol, CNUnit)

            if not (leading_zero or previous_is_unit or (previous_symbol is None)):
                continue

            # e.g. "二百二" to "两百二", "二千二" to "两千二"
            # (a two directly before the power-1 unit is left unchanged)
            if next_symbol.power > 1:
                integer_symbols[i].simplified = liang.alt_s
                integer_symbols[i].traditional = liang.alt_t
                integer_symbols[i].upper_simplified = liang.alt_s
                integer_symbols[i].upper_traditional = liang.alt_t
        return integer_symbols
Numbering system class
Example:
>>> system = NumberingSystem(NumberingType.MID)
>>> system.numbering_type
<NumberingType.MID: 'mid'>
>>> system.digits[0]
0
>>> system.units[0]
10^1
>>> system.units[7].simplified
'垓'
>>> system.math.positive
+
def __init__(self, numbering_type: NumberingType = NumberingType.MID) -> None:
    """Build the unit table for the given numbering type

    Args:
        numbering_type (NumberingType, optional): Numbering type. Defaults to `NumberingType.MID`.
    """
    self._numbering_type = numbering_type
    # units with powers 1-4 come straight from the first four UNITS entries
    small_units = [CNUnit(*UNITS[index], power=index + 1) for index in range(4)]  # type: ignore[misc]
    # large units depend on the numbering type
    large_units = [CNUnit.create(power, numbering_type) for power in numbering_type.powers]
    self._units = small_units + large_units
Construction
@property
def numbering_type(self) -> NumberingType:
    """The numbering type this system currently uses"""
    current_type = self._numbering_type
    return current_type
Numbering type
@property
def digits(self) -> List[CNDigit]:
    """Freshly built digit objects, one per entry of `DIGITS`

    Zero and two additionally receive their alternative forms.
    """
    built: List[CNDigit] = []
    for number, forms in enumerate(DIGITS):
        built.append(CNDigit(*forms, int_value=number))  # type: ignore[misc]

    zero, two = built[0], built[2]
    zero.alt_s = "〇"
    zero.alt_t = "〇"
    two.alt_s = "两"
    two.alt_t = "兩"
    return built
Digits
@cached_property
def math(self) -> MathSymbols:
    """Positive sign, negative sign and decimal point, built once per instance"""
    # only two forms are passed per symbol; the remaining forms default from them
    return MathSymbols(
        CNMath(*"正正", symbol="+", expression=lambda x: x),  # type: ignore[misc]
        CNMath(*"负負", symbol="-", expression=lambda x: -x),  # type: ignore[misc]
        CNMath(*"点點", symbol=".", expression=lambda i, d: float(f"{i}.{d}")),  # type: ignore[misc]
    )
Math symbols
def cn2symbols(self, cn_str: str) -> Tuple[List[SymbolType], List[SymbolType]]:
    """Chinese string to symbols

    Example:
    ```python

    >>> system = NumberingSystem(NumberingType.MID)
    >>> system.cn2symbols("一百八")
    ([1, 10^2, 8], [])
    >>> system.cn2symbols("一百八十")
    ([1, 10^2, 8, 10^1], [])
    >>> system.cn2symbols("一百八点五六七")
    ([1, 10^2, 8], [5, 6, 7])
    >>> system.cn2symbols("两千万一百八十")
    ([2, 10^7, 1, 10^2, 8, 10^1], [])
    >>> system.cn2symbols("正两千万一百八十")
    ([+, 2, 10^7, 1, 10^2, 8, 10^1], [])
    >>> system.cn2symbols("负两千万一百八十")
    ([-, 2, 10^7, 1, 10^2, 8, 10^1], [])
    >>> system.cn2symbols("点负两千万一百八十")
    Traceback (most recent call last):
    ...
    ValueError: First symbol in decimal part should not be a math symbol, - is provided.
    >>> system.cn2symbols("两千万点一百点八十")
    Traceback (most recent call last):
    ...
    ValueError: Multiple points in the number 两千万点一百点八十.
    >>> system.cn2symbols("两千万点一百點八十")
    Traceback (most recent call last):
    ...
    ValueError: Multiple points in the number 两千万点一百點八十.

    ```

    Args:
        cn_str (str): Chinese number

    Raises:
        ValueError: Raised when the string has more than one decimal point, when the
            decimal part starts with a math symbol, or when a character is not in
            the numbering system

    Returns:
        Tuple[List[SymbolType], List[SymbolType]]: Integer symbols, decimal symbols
    """
    if cn_str == "":
        return [], []

    int_part, dec_part = cn_str, ""
    # split on ".", "点" or "點"; more than two pieces means several decimal points
    int_dec = re.split(r"\.|点|點", cn_str)
    if len(int_dec) > 2:
        raise ValueError(f"Multiple points in the number {cn_str}.")
    int_part, dec_part = int_dec if len(int_dec) == 2 else (int_dec[0], "")

    # deep copies, because the unit powers are modified below
    integer_value = [copy.deepcopy(self[c]) for c in int_part]

    # merge each pair of adjacent units into the first one, e.g. 千 万 -> 10^7;
    # the absorbed unit is replaced by None and skipped on the next iteration
    for i, v in enumerate(integer_value):
        if not isinstance(v, CNUnit):
            continue
        if i + 1 < len(integer_value) and isinstance(integer_value[i + 1], CNUnit):
            v.power += integer_value[i + 1].power  # type: ignore[union-attr]
            integer_value[i + 1] = None  # type: ignore[union-attr]

    integer_value = [v for v in integer_value if v is not None]

    # add to each unit the power of the first larger unit found after it
    for i, v in enumerate(integer_value):
        if not isinstance(v, CNUnit):
            continue
        for u in integer_value[i + 1 :]:
            if isinstance(u, CNUnit) and u.power > v.power:
                v.power += u.power
                break

    decimal_value = [copy.deepcopy(self[c]) for c in dec_part]

    # if first symbol is a math symbol, e.g. '正两千万一百八十'
    # (the deep copy is replaced by the instance held in self.math)
    if int_part and (first_symbol := [c for c in self.math if int_part[0] in c.all_forms]):
        integer_value[0] = first_symbol[0]

    # if first symbol is a math symbol, e.g. '点负两千万一百八十'
    if decimal_value and (decimal_value[0] in self.math):
        raise ValueError(
            f"First symbol in decimal part should not be a math symbol, {decimal_value[0]} is provided."
        )

    return integer_value, decimal_value
Chinese string to symbols
Example:
>>> system = NumberingSystem(NumberingType.MID)
>>> system.cn2symbols("一百八")
([1, 10^2, 8], [])
>>> system.cn2symbols("一百八十")
([1, 10^2, 8, 10^1], [])
>>> system.cn2symbols("一百八点五六七")
([1, 10^2, 8], [5, 6, 7])
>>> system.cn2symbols("两千万一百八十")
([2, 10^7, 1, 10^2, 8, 10^1], [])
>>> system.cn2symbols("正两千万一百八十")
([+, 2, 10^7, 1, 10^2, 8, 10^1], [])
>>> system.cn2symbols("负两千万一百八十")
([-, 2, 10^7, 1, 10^2, 8, 10^1], [])
>>> system.cn2symbols("点负两千万一百八十")
Traceback (most recent call last):
...
ValueError: First symbol in decimal part should not be a math symbol, - is provided.
>>> system.cn2symbols("两千万点一百点八十")
Traceback (most recent call last):
...
ValueError: Multiple points in the number 两千万点一百点八十.
>>> system.cn2symbols("两千万点一百點八十")
Traceback (most recent call last):
...
ValueError: Multiple points in the number 两千万点一百點八十.
Arguments:
- cn_str (str): Chinese number
Returns:
Tuple[List[SymbolType], List[SymbolType]]: Integer symbols, decimal symbols
def get_int_value(self, integer_symbols: List[SymbolType]) -> int:
    """Evaluate integer symbols to their numeric value

    Example:
    ```python

    >>> s = NumberingSystem(NumberingType.MID)
    >>> s.get_int_value(s.cn2symbols("十八")[0])
    18
    >>> s.get_int_value(s.cn2symbols("两千万一百八十")[0])
    20000180
    >>> s.get_int_value(s.cn2symbols("两亿六")[0])
    260000000

    ```

    Args:
        integer_symbols (List[SymbolType]): Symbols, without point

    Returns:
        int: value (math symbols are ignored, so the sign is not applied)
    """
    refined = self._refine_integer_symbols(integer_symbols)

    # sections[-1] is the section being built; earlier entries are finished sections
    sections = [0]
    largest_power = 0
    for symbol in refined:
        if isinstance(symbol, CNDigit):
            sections[-1] = symbol.int_value
            continue
        if not isinstance(symbol, CNUnit):
            continue

        scale = 10 ** symbol.power
        sections[-1] *= scale
        # a unit larger than any seen so far also scales the finished sections
        if symbol.power > largest_power:
            sections[:-1] = [section * scale for section in sections[:-1]]
            largest_power = symbol.power
        sections.append(0)
    return sum(sections)
Compute the value from symbol
Example:
>>> s = NumberingSystem(NumberingType.MID)
>>> s.get_int_value(s.cn2symbols("十八")[0])
18
>>> s.get_int_value(s.cn2symbols("两千万一百八十")[0])
20000180
>>> s.get_int_value(s.cn2symbols("两亿六")[0])
260000000
Arguments:
- integer_symbols (List[SymbolType]): Symbols, without point
Returns:
int: value
def int2symbols(self, int_value: Union[int, str]) -> List[SymbolType]:
    """Break an integer into digit and unit symbols

    Example:
    ```python

    >>> s = NumberingSystem(NumberingType.MID)
    >>> s.int2symbols(18)
    [1, 10^1, 8]
    >>> s.int2symbols(20000180)
    [2, 10^3, 10^4, 0, 1, 10^2, 8, 10^1]
    >>> s.int2symbols(26)
    [2, 10^1, 6]
    >>> s.int2symbols(320)
    [3, 10^2, 2, 10^1]
    >>> s.int2symbols(220)
    [2, 10^2, 2, 10^1]
    >>> s.int2symbols("220")
    [2, 10^2, 2, 10^1]

    ```

    Args:
        int_value (Union[int, str]): Integer value or digit string, e.g. 34, "34"

    Returns:
        List[SymbolType]: List of values
    """
    text = str(int_value)
    significant = text.lstrip("0")

    # nothing but zeros: no symbols at all
    if not significant:
        return []

    # a single significant digit, preceded by one zero digit per stripped zero
    if len(significant) == 1:
        digit = self.digits[int(significant)]
        zero_count = len(text) - len(significant)
        return [self.digits[0] for _ in range(zero_count)] + [digit]

    # largest unit that fits below the leading digit, e.g. 123 -> 10^2
    unit = next(u for u in reversed(self.units) if u.power < len(significant))

    # head keeps any leading zeros of the input; tail is the last `unit.power` digits
    head = text[: -unit.power]
    tail = significant[-unit.power :]
    return self.int2symbols(head) + [unit] + self.int2symbols(tail)
Integer to symbols
Example:
>>> s = NumberingSystem(NumberingType.MID)
>>> s.int2symbols(18)
[1, 10^1, 8]
>>> s.int2symbols(20000180)
[2, 10^3, 10^4, 0, 1, 10^2, 8, 10^1]
>>> s.int2symbols(26)
[2, 10^1, 6]
>>> s.int2symbols(320)
[3, 10^2, 2, 10^1]
>>> s.int2symbols(220)
[2, 10^2, 2, 10^1]
>>> s.int2symbols("220")
[2, 10^2, 2, 10^1]
Arguments:
- int_value (Union[int, str]): Integer value or digit string, e.g. 34, "34"
Returns:
List[SymbolType]: List of values
def alt_two_symbols(self, integer_symbols: List[SymbolType]) -> List[SymbolType]:
    """Switch eligible twos to their alternative form
    e.g. "二百二" to "两百二", "二千二" to "两千二", "三亿零二万二" to "三亿零两万二"

    The digit objects in `integer_symbols` are modified in place.

    Args:
        integer_symbols (List[SymbolType]): Symbols

    Returns:
        List[SymbolType]: Symbols
    """
    liang = self.digits[2]
    last_index = len(integer_symbols) - 1
    for index, symbol in enumerate(integer_symbols):
        if not (isinstance(symbol, CNDigit) and symbol.int_value == 2):
            continue

        # only a two that is directly followed by a unit can change
        follower = integer_symbols[index + 1] if index < last_index else None
        if not isinstance(follower, CNUnit):
            continue

        # the two must start the number, follow a unit, or follow a zero digit
        # e.g. "一亿零二百"
        predecessor = integer_symbols[index - 1] if index > 0 else None
        after_zero = getattr(predecessor, "int_value", "invalid") == 0  # False == 0 in Python
        if predecessor is not None and not after_zero and not isinstance(predecessor, CNUnit):
            continue

        # a two directly before the power-1 unit is left unchanged
        if follower.power > 1:
            symbol.simplified = liang.alt_s
            symbol.traditional = liang.alt_t
            symbol.upper_simplified = liang.alt_s
            symbol.upper_traditional = liang.alt_t
    return integer_symbols
Alternative two symbols, e.g. "二百二" to "两百二", "二千二" to "两千二", "三亿零二万二" to "三亿零两万二"
Arguments:
- integer_symbols (List[SymbolType]): Symbols
Returns:
List[SymbolType]: Symbols
def cn2num(
    chinese_string: str,
    numbering_type: Union[str, NumberingType] = NumberingType.MID,
) -> Union[int, float]:
    """Convert a Chinese number string to an `int` or `float` value.

    Example:
    ```python

    >>> cn2num("负零点五")
    -0.5
    >>> cn2num("一百八")
    180
    >>> cn2num("一百八十")
    180
    >>> cn2num("一百八点五六七")
    180.567
    >>> cn2num("两千万一百八十")
    20000180
    >>> cn2num("正两千万一百八十")
    20000180

    ```

    Args:
        chinese_string (str): Chinese number.
        numbering_type (Union[str, NumberingType], optional): Numbering type, given as a
            `NumberingType` member or its string value. Defaults to `NumberingType.MID`.

    Raises:
        ValueError: Raised when a character is not in the numbering system, e.g. '你' is not a number nor a unit

    Returns:
        Union[int, float]: `float` when the string has a decimal part, otherwise the integer value.
    """
    if isinstance(numbering_type, str):
        numbering_type = NumberingType(numbering_type)

    system = NumberingSystem(numbering_type)
    integer_symbols, decimal_symbols = system.cn2symbols(chinese_string)
    magnitude = system.get_int_value(integer_symbols)

    if decimal_symbols:
        # Only digits contribute to the fraction; any unit symbols after the point are skipped.
        fraction_digits = "".join(
            str(symbol.int_value) for symbol in decimal_symbols if isinstance(symbol, CNDigit)
        )
        magnitude = float(f"{magnitude}.{fraction_digits}")

    # The sign is carried by a leading "negative" symbol in the integer part.
    if integer_symbols and integer_symbols[0] == system.math.negative:
        return -magnitude
    return magnitude
Convert a Chinese number to an `int` or `float` value.
Example:
>>> cn2num("负零点五")
-0.5
>>> cn2num("一百八")
180
>>> cn2num("一百八十")
180
>>> cn2num("一百八点五六七")
180.567
>>> cn2num("两千万一百八十")
20000180
>>> cn2num("正两千万一百八十")
20000180
Arguments:
- chinese_string (str): Chinese number.
- numbering_type (Union[str, NumberingType], optional): numbering type. Defaults to `NumberingType.MID`.
Raises:
- ValueError: Raised when a character is not in the numbering system, e.g. '你' is not a number nor a unit
Returns:
Union[int, float]: `int` or `float` value
def num2cn(
    num: Union[int, float, str],
    numbering_type: Union[str, NumberingType] = NumberingType.MID,
    upper: bool = False,
    traditional: bool = False,
    alt_0: bool = False,
    alt_2: bool = False,
) -> str:
    """Integer or float value to Chinese string

    The integer part is read exactly from the decimal text of `num`, so large
    integers and long digit strings are not rounded through `float`. Input in
    exponent notation (e.g. `"1e5"`, `1e+20`) falls back to `int(float(num))`.
    Non-zero negative values are prefixed with the numbering system's negative sign.

    Example:
    ```python

    >>> num2cn(16)
    '十六'
    >>> num2cn(1)
    '一'
    >>> num2cn(116)
    '一百一十六'
    >>> num2cn(2401, alt_2=True)
    '两千四百零一'
    >>> num2cn(101)
    '一百零一'
    >>> num2cn(float("3.4"), numbering_type=NumberingType.HIGH, alt_2=True, upper=False, traditional=False)
    '三点四'
    >>> num2cn("3.4", numbering_type=NumberingType.HIGH, alt_2=True, upper=False, traditional=False)
    '三点四'
    >>> num2cn(23232.005184132423423423300, numbering_type=NumberingType.HIGH, alt_2=True, upper=False, traditional=True)
    '兩萬叁仟兩佰叁拾贰點零零伍壹捌肆壹叁贰肆贰肆'
    >>> num2cn("23232.005184132423423423300", numbering_type=NumberingType.HIGH, alt_2=True, upper=False, traditional=True)
    '兩萬叁仟兩佰叁拾贰點零零伍壹捌肆壹叁贰肆贰叁肆贰叁肆贰叁叁零零'
    >>> num2cn('023232.005184132423423423300', numbering_type=NumberingType.HIGH, alt_2=False, upper=False, traditional=False)
    '二万三千二百三十二点零零五一八四一三二四二三四二三四二三三零零'
    >>> num2cn(111180000)
    '一亿一千一百一十八万'
    >>> num2cn(1821010)
    '一百八十二万一千零一十'
    >>> num2cn(182.1)
    '一百八十二点一'
    >>> num2cn('3.4')
    '三点四'
    >>> num2cn(16)
    '十六'
    >>> num2cn(10600)
    '一万零六百'
    >>> num2cn(110)
    '一百一'
    >>> num2cn(1600)
    '一千六'

    ```

    Args:
        num (Union[int, float, str]): `int`, `float` or `str` value
        numbering_type (Union[str, NumberingType], optional): Numbering type. Defaults to `NumberingType.MID`.
        upper (bool, optional): Capitalized numbers. Defaults to `False`.
        traditional (bool, optional): Traditional Chinese. Defaults to `False`.
        alt_0 (bool, optional): Use alternative form of zero. Defaults to `False`.
        alt_2 (bool, optional): Use alternative form of two. Defaults to `False`.

    Raises:
        ValueError: When `upper` is combined with `alt_0` or `alt_2`, or when `num`
            cannot be parsed as a number.

    Returns:
        str: Chinese string
    """

    if alt_2 and upper:
        raise ValueError("alt_2 and upper cannot be True at the same time.")

    if alt_0 and upper:
        raise ValueError("alt_0 and upper cannot be True at the same time.")

    numbering_type = NumberingType(numbering_type) if isinstance(numbering_type, str) else numbering_type
    system = NumberingSystem(numbering_type)

    # Work on the decimal text: everything after the first "." is the fraction,
    # read digit by digit (trailing zeros of a string are kept on purpose).
    num_str = str(num)
    int_text, _, dec_digits = num_str.partition(".")
    sign_text = int_text.strip()
    is_negative = sign_text.startswith("-")

    if dec_digits and sign_text in ("", "+", "-"):
        # e.g. ".5" or "-.5": no integer digits in front of the point
        int_value = 0
    else:
        try:
            # exact for arbitrarily large values, unlike a round trip through float
            int_value = abs(int(int_text))
        except ValueError:
            # exponent notation ("1e5", 1e+20), bool, ...: use the float conversion,
            # which also raises for text that is not a number at all
            int_value = abs(int(float(num)))

    int_symbols = system.int2symbols(int_value)
    dec_symbols = [system.digits[int(c)] for c in dec_digits]

    # e.g. "二百二" to "两百二", "二千二" to "两千二", "三亿零二万二" to "三亿零两万二"
    if alt_2:
        int_symbols = system.alt_two_symbols(int_symbols)

    # attribute name for simplified or traditional with upper case or not
    attr_name = "traditional" if traditional else "simplified"
    if upper:
        attr_name = "upper_" + attr_name

    # remove leading '一' for '十', e.g. 一十六 to 十六 in integer part
    if len(int_symbols) > 1 and (int_symbols[0] == system.digits[1]) and (getattr(int_symbols[1], "power", -1) == 1):
        int_symbols = int_symbols[1:]

    # remove trailing units, 1600 -> 一千六, 10600 -> 一萬零六百, 101600 -> 十萬一千六 in integer part
    if len(int_symbols) > 3 and isinstance(int_symbols[-1], CNUnit):
        if getattr(int_symbols[-3], "power", None) == (int_symbols[-1].power + 1):
            int_symbols = int_symbols[:-1]

    int_string = "".join(getattr(s, attr_name) for s in int_symbols)
    int_string = re.sub(r"零+", "零", int_string)  # remove multiple zeros in integer part only
    if not int_string:
        int_string = "零"

    dec_string = "".join(getattr(s, attr_name) for s in dec_symbols)

    result = int_string

    if dec_string:
        result += getattr(system.math.point, attr_name) + dec_string

    # Keep the sign of non-zero negative values; "-0" and "-0.0" stay unsigned.
    if is_negative and (int_value or any(int(c) for c in dec_digits)):
        result = getattr(system.math.negative, attr_name) + result

    if alt_0:
        # the simplified alternative zero is used regardless of `traditional`
        result = result.replace(getattr(system.digits[0], attr_name), str(system.digits[0].alt_s))

    return result
Integer or float value to Chinese string
Example:
>>> num2cn(16)
'十六'
>>> num2cn(1)
'一'
>>> num2cn(116)
'一百一十六'
>>> num2cn(2401, alt_2=True)
'两千四百零一'
>>> num2cn(101)
'一百零一'
>>> num2cn(float("3.4"), numbering_type=NumberingType.HIGH, alt_2=True, upper=False, traditional=False)
'三点四'
>>> num2cn("3.4", numbering_type=NumberingType.HIGH, alt_2=True, upper=False, traditional=False)
'三点四'
>>> num2cn(23232.005184132423423423300, numbering_type=NumberingType.HIGH, alt_2=True, upper=False, traditional=True)
'兩萬叁仟兩佰叁拾贰點零零伍壹捌肆壹叁贰肆贰肆'
>>> num2cn("23232.005184132423423423300", numbering_type=NumberingType.HIGH, alt_2=True, upper=False, traditional=True)
'兩萬叁仟兩佰叁拾贰點零零伍壹捌肆壹叁贰肆贰叁肆贰叁肆贰叁叁零零'
>>> num2cn('023232.005184132423423423300', numbering_type=NumberingType.HIGH, alt_2=False, upper=False, traditional=False)
'二万三千二百三十二点零零五一八四一三二四二三四二三四二三三零零'
>>> num2cn(111180000)
'一亿一千一百一十八万'
>>> num2cn(1821010)
'一百八十二万一千零一十'
>>> num2cn(182.1)
'一百八十二点一'
>>> num2cn('3.4')
'三点四'
>>> num2cn(16)
'十六'
>>> num2cn(10600)
'一万零六百'
>>> num2cn(110)
'一百一'
>>> num2cn(1600)
'一千六'
Arguments:
- num (Union[int, float, str]): `int`, `float` or `str` value
- numbering_type (Union[str, NumberingType], optional): Numbering type. Defaults to `NumberingType.MID`.
- upper (bool, optional): Capitalized numbers. Defaults to `False`.
- traditional (bool, optional): Traditional Chinese. Defaults to `False`.
- alt_0 (bool, optional): Use alternative form of zero. Defaults to `False`.
- alt_2 (bool, optional): Use alternative form of two. Defaults to `False`.
Returns:
str: Chinese string