pycnnum.pycnnum

Chinese number <=> int/float conversion

  1# pylint: disable=line-too-long
  2"""Chinese number <=> int/float conversion """
  3
  4from __future__ import annotations
  5
  6import copy
  7import re
  8from dataclasses import dataclass
  9from enum import Enum
 10from functools import cached_property
 11from math import log2
 12from typing import Callable, List, Optional, Tuple, Union
 13
 14from .constants import DIGITS, UNITS
 15
 16
 17class NumberingType(Enum):
 18    r"""Numbering system types: LOW, MID, HIGH
 19
 20    Chinese numbering types:
 21
 22    For $i \in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]$:
 23
 24
 25    - `LOW`  : $10^{8 + i}$
 26    - `MID`  : $10^{8 + i*4}$
 27    - `HIGH` : $10^{2^{i+3}}$
 28
 29    ---
 30
 31    | type  |  亿     | 兆      | 京      | 垓       | 秭       | 穰       | 沟       | 涧        | 正        | 载         |
 32    |-------| --------|---------|---------|---------|----------|----------|----------|-----------|-----------|-----------|
 33    |`low`  | $10^{8}$|$10^{9}$ |$10^{10}$|$10^{11}$|$10^{12}$ |$10^{13}$ |$10^{14}$ |$10^{15}$  |$10^{16}$  |$10^{17}$  |
 34    |`mid`  | $10^{8}$|$10^{12}$|$10^{16}$|$10^{20}$|$10^{24}$ |$10^{28}$ |$10^{32}$ |$10^{36}$  |$10^{40}$  |$10^{44}$  |
 35    |`high` | $10^{8}$|$10^{16}$|$10^{32}$|$10^{64}$|$10^{128}$|$10^{256}$|$10^{512}$|$10^{1024}$|$10^{2048}$|$10^{4096}$|
 36    """
 37
 38    LOW = "low"
 39    """
 40        | type  |  亿     | 兆      | 京      | 垓       | 秭       | 穰       | 沟       | 涧        | 正        | 载         |
 41        |-------| --------|---------|---------|---------|----------|----------|----------|-----------|-----------|-----------|
 42        |`low`  | $10^{8}$|$10^{9}$ |$10^{10}$|$10^{11}$|$10^{12}$ |$10^{13}$ |$10^{14}$ |$10^{15}$  |$10^{16}$  |$10^{17}$  |
 43    """
 44
 45    MID = "mid"
 46    """
 47        | type  |  亿     | 兆      | 京      | 垓       | 秭       | 穰       | 沟       | 涧        | 正        | 载         |
 48        |-------| --------|---------|---------|---------|----------|----------|----------|-----------|-----------|-----------|
 49        |`mid`  | $10^{8}$|$10^{12}$|$10^{16}$|$10^{20}$|$10^{24}$ |$10^{28}$ |$10^{32}$ |$10^{36}$  |$10^{40}$  |$10^{44}$  |
 50    """
 51
 52    HIGH = "high"
 53    """
 54        | type  |  亿     | 兆      | 京      | 垓       | 秭       | 穰       | 沟       | 涧        | 正        | 载         |
 55        |-------| --------|---------|---------|---------|----------|----------|----------|-----------|-----------|-----------|
 56        |`high` | $10^{8}$|$10^{16}$|$10^{32}$|$10^{64}$|$10^{128}$|$10^{256}$|$10^{512}$|$10^{1024}$|$10^{2048}$|$10^{4096}$|
 57    """
 58
 59    @property
 60    def powers(self) -> List[int]:
 61        """Powers of units for each numbering type"""
 62        return {
 63            NumberingType.LOW: [8 + i for i in range(10)],
 64            NumberingType.MID: [8 + 4 * i for i in range(10)],
 65            NumberingType.HIGH: [2 ** (i + 3) for i in range(10)],
 66        }[self]
 67
 68
 69# region char classes
 70@dataclass
 71class CNChar:
 72    """Base Chinese char class.
 73
 74    Each object has simplified and traditional strings.
 75    When converted to string, it will shows the simplified string or traditional string or space `' '`.
 76
 77    Example:
 78    ```python
 79
 80    >>> negative = CNChar(simplified="负", traditional="負")
 81    >>> negative.simplified
 82    '负'
 83    >>> negative.traditional
 84    '負'
 85    >>> negative.upper_simplified
 86    '负'
 87    >>> negative.upper_traditional
 88    '負'
 89
 90    ```
 91    """
 92
 93    simplified: Optional[str] = None
 94    """Simplified Chinese character"""
 95
 96    traditional: Optional[str] = None
 97    """Traditional Chinese character"""
 98
 99    upper_simplified: Optional[str] = None
100    """Capitalized character in simplified Chinese. Defaults to `None` means same as `self.simplified`."""
101
102    upper_traditional: Optional[str] = None
103    """Capitalized character in traditional Chinese. Defaults to `None` means same as `self.traditional`."""
104
105    @property
106    def all_forms(self) -> str:
107        """All forms of the character"""
108        return "".join(v for v in self.__dict__.values() if isinstance(v, str))
109
110    def __post_init__(self):
111        """Post initialization"""
112        self.simplified = self.simplified or ""
113        self.traditional = self.traditional or self.simplified
114        self.upper_simplified = self.upper_simplified or self.simplified
115        self.upper_traditional = self.upper_traditional or self.traditional
116
117    def __str__(self) -> str:
118        return self.simplified if self.simplified else f"Empty {self.__class__.__name__}"
119
120    def __repr__(self) -> str:
121        return str(self)
122
123
@dataclass
class CNUnit(CNChar):
    """Chinese number unit class

    A `CNChar` that additionally records the power of ten it stands for.
    Its string form is `10^<power>`.

    Example:
    ```python

    >>> wan = CNUnit(*"万萬萬萬", power=4)
    >>> wan
    10^4

    ```
    """

    power: int = 0
    r"""The power of this unit, e.g. `power` = 4 for `'万'` ( \(10^4\) )"""

    def __str__(self) -> str:
        return f"10^{self.power}"

    def __repr__(self) -> str:
        return str(self)

    @classmethod
    def create(cls, power: int, numbering_type: NumberingType = NumberingType.MID) -> CNUnit:
        """Create one unit character for a power of ten, using the `UNITS` table.

        Powers 1 to 4 map to the first four `UNITS` entries regardless of
        `numbering_type`. Larger powers are mapped to the following entries
        according to `numbering_type`.

        Example:
        ```python

        >>> CNUnit.create(power=8, numbering_type=NumberingType.LOW).simplified
        '亿'
        >>> CNUnit.create(power=12, numbering_type=NumberingType.LOW).simplified
        '秭'
        >>> CNUnit.create(power=12, numbering_type=NumberingType.HIGH).simplified
        Traceback (most recent call last):
        ...
        ValueError: power = 12 is invalid for numbering_type = <NumberingType.HIGH: 'high'>

        ```

        Args:
            power (int): Unit power, starting from 1.
            numbering_type (NumberingType, optional): Numbering type. Defaults to `NumberingType.MID`.

        Raises:
            AssertionError: Raised when `power` is not greater than 0.
            ValueError: Raised when `numbering_type` is not a `NumberingType`, or when
                `power` does not correspond to any unit of `numbering_type`.

        Returns:
            CNUnit: Created unit object
        """
        # Raised explicitly instead of via `assert` so the check survives `python -O`
        # (where a non-positive power would otherwise index UNITS from the end).
        # The exception type is kept for backward compatibility.
        if not power > 0:
            raise AssertionError("Power should be greater than 0.")

        if power < 5:
            return cls(*UNITS[power - 1], power=power)  # type: ignore[misc]

        # Position of the unit among the large units (0 -> first large unit).
        if numbering_type == NumberingType.LOW:
            position = float(power - 8)
        elif numbering_type == NumberingType.MID:
            position = (power - 8) / 4
        elif numbering_type == NumberingType.HIGH:
            position = log2(power) - 3
        else:
            raise ValueError(f"Numbering type should be in {NumberingType} but {numbering_type} is provided.")

        # The position must be a whole number that falls inside the UNITS table;
        # anything else (fractional, negative, or past the last unit) is invalid.
        if not position.is_integer() or not 0 <= position < len(UNITS) - 4:
            raise ValueError(f"{power = } is invalid for {numbering_type = }")

        return cls(*UNITS[int(position) + 4], power=power)  # type: ignore[misc]
203
204
@dataclass
class CNDigit(CNChar):
    """Chinese number digit class

    A `CNChar` carrying the integer value of the digit and optional
    alternative characters. Its string form is the integer value.

    Example:
    ```python

    >>> CNDigit(*"三叁叁叁", int_value=3)
    3

    ```
    """

    int_value: int = 0
    """Integer value of the digit, 0 to 9. Defaults to 0."""

    alt_s: Optional[str] = None
    """Alternative simplified character, e.g. '两' for 2. Defaults to `None`.
    """

    alt_t: Optional[str] = None
    """Alternative traditional character, e.g. '兩' for 2. Defaults to `None`.
    """

    def __str__(self):
        return f"{self.int_value}"

    def __repr__(self):
        return self.__str__()
234
235
@dataclass
class CNMath(CNChar):
    """
    Chinese math operator: a `CNChar` paired with its mathematical symbol
    and an optional callable. Its string form is the symbol.

    Example:
    ```python

    >>> positive = CNMath(*"正正正正", symbol="+", expression=lambda x: +x)
    >>> positive.symbol
    '+'

    ```
    """

    symbol: str = ""
    """Mathematical symbol, e.g. '+'. Defaults to `''`."""

    expression: Optional[Callable] = None
    """Mathematical expression, e.g. `lambda x: +x`. Defaults to `None`."""

    def __str__(self):
        symbol_text = self.symbol
        return symbol_text

    def __repr__(self):
        return self.__str__()
262
263
# Any symbol a parsed Chinese number can contain: a unit, a digit or a math operator.
SymbolType = Union[CNUnit, CNDigit, CNMath]
265
266
@dataclass
class MathSymbols:
    """Container for the math symbols of a numbering system.

    - positive = ['正', '正']
    - negative = ['负', '負']
    - point = ['点', '點']

    Iterating over the object yields its fields in declaration order.
    Used in `NumberingSystem`.

    Example:
    ```python

    >>> positive = CNMath(*"正正正正", symbol="+", expression=lambda x: +x)
    >>> negative = CNMath(*"负負负負", symbol="-", expression=lambda x: -x)
    >>> point = CNMath(*"点點点點", symbol=".", expression=lambda integer, decimal: float(str(integer) + "." + str(decimal)))
    >>> math = MathSymbols(positive, negative, point)
    >>> math.positive
    +
    >>> list(math)
    [+, -, .]
    >>> for i in math:
    ...     print(i)
    +
    -
    .

    ```
    """

    positive: CNMath
    """Positive"""

    negative: CNMath
    """Negative"""

    point: CNMath
    """Decimal point"""

    def __iter__(self):
        yield from vars(self).values()
309
310
class NumberingSystem:
    """Numbering system class

    Holds the digits, units and math symbols for one `NumberingType` and
    converts between Chinese number strings, symbol lists and integers.

    Example:
    ```python

    >>> system = NumberingSystem(NumberingType.MID)
    >>> system.numbering_type
    <NumberingType.MID: 'mid'>
    >>> system.digits[0]
    0
    >>> system.units[0]
    10^1
    >>> system.units[7].simplified
    '垓'
    >>> system.math.positive
    +

    ```
    """

    # region: fields
    _numbering_type: NumberingType
    """Numbering type"""

    _digits: List[CNDigit]
    """Digits"""

    _units: List[CNUnit]
    """Units"""

    _maths: MathSymbols
    """Math symbols"""

    @property
    def numbering_type(self) -> NumberingType:
        """Numbering type"""
        return self._numbering_type

    @numbering_type.setter
    def numbering_type(self, value: NumberingType):
        self._numbering_type = value
        # Only the large units (index 4 onward) depend on the numbering type.
        self._units[4:] = [CNUnit.create(i, self._numbering_type) for i in self._numbering_type.powers]

    @property
    def digits(self) -> List[CNDigit]:
        """Digits

        A fresh list of fresh `CNDigit` objects is built on every access, so
        callers (e.g. `alt_two_symbols`) may mutate the returned digits
        without affecting later lookups.
        """
        result = [CNDigit(*v, int_value=i) for i, v in enumerate(DIGITS)]  # type: ignore[misc]
        result[0].alt_s, result[0].alt_t = "〇", "〇"
        result[2].alt_s, result[2].alt_t = "两", "兩"
        return result

    @property
    def units(self) -> List[CNUnit]:
        """Units"""
        return self._units

    @cached_property
    def math(self) -> MathSymbols:
        """Math symbols (positive, negative, decimal point); built once per instance."""
        positive_cn = CNMath(*"正正", symbol="+", expression=lambda x: x)  # type: ignore[misc]
        negative_cn = CNMath(*"负負", symbol="-", expression=lambda x: -x)  # type: ignore[misc]
        point_cn = CNMath(*"点點", symbol=".", expression=lambda i, d: float(f"{i}.{d}"))  # type: ignore[misc]
        return MathSymbols(positive_cn, negative_cn, point_cn)

    # endregion: fields

    def __init__(self, numbering_type: NumberingType = NumberingType.MID) -> None:
        """Build the unit table for `numbering_type` (defaults to `NumberingType.MID`)."""
        self._numbering_type = numbering_type
        self._units = [CNUnit(*UNITS[i], power=i + 1) for i in range(4)]  # type: ignore[misc]
        self._units[4:] = [CNUnit.create(i, self._numbering_type) for i in self._numbering_type.powers]

    def __getitem__(self, key: str) -> SymbolType:
        """Look up the symbol whose forms contain `key`.

        Units are searched first, then digits, then math symbols, so a
        character that is both a unit and a math symbol (e.g. '正') resolves
        to the unit.

        Raises:
            ValueError: Raised when `key` is not a string or matches no symbol.
        """
        if not isinstance(key, str):
            raise ValueError(f"{key = } should be a string.")

        for c in self.units + self.digits + list(self.math):
            if key in c.all_forms:
                return c

        raise ValueError(f"{key} is not in {self.numbering_type.name} system.")

    def cn2symbols(self, cn_str: str) -> Tuple[List[SymbolType], List[SymbolType]]:
        """Chinese string to symbols

        Example:
        ```python

        >>> system = NumberingSystem(NumberingType.MID)
        >>> system.cn2symbols("一百八")
        ([1, 10^2, 8], [])
        >>> system.cn2symbols("一百八十")
        ([1, 10^2, 8, 10^1], [])
        >>> system.cn2symbols("一百八点五六七")
        ([1, 10^2, 8], [5, 6, 7])
        >>> system.cn2symbols("两千万一百八十")
        ([2, 10^7, 1, 10^2, 8, 10^1], [])
        >>> system.cn2symbols("正两千万一百八十")
        ([+, 2, 10^7, 1, 10^2, 8, 10^1], [])
        >>> system.cn2symbols("正十五")
        ([+, 10^1, 5], [])
        >>> system.cn2symbols("负两千万一百八十")
        ([-, 2, 10^7, 1, 10^2, 8, 10^1], [])
        >>> system.cn2symbols("点负两千万一百八十")
        Traceback (most recent call last):
        ...
        ValueError: First symbol in decimal part should not be a math symbol, - is provided.
        >>> system.cn2symbols("两千万点一百点八十")
        Traceback (most recent call last):
        ...
        ValueError: Multiple points in the number 两千万点一百点八十.
        >>> system.cn2symbols("两千万点一百點八十")
        Traceback (most recent call last):
        ...
        ValueError: Multiple points in the number 两千万点一百點八十.

        ```

        Args:
            cn_str (str): Chinese number

        Raises:
            ValueError: Raised when the string has more than one decimal point, contains a
                character unknown to the system, or its decimal part starts with a math symbol.

        Returns:
            Tuple[List[SymbolType], List[SymbolType]]: Integer symbols, decimal symbols
        """
        if cn_str == "":
            return [], []

        int_dec = re.split(r"\.|点|點", cn_str)
        if len(int_dec) > 2:
            raise ValueError(f"Multiple points in the number {cn_str}.")
        int_part, dec_part = int_dec if len(int_dec) == 2 else (int_dec[0], "")

        # Deep copies: the unit powers are adjusted in place below.
        integer_value: List[Optional[SymbolType]] = [copy.deepcopy(self[c]) for c in int_part]

        # A leading sign such as '正' is also a unit character and is returned as a
        # unit by `self[...]`. Resolve it to the math symbol BEFORE merging units,
        # otherwise it would swallow a unit that directly follows it ('正十五').
        if int_part and (first_symbol := [c for c in self.math if int_part[0] in c.all_forms]):
            integer_value[0] = first_symbol[0]

        # Merge directly adjacent units pairwise, e.g. '千' '万' -> 10^7.
        for i, v in enumerate(integer_value):
            if not isinstance(v, CNUnit):
                continue
            if i + 1 < len(integer_value) and isinstance(integer_value[i + 1], CNUnit):
                v.power += integer_value[i + 1].power  # type: ignore[union-attr]
                integer_value[i + 1] = None

        symbols: List[SymbolType] = [v for v in integer_value if v is not None]

        # A unit followed (anywhere later) by a larger unit is scaled by the first
        # such larger unit, e.g. '一千二百万': '千' becomes 10^3 * 10^6.
        for i, v in enumerate(symbols):
            if not isinstance(v, CNUnit):
                continue
            for u in symbols[i + 1 :]:
                if isinstance(u, CNUnit) and u.power > v.power:
                    v.power += u.power
                    break

        decimal_value = [copy.deepcopy(self[c]) for c in dec_part]

        # The decimal part must not start with a math symbol, e.g. '点负两千万一百八十'
        if decimal_value and (decimal_value[0] in self.math):
            raise ValueError(
                f"First symbol in decimal part should not be a math symbol, {decimal_value[0]} is provided."
            )

        return symbols, decimal_value

    def _refine_integer_symbols(self, integer_symbols: List[SymbolType]) -> List[SymbolType]:
        """Refine integer symbols into digit/unit pairs. The input list is not modified.

        A leading negative sign is kept as the first element of the result;
        any other math symbol is dropped.

        Example:
        ```python

        >>> s = NumberingSystem(NumberingType.MID)
        >>> s._refine_integer_symbols(s.cn2symbols("十八")[0])
        [1, 10^1, 8, 10^0]
        >>> s._refine_integer_symbols(s.cn2symbols("负十五")[0])
        [-, 1, 10^1, 5, 10^0]
        >>> s._refine_integer_symbols(s.cn2symbols("两千万一百八十")[0])
        [2, 10^7, 1, 10^2, 8, 10^1]
        >>> s._refine_integer_symbols(s.cn2symbols("两千万零一百八十")[0])
        [2, 10^7, 1, 10^2, 8, 10^1]
        >>> s._refine_integer_symbols(s.cn2symbols("两亿六")[0])
        [2, 10^8, 6, 10^7]

        ```

        Args:
            integer_symbols (List[SymbolType]): Raw integer symbols

        Raises:
            ValueError: Raised when a symbol is neither a digit, a unit nor a math symbol.

        Returns:
            List[SymbolType]: Refined symbols
        """
        if not integer_symbols:
            return integer_symbols

        # Work on a copy so the caller's list is never extended in place.
        symbols = list(integer_symbols)

        # The number proper starts after an optional leading sign.
        start = 1 if isinstance(symbols[0], CNMath) else 0

        # first number symbol is the unit 十, add 1 before it, e.g. "十五" to "一十五"
        if start < len(symbols) and isinstance(symbols[start], CNUnit) and symbols[start].power == 1:
            symbols.insert(start, self.digits[1])

        # last symbol is digit and the second last symbol is unit
        # e.g. "十 五" to "十 五 10^0",  "二 百 五" to "二 百 五 10^1"
        if len(symbols) > 1 and isinstance(symbols[-1], CNDigit) and isinstance(symbols[-2], CNUnit):
            # add a dummy unit one power below the preceding unit
            symbols.append(CNUnit(power=symbols[-2].power - 1))

        result: List[SymbolType] = []
        unit_count = 0
        for s in symbols:
            if isinstance(s, CNMath):
                unit_count = 0
                continue
            if isinstance(s, CNDigit):
                # a digit ends any run of consecutive units; zeros carry no value
                unit_count = 0
                if s.int_value > 0:
                    result.append(s)
                continue
            if not isinstance(s, CNUnit):
                raise ValueError(f"Invalid symbol {s} in {symbols}.")

            # create a dummy unit so the original symbol is left untouched
            current_unit = CNUnit("", "", "", "", s.power)
            unit_count += 1

            # store the first met unit
            if unit_count == 1:
                result.append(current_unit)
                continue

            # if there are more than one units, sum them, e.g. "两 千 万" to "两 10^7"
            result[-1].power += current_unit.power  # type: ignore[union-attr]

        if symbols[0] == self.math.negative:
            result = [self.math.negative] + result
        return result

    def get_int_value(self, integer_symbols: List[SymbolType]) -> int:
        """Compute the (unsigned) integer value of a symbol list.

        Math symbols are ignored here; the sign is applied by the caller.

        Example:
        ```python

        >>> s = NumberingSystem(NumberingType.MID)
        >>> s.get_int_value(s.cn2symbols("十八")[0])
        18
        >>> s.get_int_value(s.cn2symbols("两千万一百八十")[0])
        20000180
        >>> s.get_int_value(s.cn2symbols("两亿六")[0])
        260000000

        ```

        Args:
            integer_symbols (List[SymbolType]): Symbols, without point

        Returns:
            int: value
        """
        integer_symbols = self._refine_integer_symbols(integer_symbols)

        # One accumulator slot per digit/unit group; the last slot is the open group.
        value = [0]
        last_power = 0
        for s in integer_symbols:
            if isinstance(s, CNDigit):
                value[-1] = s.int_value
            elif isinstance(s, CNUnit):
                value[-1] *= pow(10, s.power)
                if s.power > last_power:
                    # a larger unit scales everything accumulated so far
                    value[:-1] = [v * pow(10, s.power) for v in value[:-1]]  # pylint: disable=no-member
                    last_power = s.power
                value.append(0)
        return sum(value)

    def int2symbols(self, int_value: Union[int, str]) -> List[SymbolType]:
        """Integer to symbols

        Example:
        ```python

        >>> s = NumberingSystem(NumberingType.MID)
        >>> s.int2symbols(18)
        [1, 10^1, 8]
        >>> s.int2symbols(20000180)
        [2, 10^3, 10^4, 0, 1, 10^2, 8, 10^1]
        >>> s.int2symbols(26)
        [2, 10^1, 6]
        >>> s.int2symbols(320)
        [3, 10^2, 2, 10^1]
        >>> s.int2symbols(220)
        [2, 10^2, 2, 10^1]
        >>> s.int2symbols("220")
        [2, 10^2, 2, 10^1]

        ```

        Args:
            int_value (Union[int, str]): Non-negative integer, or a string of decimal
                digits (leading zeros allowed), e.g. 34, "34", "034"

        Returns:
            List[SymbolType]: List of values
        """
        value_string = str(int_value)
        striped_string = value_string.lstrip("0")

        # record nothing if all zeros
        if not striped_string:
            return []

        # record one digit, preceded by one zero digit per stripped leading zero
        if len(striped_string) == 1:
            result: List[SymbolType] = [self.digits[int(striped_string)]]
            if len(value_string) != len(striped_string):
                result = [self.digits[0] for _ in range(len(value_string) - len(striped_string))] + result
            return result

        # recursively record multiple digits

        # find the largest unit below the number's magnitude, e.g. 123 -> 10^2
        result_unit = next(u for u in reversed(self.units) if u.power < len(striped_string))

        # get the part of the number in front of that unit, e.g. 123 -> 1
        result_string = value_string[: -result_unit.power]

        # recursively record both parts, e.g. 123 -> [1, 10^2, 2, 10^1, 3]
        return self.int2symbols(result_string) + [result_unit] + self.int2symbols(striped_string[-result_unit.power :])

    def alt_two_symbols(self, integer_symbols: List[SymbolType]) -> List[SymbolType]:
        """Alternative two symbols
        e.g. "二百二" to "两百二", "二千二" to "两千二", "三亿零二万二" to "三亿零两万二"

        The matching digit objects in `integer_symbols` are modified in place
        and the same list is returned.

        Args:
            integer_symbols (List[SymbolType]): Symbols

        Returns:
            List[SymbolType]: Symbols
        """
        liang = self.digits[2]
        for i, v in enumerate(integer_symbols):
            if not isinstance(v, CNDigit):
                continue
            if v.int_value != 2:
                continue
            next_symbol = integer_symbols[i + 1] if i < len(integer_symbols) - 1 else None
            previous_symbol = integer_symbols[i - 1] if i > 0 else None

            # if the next symbol is not a unit, skip
            if not isinstance(next_symbol, CNUnit):
                continue

            # e.g. "一亿零二百" leading_zero = True
            # the "invalid" default keeps symbols without int_value from comparing equal to 0
            leading_zero = getattr(previous_symbol, "int_value", "invalid") == 0

            # e.g. "二百二" to "两百二"
            previous_is_unit = isinstance(previous_symbol, CNUnit)

            if not (leading_zero or previous_is_unit or (previous_symbol is None)):
                continue

            # e.g. "二百二" to "两百二", "二千二" to "两千二"; never before 十 (power 1)
            if next_symbol.power > 1:
                integer_symbols[i].simplified = liang.alt_s
                integer_symbols[i].traditional = liang.alt_t
                integer_symbols[i].upper_simplified = liang.alt_s
                integer_symbols[i].upper_traditional = liang.alt_t
        return integer_symbols
674
675
def cn2num(
    chinese_string: str,
    numbering_type: Union[str, NumberingType] = NumberingType.MID,
) -> Union[int, float]:
    """Convert a Chinese number to an `int` or `float` value.

    Example:
    ```python

    >>> cn2num("负零点五")
    -0.5
    >>> cn2num("一百八")
    180
    >>> cn2num("一百八十")
    180
    >>> cn2num("一百八点五六七")
    180.567
    >>> cn2num("两千万一百八十")
    20000180
    >>> cn2num("正两千万一百八十")
    20000180

    ```

    Args:
        chinese_string (str): Chinese number.
        numbering_type (Union[str, NumberingType], optional): numbering type. Defaults to `NumberingType.MID`.

    Raises:
        ValueError: Raised when a character is not in the numbering system, e.g. '你' is not a number nor a unit

    Returns:
        Union[int, float]: `float` when the string has a decimal part, otherwise `int`
    """
    if isinstance(numbering_type, str):
        numbering_type = NumberingType(numbering_type)

    system = NumberingSystem(numbering_type)
    integer_symbols, decimal_symbols = system.cn2symbols(chinese_string)
    magnitude: Union[int, float] = system.get_int_value(integer_symbols)

    if decimal_symbols:
        # only digits contribute to the decimal part; units are skipped
        decimal_digits = "".join(
            str(symbol.int_value) for symbol in decimal_symbols if isinstance(symbol, CNDigit)
        )
        magnitude = float(f"{magnitude}.{decimal_digits}")

    is_negative = bool(integer_symbols) and integer_symbols[0] == system.math.negative
    return -magnitude if is_negative else magnitude
724
725
726# pylint: disable-next=too-many-arguments
def num2cn(
    num: Union[int, float, str],
    numbering_type: Union[str, NumberingType] = NumberingType.MID,
    upper: bool = False,
    traditional: bool = False,
    alt_0: bool = False,
    alt_2: bool = False,
) -> str:
    """Integer or float value to Chinese string

    Integers and plain digit strings are converted exactly (no round trip
    through `float`), so values above 2**53 keep all their digits. Negative
    non-zero values are prefixed with the negative sign ('负' / '負').

    Example:
    ```python

    >>> num2cn(16)
    '十六'
    >>> num2cn(-16)
    '负十六'
    >>> num2cn(-0.5)
    '负零点五'
    >>> num2cn(1)
    '一'
    >>> num2cn(116)
    '一百一十六'
    >>> num2cn(2401, alt_2=True)
    '两千四百零一'
    >>> num2cn(101)
    '一百零一'
    >>> num2cn(float("3.4"), numbering_type=NumberingType.HIGH, alt_2=True, upper=False, traditional=False)
    '三点四'
    >>> num2cn("3.4", numbering_type=NumberingType.HIGH, alt_2=True, upper=False, traditional=False)
    '三点四'
    >>> num2cn(23232.005184132423423423300, numbering_type=NumberingType.HIGH, alt_2=True, upper=False, traditional=True)
    '兩萬叁仟兩佰叁拾贰點零零伍壹捌肆壹叁贰肆贰肆'
    >>> num2cn("23232.005184132423423423300", numbering_type=NumberingType.HIGH, alt_2=True, upper=False, traditional=True)
    '兩萬叁仟兩佰叁拾贰點零零伍壹捌肆壹叁贰肆贰叁肆贰叁肆贰叁叁零零'
    >>> num2cn('023232.005184132423423423300', numbering_type=NumberingType.HIGH, alt_2=False, upper=False, traditional=False)
    '二万三千二百三十二点零零五一八四一三二四二三四二三四二三三零零'
    >>> num2cn(111180000)
    '一亿一千一百一十八万'
    >>> num2cn(1821010)
    '一百八十二万一千零一十'
    >>> num2cn(182.1)
    '一百八十二点一'
    >>> num2cn('3.4')
    '三点四'
    >>> num2cn(10600)
    '一万零六百'
    >>> num2cn(110)
    '一百一'
    >>> num2cn(1600)
    '一千六'

    ```

    Args:
        num (Union[int, float, str]): `int`, `float` or `str` value
        numbering_type (Union[str, NumberingType], optional): Numbering type. Defaults to `NumberingType.MID`.
        upper (bool, optional): Capitalized numbers. Defaults to `False`.
        traditional (bool, optional): Traditional Chinese. Defaults to `False`.
        alt_0 (bool, optional): Use alternative form of zero. Defaults to `False`.
        alt_2 (bool, optional): Use alternative form of two. Defaults to `False`.

    Raises:
        ValueError: Raised when `upper` is combined with `alt_0` or `alt_2`, or when
            `num` cannot be interpreted as a decimal number.

    Returns:
        str: Chinese string
    """

    if alt_2 and upper:
        raise ValueError("alt_2 and upper cannot be True at the same time.")

    if alt_0 and upper:
        raise ValueError("alt_0 and upper cannot be True at the same time.")

    numbering_type = NumberingType(numbering_type) if isinstance(numbering_type, str) else numbering_type
    system = NumberingSystem(numbering_type)

    num_str = str(num)
    # digits after the decimal point are taken verbatim (trailing zeros are kept)
    dec_string = num_str.rsplit(".", 1)[-1] if "." in num_str else ""

    # Magnitude of the integer part. `int(float(num))` would silently corrupt
    # integers above 2**53, so ints and plain digit strings are converted exactly;
    # everything else (floats, exotic strings) still goes through float().
    plain_digits = re.fullmatch(r"\s*[+-]?(\d+)(?:\.\d*)?\s*", num) if isinstance(num, str) else None
    if isinstance(num, int):
        int_magnitude = abs(num)
    elif plain_digits:
        int_magnitude = int(plain_digits.group(1))
    else:
        int_magnitude = abs(int(float(num)))

    int_symbols = system.int2symbols(int_magnitude)
    dec_symbols = [system.digits[int(c)] for c in dec_string]

    # the sign is emitted only for non-zero values, so "-0" stays "零"
    is_negative = num_str.lstrip().startswith("-") and (int_magnitude != 0 or dec_string.strip("0") != "")

    # e.g. "二百二" to "两百二", "二千二" to "两千二", "三亿零二万二" to "三亿零两万二"
    if alt_2:
        int_symbols = system.alt_two_symbols(int_symbols)

    # attribute name for simplified or traditional with upper case or not
    attr_name = "traditional" if traditional else "simplified"
    if upper:
        attr_name = "upper_" + attr_name

    # remove leading '一' for '十', e.g. 一十六 to 十六 in integer part
    if len(int_symbols) > 1 and (int_symbols[0] == system.digits[1]) and (getattr(int_symbols[1], "power", -1) == 1):
        int_symbols = int_symbols[1:]

    # remove trailing units, 1600 -> 一千六, 10600 -> 一萬零六百, 101600 -> 十萬一千六 in integer part
    if len(int_symbols) > 3 and isinstance(int_symbols[-1], CNUnit):
        if getattr(int_symbols[-3], "power", None) == (int_symbols[-1].power + 1):
            int_symbols = int_symbols[:-1]

    int_string = "".join(getattr(s, attr_name) for s in int_symbols)
    int_string = re.sub(r"零+", "零", int_string)  # remove multiple zeros in integer part only
    if not int_string:
        int_string = "零"

    dec_string = "".join(getattr(s, attr_name) for s in dec_symbols)

    result = int_string
    if is_negative:
        result = getattr(system.math.negative, attr_name) + result

    if dec_string:
        result += getattr(system.math.point, attr_name) + dec_string

    if alt_0:
        result = result.replace(getattr(system.digits[0], attr_name), str(system.digits[0].alt_s))

    return result
class NumberingType(enum.Enum):
class NumberingType(Enum):
    r"""Chinese numbering conventions for the large units: LOW, MID, HIGH.

    The ten large units (亿 ... 载) stand for different powers of ten
    depending on the convention. For $i \in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]$:

    - `LOW`  : $10^{8 + i}$
    - `MID`  : $10^{8 + i*4}$
    - `HIGH` : $10^{2^{i+3}}$

    ---

    | type  |  亿     | 兆      | 京      | 垓       | 秭       | 穰       | 沟       | 涧        | 正        | 载         |
    |-------| --------|---------|---------|---------|----------|----------|----------|-----------|-----------|-----------|
    |`low`  | $10^{8}$|$10^{9}$ |$10^{10}$|$10^{11}$|$10^{12}$ |$10^{13}$ |$10^{14}$ |$10^{15}$  |$10^{16}$  |$10^{17}$  |
    |`mid`  | $10^{8}$|$10^{12}$|$10^{16}$|$10^{20}$|$10^{24}$ |$10^{28}$ |$10^{32}$ |$10^{36}$  |$10^{40}$  |$10^{44}$  |
    |`high` | $10^{8}$|$10^{16}$|$10^{32}$|$10^{64}$|$10^{128}$|$10^{256}$|$10^{512}$|$10^{1024}$|$10^{2048}$|$10^{4096}$|
    """

    LOW = "low"
    """
        | type  |  亿     | 兆      | 京      | 垓       | 秭       | 穰       | 沟       | 涧        | 正        | 载         |
        |-------| --------|---------|---------|---------|----------|----------|----------|-----------|-----------|-----------|
        |`low`  | $10^{8}$|$10^{9}$ |$10^{10}$|$10^{11}$|$10^{12}$ |$10^{13}$ |$10^{14}$ |$10^{15}$  |$10^{16}$  |$10^{17}$  |
    """

    MID = "mid"
    """
        | type  |  亿     | 兆      | 京      | 垓       | 秭       | 穰       | 沟       | 涧        | 正        | 载         |
        |-------| --------|---------|---------|---------|----------|----------|----------|-----------|-----------|-----------|
        |`mid`  | $10^{8}$|$10^{12}$|$10^{16}$|$10^{20}$|$10^{24}$ |$10^{28}$ |$10^{32}$ |$10^{36}$  |$10^{40}$  |$10^{44}$  |
    """

    HIGH = "high"
    """
        | type  |  亿     | 兆      | 京      | 垓       | 秭       | 穰       | 沟       | 涧        | 正        | 载         |
        |-------| --------|---------|---------|---------|----------|----------|----------|-----------|-----------|-----------|
        |`high` | $10^{8}$|$10^{16}$|$10^{32}$|$10^{64}$|$10^{128}$|$10^{256}$|$10^{512}$|$10^{1024}$|$10^{2048}$|$10^{4096}$|
    """

    @property
    def powers(self) -> List[int]:
        """Exponents (base 10) of the ten large units under this numbering type."""
        if self is NumberingType.LOW:
            # 8, 9, ..., 17
            return list(range(8, 18))
        if self is NumberingType.MID:
            # 8, 12, ..., 44
            return list(range(8, 48, 4))
        if self is NumberingType.HIGH:
            # 2**3, 2**4, ..., 2**12
            return [1 << exponent for exponent in range(3, 13)]
        raise KeyError(self)

Numbering system types: LOW, MID, HIGH

Chinese numbering types:

For $i \in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]$:

  • LOW : $10^{8 + i}$
  • MID : $10^{8 + i*4}$
  • HIGH : $10^{2^{i+3}}$

type 亿 兆 京 垓 秭 穰 沟 涧 正 载
low $10^{8}$ $10^{9}$ $10^{10}$ $10^{11}$ $10^{12}$ $10^{13}$ $10^{14}$ $10^{15}$ $10^{16}$ $10^{17}$
mid $10^{8}$ $10^{12}$ $10^{16}$ $10^{20}$ $10^{24}$ $10^{28}$ $10^{32}$ $10^{36}$ $10^{40}$ $10^{44}$
high $10^{8}$ $10^{16}$ $10^{32}$ $10^{64}$ $10^{128}$ $10^{256}$ $10^{512}$ $10^{1024}$ $10^{2048}$ $10^{4096}$
LOW = <NumberingType.LOW: 'low'>
type 亿 兆 京 垓 秭 穰 沟 涧 正 载
low $10^{8}$ $10^{9}$ $10^{10}$ $10^{11}$ $10^{12}$ $10^{13}$ $10^{14}$ $10^{15}$ $10^{16}$ $10^{17}$
MID = <NumberingType.MID: 'mid'>
type 亿 兆 京 垓 秭 穰 沟 涧 正 载
mid $10^{8}$ $10^{12}$ $10^{16}$ $10^{20}$ $10^{24}$ $10^{28}$ $10^{32}$ $10^{36}$ $10^{40}$ $10^{44}$
HIGH = <NumberingType.HIGH: 'high'>
type 亿 兆 京 垓 秭 穰 沟 涧 正 载
high $10^{8}$ $10^{16}$ $10^{32}$ $10^{64}$ $10^{128}$ $10^{256}$ $10^{512}$ $10^{1024}$ $10^{2048}$ $10^{4096}$
powers: List[int]
60    @property
61    def powers(self) -> List[int]:
62        """Powers of units for each numbering type"""
63        return {
64            NumberingType.LOW: [8 + i for i in range(10)],
65            NumberingType.MID: [8 + 4 * i for i in range(10)],
66            NumberingType.HIGH: [2 ** (i + 3) for i in range(10)],
67        }[self]

Powers of units for each numbering type

Inherited Members
enum.Enum
name
value
class CNChar:
 72class CNChar:
 73    """Base Chinese char class.
 74
 75    Each object has simplified and traditional strings.
 76    When converted to string, it will shows the simplified string or traditional string or space `' '`.
 77
 78    Example:
 79    ```python
 80
 81    >>> negative = CNChar(simplified="负", traditional="負")
 82    >>> negative.simplified
 83    '负'
 84    >>> negative.traditional
 85    '負'
 86    >>> negative.upper_simplified
 87    '负'
 88    >>> negative.upper_traditional
 89    '負'
 90
 91    ```
 92    """
 93
 94    simplified: Optional[str] = None
 95    """Simplified Chinese character"""
 96
 97    traditional: Optional[str] = None
 98    """Traditional Chinese character"""
 99
100    upper_simplified: Optional[str] = None
101    """Capitalized character in simplified Chinese. Defaults to `None` means same as `self.simplified`."""
102
103    upper_traditional: Optional[str] = None
104    """Capitalized character in traditional Chinese. Defaults to `None` means same as `self.traditional`."""
105
106    @property
107    def all_forms(self) -> str:
108        """All forms of the character"""
109        return "".join(v for v in self.__dict__.values() if isinstance(v, str))
110
111    def __post_init__(self):
112        """Post initialization"""
113        self.simplified = self.simplified or ""
114        self.traditional = self.traditional or self.simplified
115        self.upper_simplified = self.upper_simplified or self.simplified
116        self.upper_traditional = self.upper_traditional or self.traditional
117
118    def __str__(self) -> str:
119        return self.simplified if self.simplified else f"Empty {self.__class__.__name__}"
120
121    def __repr__(self) -> str:
122        return str(self)

Base Chinese char class.

Each object has simplified and traditional strings. When converted to a string, it shows the simplified string, or "Empty <class name>" when the simplified string is empty.

Example:

>>> negative = CNChar(simplified="负", traditional="負")
>>> negative.simplified
'负'
>>> negative.traditional
'負'
>>> negative.upper_simplified
'负'
>>> negative.upper_traditional
'負'
CNChar( simplified: Union[str, NoneType] = None, traditional: Union[str, NoneType] = None, upper_simplified: Union[str, NoneType] = None, upper_traditional: Union[str, NoneType] = None)
simplified: Union[str, NoneType] = None

Simplified Chinese character

traditional: Union[str, NoneType] = None

Traditional Chinese character

upper_simplified: Union[str, NoneType] = None

Capitalized character in simplified Chinese. Defaults to None means same as self.simplified.

upper_traditional: Union[str, NoneType] = None

Capitalized character in traditional Chinese. Defaults to None means same as self.traditional.

all_forms: str
106    @property
107    def all_forms(self) -> str:
108        """All forms of the character"""
109        return "".join(v for v in self.__dict__.values() if isinstance(v, str))

All forms of the character

class CNUnit(CNChar):
class CNUnit(CNChar):
    """Chinese number unit class

    Each unit is a `CNChar` that additionally carries `power`, the exponent of ten it stands for.

    Example:
    ```python

    >>> wan = CNUnit(*"万萬萬萬", power=4)
    >>> wan
    10^4

    ```
    """

    power: int = 0
    r"""The power of this unit, e.g. `power` = 4 for `'万'` ( \(10^4\) )"""

    def __str__(self) -> str:
        return f"10^{self.power}"

    def __repr__(self) -> str:
        return str(self)

    @classmethod
    def create(cls, power: int, numbering_type: NumberingType = NumberingType.MID) -> CNUnit:
        """Create one unit character for a power of ten, using the `UNITS` constant.

        Powers 1 to 4 map to the first four entries of `UNITS` regardless of the
        numbering type. Larger powers must be one of `numbering_type.powers`; the
        position in that list selects the entry after the first four.

        Example:
        ```python

        >>> CNUnit.create(power=8, numbering_type=NumberingType.LOW).simplified
        '亿'
        >>> CNUnit.create(power=12, numbering_type=NumberingType.LOW).simplified
        '秭'
        >>> CNUnit.create(power=12, numbering_type=NumberingType.HIGH).simplified
        Traceback (most recent call last):
        ...
        ValueError: power = 12 is invalid for numbering_type = <NumberingType.HIGH: 'high'>

        ```

        Args:
            power (int): Unit power, starting from 1.
            numbering_type (NumberingType, optional): Numbering type. Defaults to `NumberingType.MID`.

        Raises:
            AssertionError: Raised when `power` is not greater than 0.
            ValueError: Raised when `numbering_type` is not a `NumberingType`, or when
                `power` (5 or larger) is not one of the powers of `numbering_type`.

        Returns:
            CNUnit: Created unit object
        """
        assert power > 0, "Power should be greater than 0."

        if power < 5:
            return cls(*UNITS[power - 1], power=power)  # type: ignore[misc]

        if not isinstance(numbering_type, NumberingType):
            raise ValueError(f"Numbering type should be in {NumberingType} but {numbering_type} is provided.")

        # Look the power up in the type's own table instead of deriving an index
        # arithmetically: a power past the last defined unit is then rejected
        # here rather than being used as an out-of-range index into UNITS.
        try:
            i = numbering_type.powers.index(power)
        except ValueError:
            raise ValueError(f"{power = } is invalid for {numbering_type = }") from None

        return cls(*UNITS[i + 4], power=power)  # type: ignore[misc]

Chinese number unit class

Each unit is a CNChar that additionally carries power, the exponent of ten it stands for.

Example:

>>> wan = CNUnit(*"万萬萬萬", power=4)
>>> wan
10^4
CNUnit( simplified: Union[str, NoneType] = None, traditional: Union[str, NoneType] = None, upper_simplified: Union[str, NoneType] = None, upper_traditional: Union[str, NoneType] = None, power: int = 0)
power: int = 0

The power of this unit, e.g. power = 4 for '万' ($10^4$)

@classmethod
def create( cls, power: int, numbering_type: NumberingType = <NumberingType.MID: 'mid'>) -> CNUnit:
150    @classmethod
151    def create(cls, power: int, numbering_type: NumberingType = NumberingType.MID) -> CNUnit:
152        """Create one unit character based on power value from constants
153
154        - `SMALLER_CHINESE_NUMBERING_UNITS_SIMPLIFIED`
155        - `SMALLER_CHINESE_NUMBERING_UNITS_TRADITIONAL`
156        - `LARGER_CHINESE_NUMBERING_UNITS_SIMPLIFIED`
157        - `LARGER_CHINESE_NUMBERING_UNITS_TRADITIONAL`
158
159        Example:
160        ```python
161
162        >>> CNUnit.create(power=8, numbering_type=NumberingType.LOW).simplified
163        '亿'
164        >>> CNUnit.create(power=12, numbering_type=NumberingType.LOW).simplified
165        '秭'
166        >>> CNUnit.create(power=12, numbering_type=NumberingType.HIGH).simplified
167        Traceback (most recent call last):
168        ...
169        ValueError: power = 12 is invalid for numbering_type = <NumberingType.HIGH: 'high'>
170
171        ```
172
173        Args:
174            power (int): Unit power, starting from 1.
175            numbering_type (NumberingType, optional): Numbering type. Defaults to `NumberingType.MID`.
176
177        Raises:
178            ValueError: Raised when invalid `numbering_type` is provided
179
180        Returns:
181            CNUnit: Created unit object
182        """
183        assert power > 0, "Power should be greater than 0."
184
185        if power < 5:
186            return cls(*UNITS[power - 1], power=power)  # type: ignore[misc]
187
188        i = float(power - 8)
189        if numbering_type == NumberingType.LOW:
190            pass
191        elif numbering_type == NumberingType.MID:
192            i = i / 4
193        elif numbering_type == NumberingType.HIGH:
194            i = log2(power) - 3
195        else:
196            raise ValueError(f"Numbering type should be in {NumberingType} but {numbering_type} is provided.")
197
198        i = int(i) if i.is_integer() else -1
199
200        if i < 0:
201            raise ValueError(f"{power = } is invalid for {numbering_type = }")
202
203        return cls(*UNITS[i + 4], power=power)  # type: ignore[misc]

Create one unit character based on power value from constants

  • SMALLER_CHINESE_NUMBERING_UNITS_SIMPLIFIED
  • SMALLER_CHINESE_NUMBERING_UNITS_TRADITIONAL
  • LARGER_CHINESE_NUMBERING_UNITS_SIMPLIFIED
  • LARGER_CHINESE_NUMBERING_UNITS_TRADITIONAL

Example:

>>> CNUnit.create(power=8, numbering_type=NumberingType.LOW).simplified
'亿'
>>> CNUnit.create(power=12, numbering_type=NumberingType.LOW).simplified
'秭'
>>> CNUnit.create(power=12, numbering_type=NumberingType.HIGH).simplified
Traceback (most recent call last):
...
ValueError: power = 12 is invalid for numbering_type = <NumberingType.HIGH: 'high'>
Arguments:
  • power (int): Unit power, starting from 1.
  • numbering_type (NumberingType, optional): Numbering type. Defaults to NumberingType.MID.
Raises:
  • ValueError: Raised when an invalid numbering_type is provided, or when power is not valid for the given numbering_type
Returns:

CNUnit: Created unit object

class CNDigit(CNChar):
class CNDigit(CNChar):
    """Chinese number digit class

    A `CNChar` that also carries the digit's integer value and optional
    alternative characters. Its string form is the integer value.

    Example:
    ```python

    >>> CNDigit(*"三叁叁叁", int_value=3)
    3

    ```
    """

    int_value: int = 0
    """Integer value of the digit, 0 to 9. Defaults to 0."""

    alt_s: Optional[str] = None
    """Alternative simplified character, e.g. '两' for 2. Defaults to `None`.
    """

    alt_t: Optional[str] = None
    """Alternative traditional character, e.g. '兩' for 2. Defaults to `None`.
    """

    def __str__(self) -> str:
        return str(self.int_value)

    def __repr__(self) -> str:
        return self.__str__()

Chinese number digit class

Example:

>>> CNDigit(*"三叁叁叁", int_value=3)
3
CNDigit( simplified: Union[str, NoneType] = None, traditional: Union[str, NoneType] = None, upper_simplified: Union[str, NoneType] = None, upper_traditional: Union[str, NoneType] = None, int_value: int = 0, alt_s: Union[str, NoneType] = None, alt_t: Union[str, NoneType] = None)
int_value: int = 0

Integer value of the digit, 0 to 9. Defaults to 0.

alt_s: Union[str, NoneType] = None

Alternative simplified character, e.g. '两' for 2. Defaults to None.

alt_t: Union[str, NoneType] = None

Alternative traditional character, e.g. '兩' for 2. Defaults to None.

class CNMath(CNChar):
class CNMath(CNChar):
    """
    Chinese math operators

    A `CNChar` paired with its mathematical symbol and a callable that applies
    the operation. Its string form is the symbol.

    Example:
    ```python

    >>> positive = CNMath(*"正正正正", symbol="+", expression=lambda x: +x)
    >>> positive.symbol
    '+'

    ```
    """

    symbol: str = ""
    """Mathematical symbol, e.g. '+'. Defaults to the empty string `""`."""

    expression: Optional[Callable] = None
    """Mathematical expression, e.g. `lambda x: +x`. Defaults to `None`."""

    def __str__(self) -> str:
        return self.symbol

    def __repr__(self) -> str:
        return self.__str__()

Chinese math operators

Example:

>>> positive = CNMath(*"正正正正", symbol="+", expression=lambda x: +x)
>>> positive.symbol
'+'
CNMath( simplified: Union[str, NoneType] = None, traditional: Union[str, NoneType] = None, upper_simplified: Union[str, NoneType] = None, upper_traditional: Union[str, NoneType] = None, symbol: str = '', expression: Union[Callable, NoneType] = None)
symbol: str = ''

Mathematical symbol, e.g. '+'. Defaults to the empty string "".

expression: Union[Callable, NoneType] = None

Mathematical expression, e.g. lambda x: +x. Defaults to None.

SymbolType = typing.Union[CNUnit, CNDigit, CNMath]
class MathSymbols:
class MathSymbols:
    """Math symbols used in Chinese for both traditional and simplified Chinese

    - positive = ['正', '正']
    - negative = ['负', '負']
    - point = ['点', '點']

    Used in `NumberingSystem`. Iterating an instance yields its attribute
    values in the order they were set.

    Example:
    ```python

    >>> positive = CNMath(*"正正正正", symbol="+", expression=lambda x: +x)
    >>> negative = CNMath(*"负負负負", symbol="-", expression=lambda x: -x)
    >>> point = CNMath(*"点點点點", symbol=".", expression=lambda integer, decimal: float(str(integer) + "." + str(decimal)))
    >>> math = MathSymbols(positive, negative, point)
    >>> math.positive
    +
    >>> list(math)
    [+, -, .]
    >>> for i in math:
    ...     print(i)
    +
    -
    .

    ```
    """

    positive: CNMath
    """Positive"""

    negative: CNMath
    """Negative"""

    point: CNMath
    """Decimal point"""

    def __iter__(self):
        # Instance attributes only; class-level annotations hold no values.
        yield from vars(self).values()

Math symbols used in Chinese for both traditional and simplified Chinese

  • positive = ['正', '正']
  • negative = ['负', '負']
  • point = ['点', '點']

Used in NumberingSystem.

Example:

>>> positive = CNMath(*"正正正正", symbol="+", expression=lambda x: +x)
>>> negative = CNMath(*"负負负負", symbol="-", expression=lambda x: -x)
>>> point = CNMath(*"点點点點", symbol=".", expression=lambda integer, decimal: float(str(integer) + "." + str(decimal)))
>>> math = MathSymbols(positive, negative, point)
>>> math.positive
+
>>> list(math)
[+, -, .]
>>> for i in math:
...     print(i)
+
-
.
MathSymbols( positive: CNMath, negative: CNMath, point: CNMath)
positive: CNMath

Positive

negative: CNMath

Negative

point: CNMath

Decimal point

class NumberingSystem:
312class NumberingSystem:
313    """Numbering system class
314
315    Example:
316    ```python
317
318    >>> system = NumberingSystem(NumberingType.MID)
319    >>> system.numbering_type
320    <NumberingType.MID: 'mid'>
321    >>> system.digits[0]
322    0
323    >>> system.units[0]
324    10^1
325    >>> system.units[7].simplified
326    '垓'
327    >>> system.math.positive
328    +
329
330    ```
331    """
332
333    # region: fields
334    _numbering_type: NumberingType
335    """Numbering type"""
336
337    _digits: List[CNDigit]
338    """Digits"""
339
340    _units: List[CNUnit]
341    """Units"""
342
343    _maths: MathSymbols
344    """Math symbols"""
345
346    @property
347    def numbering_type(self) -> NumberingType:
348        """Numbering type"""
349        return self._numbering_type
350
351    @numbering_type.setter
352    def numbering_type(self, value: NumberingType):
353        self._numbering_type = value
354        self._units[4:] = [CNUnit.create(i, self._numbering_type) for i in self._numbering_type.powers]
355
356    @property
357    def digits(self) -> List[CNDigit]:
358        """Digits"""
359        result = [CNDigit(*v, int_value=i) for i, v in enumerate(DIGITS)]  # type: ignore[misc]
360        result[0].alt_s, result[0].alt_t = "〇", "〇"
361        result[2].alt_s, result[2].alt_t = "两", "兩"
362        return result
363
364    @property
365    def units(self) -> List[CNUnit]:
366        """Units"""
367        return self._units
368
369    @cached_property
370    def math(self) -> MathSymbols:
371        """Math symbols"""
372        positive_cn = CNMath(*"正正", symbol="+", expression=lambda x: x)  # type: ignore[misc]
373        negative_cn = CNMath(*"负負", symbol="-", expression=lambda x: -x)  # type: ignore[misc]
374        point_cn = CNMath(*"点點", symbol=".", expression=lambda i, d: float(f"{i}.{d}"))  # type: ignore[misc]
375        return MathSymbols(positive_cn, negative_cn, point_cn)
376
377    # endregion: fields
378
379    def __init__(self, numbering_type: NumberingType = NumberingType.MID) -> None:
380        """Construction"""
381        self._numbering_type = numbering_type
382        self._units = [CNUnit(*UNITS[i], power=i + 1) for i in range(4)]  # type: ignore[misc]
383        self._units[4:] = [CNUnit.create(i, self._numbering_type) for i in self._numbering_type.powers]
384
385    def __getitem__(self, key: str) -> SymbolType:
386        if not isinstance(key, str):
387            raise ValueError(f"{key = } should be a string.")
388
389        for c in self.units + self.digits + list(self.math):
390            if key in c.all_forms:
391                return c
392
393        raise ValueError(f"{key} is not in {self.numbering_type.name} system.")
394
395    def cn2symbols(self, cn_str: str) -> Tuple[List[SymbolType], List[SymbolType]]:
396        """Chinese string to symbols
397
398        Example:
399        ```python
400
401        >>> system = NumberingSystem(NumberingType.MID)
402        >>> system.cn2symbols("一百八")
403        ([1, 10^2, 8], [])
404        >>> system.cn2symbols("一百八十")
405        ([1, 10^2, 8, 10^1], [])
406        >>> system.cn2symbols("一百八点五六七")
407        ([1, 10^2, 8], [5, 6, 7])
408        >>> system.cn2symbols("两千万一百八十")
409        ([2, 10^7, 1, 10^2, 8, 10^1], [])
410        >>> system.cn2symbols("正两千万一百八十")
411        ([+, 2, 10^7, 1, 10^2, 8, 10^1], [])
412        >>> system.cn2symbols("负两千万一百八十")
413        ([-, 2, 10^7, 1, 10^2, 8, 10^1], [])
414        >>> system.cn2symbols("点负两千万一百八十")
415        Traceback (most recent call last):
416        ...
417        ValueError: First symbol in decimal part should not be a math symbol, - is provided.
418        >>> system.cn2symbols("两千万点一百点八十")
419        Traceback (most recent call last):
420        ...
421        ValueError: Multiple points in the number 两千万点一百点八十.
422        >>> system.cn2symbols("两千万点一百點八十")
423        Traceback (most recent call last):
424        ...
425        ValueError: Multiple points in the number 两千万点一百點八十.
426
427        ```
428
429        Args:
430            cn_str (str): Chinese number
431
432        Returns:
433            Tuple[List[SymbolType], List[SymbolType]]: Integer symbols, decimal symbols
434        """
435        if cn_str == "":
436            return [], []
437
438        int_part, dec_part = cn_str, ""
439        int_dec = re.split(r"\.|点|點", cn_str)
440        if len(int_dec) > 2:
441            raise ValueError(f"Multiple points in the number {cn_str}.")
442        int_part, dec_part = int_dec if len(int_dec) == 2 else (int_dec[0], "")
443
444        integer_value = [copy.deepcopy(self[c]) for c in int_part]
445
446        for i, v in enumerate(integer_value):
447            if not isinstance(v, CNUnit):
448                continue
449            if i + 1 < len(integer_value) and isinstance(integer_value[i + 1], CNUnit):
450                v.power += integer_value[i + 1].power  # type: ignore[union-attr]
451                integer_value[i + 1] = None  # type: ignore[union-attr]
452
453        integer_value = [v for v in integer_value if v is not None]
454
455        for i, v in enumerate(integer_value):
456            if not isinstance(v, CNUnit):
457                continue
458            for u in integer_value[i + 1 :]:
459                if isinstance(u, CNUnit) and u.power > v.power:
460                    v.power += u.power
461                    break
462
463        decimal_value = [copy.deepcopy(self[c]) for c in dec_part]
464
465        # if first symbol is a math symbol, e.g. '正两千万一百八十'
466        if int_part and (first_symbol := [c for c in self.math if int_part[0] in c.all_forms]):
467            integer_value[0] = first_symbol[0]
468
469        # if first symbol is a math symbol, e.g. '点负两千万一百八十'
470        if decimal_value and (decimal_value[0] in self.math):
471            raise ValueError(
472                f"First symbol in decimal part should not be a math symbol, {decimal_value[0]} is provided."
473            )
474
475        return integer_value, decimal_value
476
477    def _refine_integer_symbols(self, integer_symbols: List[SymbolType]) -> List[SymbolType]:
478        """Refine integer symbols. Do not change math symbols.
479
480        Example:
481        ```python
482
483        >>> s = NumberingSystem(NumberingType.MID)
484        >>> s._refine_integer_symbols(s.cn2symbols("十八")[0])
485        [1, 10^1, 8, 10^0]
486        >>> s._refine_integer_symbols(s.cn2symbols("两千万一百八十")[0])
487        [2, 10^7, 1, 10^2, 8, 10^1]
488        >>> s._refine_integer_symbols(s.cn2symbols("两千万零一百八十")[0])
489        [2, 10^7, 1, 10^2, 8, 10^1]
490        >>> s._refine_integer_symbols(s.cn2symbols("两亿六")[0])
491        [2, 10^8, 6, 10^7]
492
493        ```
494
495        Args:
496            integer_symbols (List[SymbolType]): Raw integer symbols
497
498        Returns:
499            List[SymbolType]: Refined symbols
500        """
501        if not integer_symbols:
502            return integer_symbols
503
504        # first symbol is unit, add 1 before it, e.g. , "十五" to "一十五"
505        if isinstance(integer_symbols[0], CNUnit) and integer_symbols[0].power == 1:
506            integer_symbols = [self.digits[1]] + integer_symbols
507
508        # last symbol is digit and the second last symbol is unit
509        # e.g. "十 五" to "十 五 10^0",  "二 百 五" to "二 百 五 10^1"
510        if len(integer_symbols) > 1:
511            if isinstance(integer_symbols[-1], CNDigit) and isinstance(integer_symbols[-2], CNUnit):
512                # add a dummy unit
513                integer_symbols += [CNUnit(power=integer_symbols[-2].power - 1)]
514
515        result: List[SymbolType] = []
516        unit_count = 0
517        for s in integer_symbols:
518            if isinstance(s, CNMath):
519                # reset unit_count, e.g. "两千万" has two units
520                unit_count = 0
521                continue
522            if isinstance(s, CNDigit):
523                # reset unit_count, e.g. "两千万" has two units
524                unit_count = 0
525                if s.int_value > 0:
526                    result.append(s)
527                continue
528            if not isinstance(s, CNUnit):
529                raise ValueError(f"Invalid symbol {s} in {integer_symbols}.")
530
531            # create a dummy unit
532            current_unit = CNUnit("", "", "", "", s.power)
533            unit_count += 1
534
535            # store the first met unit
536            if unit_count == 1:
537                result.append(current_unit)
538                continue
539
540            # if there are more than one units, sum them, e.g. "两 千 万" to "两 10^7"
541            result[-1].power += current_unit.power  # type: ignore[union-attr]
542        if integer_symbols[0] == self.math.negative:
543            result = [self.math.negative] + result
544        return result
545
546    def get_int_value(self, integer_symbols: List[SymbolType]) -> int:
547        """Compute the value from symbol
548
549        Example:
550        ```python
551
552        >>> s = NumberingSystem(NumberingType.MID)
553        >>> s.get_int_value(s.cn2symbols("十八")[0])
554        18
555        >>> s.get_int_value(s.cn2symbols("两千万一百八十")[0])
556        20000180
557        >>> s.get_int_value(s.cn2symbols("两亿六")[0])
558        260000000
559
560        ```
561
562        Args:
563            integer_symbols (List[SymbolType]): Symbols, without point
564
565        Returns:
566            int: value
567        """
568        integer_symbols = self._refine_integer_symbols(integer_symbols)
569
570        value = [0]
571        last_power = 0
572        for s in integer_symbols:
573            if isinstance(s, CNDigit):
574                value[-1] = s.int_value
575            elif isinstance(s, CNUnit):
576                value[-1] *= pow(10, s.power)
577                if s.power > last_power:
578                    value[:-1] = [v * pow(10, s.power) for v in value[:-1]]  # pylint: disable=no-member
579                    last_power = s.power
580                value.append(0)
581        return sum(value)
582
583    def int2symbols(self, int_value: Union[int, str]) -> List[SymbolType]:
584        """Integer to symbols
585
586        Example:
587        ```python
588
589        >>> s = NumberingSystem(NumberingType.MID)
590        >>> s.int2symbols(18)
591        [1, 10^1, 8]
592        >>> s.int2symbols(20000180)
593        [2, 10^3, 10^4, 0, 1, 10^2, 8, 10^1]
594        >>> s.int2symbols(26)
595        [2, 10^1, 6]
596        >>> s.int2symbols(320)
597        [3, 10^2, 2, 10^1]
598        >>> s.int2symbols(220)
599        [2, 10^2, 2, 10^1]
600        >>> s.int2symbols("220")
601        [2, 10^2, 2, 10^1]
602
603        ```
604
605        Args:
606            int_value (Union[int, str]): Value string, e.g. "0.1", "34"
607
608        Returns:
609            List[SymbolType]: List of values
610        """
611        value_string = str(int_value)
612        striped_string = value_string.lstrip("0")
613
614        # record nothing if all zeros
615        if not striped_string:
616            return []
617
618        # record one digits
619        if len(striped_string) == 1:
620            result: List[SymbolType] = [self.digits[int(striped_string)]]
621            if len(value_string) != len(striped_string):
622                result = [self.digits[0] for _ in range(len(value_string) - len(striped_string))] + result
623            return result
624
625        # recursively record multiple digits
626
627        # find the unit for the first digit, e.g. 123 -> 10^2
628        result_unit = next(u for u in reversed(self.units) if u.power < len(striped_string))
629
630        # get the first part of the number, e.g. 123 -> 1
631        result_string = value_string[: -result_unit.power]
632
633        # recursively record the first part of the number, e.g. 123 -> [1, 10^2, 2, 10^1, 3]
634        return self.int2symbols(result_string) + [result_unit] + self.int2symbols(striped_string[-result_unit.power :])
635
636    def alt_two_symbols(self, integer_symbols: List[SymbolType]) -> List[SymbolType]:
637        """Alternative two symbols
638        e.g. "二百二" to "两百二", "二千二" to "两千二", "三亿零二万二" to "三亿零两万二
639
640        Args:
641            integer_symbols (List[SymbolType]): Symbols
642
643        Returns:
644            List[SymbolType]: Symbols
645        """
646        liang = self.digits[2]
647        for i, v in enumerate(integer_symbols):
648            if not isinstance(v, CNDigit):
649                continue
650            if v.int_value != 2:
651                continue
652            next_symbol = integer_symbols[i + 1] if i < len(integer_symbols) - 1 else None
653            previous_symbol = integer_symbols[i - 1] if i > 0 else None
654
655            # if the next symbol is not a unit, skip
656            if not isinstance(next_symbol, CNUnit):
657                continue
658
659            # e.g. "一亿零二百" leading_zero = True
660            leading_zero = getattr(previous_symbol, "int_value", "invalid") == 0  # False == 0 in Python
661
662            # e.g. "二百二" to "两百二"
663            previous_is_unit = isinstance(previous_symbol, CNUnit)
664
665            if not (leading_zero or previous_is_unit or (previous_symbol is None)):
666                continue
667
668            # e.g. "二百二" to "两百二", "二千二" to "两千二"
669            if next_symbol.power > 1:
670                integer_symbols[i].simplified = liang.alt_s
671                integer_symbols[i].traditional = liang.alt_t
672                integer_symbols[i].upper_simplified = liang.alt_s
673                integer_symbols[i].upper_traditional = liang.alt_t
674        return integer_symbols

Numbering system class

Example:

>>> system = NumberingSystem(NumberingType.MID)
>>> system.numbering_type
<NumberingType.MID: 'mid'>
>>> system.digits[0]
0
>>> system.units[0]
10^1
>>> system.units[7].simplified
'垓'
>>> system.math.positive
+
NumberingSystem( numbering_type: NumberingType = <NumberingType.MID: 'mid'>)
379    def __init__(self, numbering_type: NumberingType = NumberingType.MID) -> None:
380        """Construction"""
381        self._numbering_type = numbering_type
382        self._units = [CNUnit(*UNITS[i], power=i + 1) for i in range(4)]  # type: ignore[misc]
383        self._units[4:] = [CNUnit.create(i, self._numbering_type) for i in self._numbering_type.powers]

Construction

numbering_type: NumberingType
346    @property
347    def numbering_type(self) -> NumberingType:
348        """Numbering type"""
349        return self._numbering_type

Numbering type

digits: List[CNDigit]
356    @property
357    def digits(self) -> List[CNDigit]:
358        """Digits"""
359        result = [CNDigit(*v, int_value=i) for i, v in enumerate(DIGITS)]  # type: ignore[misc]
360        result[0].alt_s, result[0].alt_t = "〇", "〇"
361        result[2].alt_s, result[2].alt_t = "两", "兩"
362        return result

Digits

units: List[CNUnit]
364    @property
365    def units(self) -> List[CNUnit]:
366        """Units"""
367        return self._units

Units

math: MathSymbols
369    @cached_property
370    def math(self) -> MathSymbols:
371        """Math symbols"""
372        positive_cn = CNMath(*"正正", symbol="+", expression=lambda x: x)  # type: ignore[misc]
373        negative_cn = CNMath(*"负負", symbol="-", expression=lambda x: -x)  # type: ignore[misc]
374        point_cn = CNMath(*"点點", symbol=".", expression=lambda i, d: float(f"{i}.{d}"))  # type: ignore[misc]
375        return MathSymbols(positive_cn, negative_cn, point_cn)

Math symbols

def cn2symbols( self, cn_str: str) -> Tuple[List[Union[CNUnit, CNDigit, CNMath]], List[Union[CNUnit, CNDigit, CNMath]]]:
395    def cn2symbols(self, cn_str: str) -> Tuple[List[SymbolType], List[SymbolType]]:
396        """Chinese string to symbols
397
398        Example:
399        ```python
400
401        >>> system = NumberingSystem(NumberingType.MID)
402        >>> system.cn2symbols("一百八")
403        ([1, 10^2, 8], [])
404        >>> system.cn2symbols("一百八十")
405        ([1, 10^2, 8, 10^1], [])
406        >>> system.cn2symbols("一百八点五六七")
407        ([1, 10^2, 8], [5, 6, 7])
408        >>> system.cn2symbols("两千万一百八十")
409        ([2, 10^7, 1, 10^2, 8, 10^1], [])
410        >>> system.cn2symbols("正两千万一百八十")
411        ([+, 2, 10^7, 1, 10^2, 8, 10^1], [])
412        >>> system.cn2symbols("负两千万一百八十")
413        ([-, 2, 10^7, 1, 10^2, 8, 10^1], [])
414        >>> system.cn2symbols("点负两千万一百八十")
415        Traceback (most recent call last):
416        ...
417        ValueError: First symbol in decimal part should not be a math symbol, - is provided.
418        >>> system.cn2symbols("两千万点一百点八十")
419        Traceback (most recent call last):
420        ...
421        ValueError: Multiple points in the number 两千万点一百点八十.
422        >>> system.cn2symbols("两千万点一百點八十")
423        Traceback (most recent call last):
424        ...
425        ValueError: Multiple points in the number 两千万点一百點八十.
426
427        ```
428
429        Args:
430            cn_str (str): Chinese number
431
432        Returns:
433            Tuple[List[SymbolType], List[SymbolType]]: Integer symbols, decimal symbols
434        """
435        if cn_str == "":
436            return [], []
437
438        int_part, dec_part = cn_str, ""
439        int_dec = re.split(r"\.|点|點", cn_str)
440        if len(int_dec) > 2:
441            raise ValueError(f"Multiple points in the number {cn_str}.")
442        int_part, dec_part = int_dec if len(int_dec) == 2 else (int_dec[0], "")
443
444        integer_value = [copy.deepcopy(self[c]) for c in int_part]
445
446        for i, v in enumerate(integer_value):
447            if not isinstance(v, CNUnit):
448                continue
449            if i + 1 < len(integer_value) and isinstance(integer_value[i + 1], CNUnit):
450                v.power += integer_value[i + 1].power  # type: ignore[union-attr]
451                integer_value[i + 1] = None  # type: ignore[union-attr]
452
453        integer_value = [v for v in integer_value if v is not None]
454
455        for i, v in enumerate(integer_value):
456            if not isinstance(v, CNUnit):
457                continue
458            for u in integer_value[i + 1 :]:
459                if isinstance(u, CNUnit) and u.power > v.power:
460                    v.power += u.power
461                    break
462
463        decimal_value = [copy.deepcopy(self[c]) for c in dec_part]
464
465        # if first symbol is a math symbol, e.g. '正两千万一百八十'
466        if int_part and (first_symbol := [c for c in self.math if int_part[0] in c.all_forms]):
467            integer_value[0] = first_symbol[0]
468
469        # if first symbol is a math symbol, e.g. '点负两千万一百八十'
470        if decimal_value and (decimal_value[0] in self.math):
471            raise ValueError(
472                f"First symbol in decimal part should not be a math symbol, {decimal_value[0]} is provided."
473            )
474
475        return integer_value, decimal_value

Chinese string to symbols

Example:

>>> system = NumberingSystem(NumberingType.MID)
>>> system.cn2symbols("一百八")
([1, 10^2, 8], [])
>>> system.cn2symbols("一百八十")
([1, 10^2, 8, 10^1], [])
>>> system.cn2symbols("一百八点五六七")
([1, 10^2, 8], [5, 6, 7])
>>> system.cn2symbols("两千万一百八十")
([2, 10^7, 1, 10^2, 8, 10^1], [])
>>> system.cn2symbols("正两千万一百八十")
([+, 2, 10^7, 1, 10^2, 8, 10^1], [])
>>> system.cn2symbols("负两千万一百八十")
([-, 2, 10^7, 1, 10^2, 8, 10^1], [])
>>> system.cn2symbols("点负两千万一百八十")
Traceback (most recent call last):
...
ValueError: First symbol in decimal part should not be a math symbol, - is provided.
>>> system.cn2symbols("两千万点一百点八十")
Traceback (most recent call last):
...
ValueError: Multiple points in the number 两千万点一百点八十.
>>> system.cn2symbols("两千万点一百點八十")
Traceback (most recent call last):
...
ValueError: Multiple points in the number 两千万点一百點八十.
Arguments:
  • cn_str (str): Chinese number
Returns:

Tuple[List[SymbolType], List[SymbolType]]: Integer symbols, decimal symbols

def get_int_value( self, integer_symbols: List[Union[CNUnit, CNDigit, CNMath]]) -> int:
546    def get_int_value(self, integer_symbols: List[SymbolType]) -> int:
547        """Compute the value from symbol
548
549        Example:
550        ```python
551
552        >>> s = NumberingSystem(NumberingType.MID)
553        >>> s.get_int_value(s.cn2symbols("十八")[0])
554        18
555        >>> s.get_int_value(s.cn2symbols("两千万一百八十")[0])
556        20000180
557        >>> s.get_int_value(s.cn2symbols("两亿六")[0])
558        260000000
559
560        ```
561
562        Args:
563            integer_symbols (List[SymbolType]): Symbols, without point
564
565        Returns:
566            int: value
567        """
568        integer_symbols = self._refine_integer_symbols(integer_symbols)
569
570        value = [0]
571        last_power = 0
572        for s in integer_symbols:
573            if isinstance(s, CNDigit):
574                value[-1] = s.int_value
575            elif isinstance(s, CNUnit):
576                value[-1] *= pow(10, s.power)
577                if s.power > last_power:
578                    value[:-1] = [v * pow(10, s.power) for v in value[:-1]]  # pylint: disable=no-member
579                    last_power = s.power
580                value.append(0)
581        return sum(value)

Compute the value from symbol

Example:

>>> s = NumberingSystem(NumberingType.MID)
>>> s.get_int_value(s.cn2symbols("十八")[0])
18
>>> s.get_int_value(s.cn2symbols("两千万一百八十")[0])
20000180
>>> s.get_int_value(s.cn2symbols("两亿六")[0])
260000000
Arguments:
  • integer_symbols (List[SymbolType]): Symbols, without point
Returns:

int: value

def int2symbols( self, int_value: Union[int, str]) -> List[Union[CNUnit, CNDigit, CNMath]]:
583    def int2symbols(self, int_value: Union[int, str]) -> List[SymbolType]:
584        """Integer to symbols
585
586        Example:
587        ```python
588
589        >>> s = NumberingSystem(NumberingType.MID)
590        >>> s.int2symbols(18)
591        [1, 10^1, 8]
592        >>> s.int2symbols(20000180)
593        [2, 10^3, 10^4, 0, 1, 10^2, 8, 10^1]
594        >>> s.int2symbols(26)
595        [2, 10^1, 6]
596        >>> s.int2symbols(320)
597        [3, 10^2, 2, 10^1]
598        >>> s.int2symbols(220)
599        [2, 10^2, 2, 10^1]
600        >>> s.int2symbols("220")
601        [2, 10^2, 2, 10^1]
602
603        ```
604
605        Args:
606            int_value (Union[int, str]): Value string, e.g. "0.1", "34"
607
608        Returns:
609            List[SymbolType]: List of values
610        """
611        value_string = str(int_value)
612        striped_string = value_string.lstrip("0")
613
614        # record nothing if all zeros
615        if not striped_string:
616            return []
617
618        # record one digits
619        if len(striped_string) == 1:
620            result: List[SymbolType] = [self.digits[int(striped_string)]]
621            if len(value_string) != len(striped_string):
622                result = [self.digits[0] for _ in range(len(value_string) - len(striped_string))] + result
623            return result
624
625        # recursively record multiple digits
626
627        # find the unit for the first digit, e.g. 123 -> 10^2
628        result_unit = next(u for u in reversed(self.units) if u.power < len(striped_string))
629
630        # get the first part of the number, e.g. 123 -> 1
631        result_string = value_string[: -result_unit.power]
632
633        # recursively record the first part of the number, e.g. 123 -> [1, 10^2, 2, 10^1, 3]
634        return self.int2symbols(result_string) + [result_unit] + self.int2symbols(striped_string[-result_unit.power :])

Integer to symbols

Example:

>>> s = NumberingSystem(NumberingType.MID)
>>> s.int2symbols(18)
[1, 10^1, 8]
>>> s.int2symbols(20000180)
[2, 10^3, 10^4, 0, 1, 10^2, 8, 10^1]
>>> s.int2symbols(26)
[2, 10^1, 6]
>>> s.int2symbols(320)
[3, 10^2, 2, 10^1]
>>> s.int2symbols(220)
[2, 10^2, 2, 10^1]
>>> s.int2symbols("220")
[2, 10^2, 2, 10^1]
Arguments:
  • int_value (Union[int, str]): Non-negative integer or digit string, e.g. 34, "34"
Returns:

List[SymbolType]: List of values

def alt_two_symbols( self, integer_symbols: List[Union[CNUnit, CNDigit, CNMath]]) -> List[Union[CNUnit, CNDigit, CNMath]]:
636    def alt_two_symbols(self, integer_symbols: List[SymbolType]) -> List[SymbolType]:
637        """Alternative two symbols
638        e.g. "二百二" to "两百二", "二千二" to "两千二", "三亿零二万二" to "三亿零两万二
639
640        Args:
641            integer_symbols (List[SymbolType]): Symbols
642
643        Returns:
644            List[SymbolType]: Symbols
645        """
646        liang = self.digits[2]
647        for i, v in enumerate(integer_symbols):
648            if not isinstance(v, CNDigit):
649                continue
650            if v.int_value != 2:
651                continue
652            next_symbol = integer_symbols[i + 1] if i < len(integer_symbols) - 1 else None
653            previous_symbol = integer_symbols[i - 1] if i > 0 else None
654
655            # if the next symbol is not a unit, skip
656            if not isinstance(next_symbol, CNUnit):
657                continue
658
659            # e.g. "一亿零二百" leading_zero = True
660            leading_zero = getattr(previous_symbol, "int_value", "invalid") == 0  # False == 0 in Python
661
662            # e.g. "二百二" to "两百二"
663            previous_is_unit = isinstance(previous_symbol, CNUnit)
664
665            if not (leading_zero or previous_is_unit or (previous_symbol is None)):
666                continue
667
668            # e.g. "二百二" to "两百二", "二千二" to "两千二"
669            if next_symbol.power > 1:
670                integer_symbols[i].simplified = liang.alt_s
671                integer_symbols[i].traditional = liang.alt_t
672                integer_symbols[i].upper_simplified = liang.alt_s
673                integer_symbols[i].upper_traditional = liang.alt_t
674        return integer_symbols

Use the alternative form of two where applicable, e.g. "二百二" to "两百二", "二千二" to "两千二", "三亿零二万二" to "三亿零两万二"

Arguments:
  • integer_symbols (List[SymbolType]): Symbols
Returns:

List[SymbolType]: Symbols

def cn2num( chinese_string: str, numbering_type: Union[str, NumberingType] = <NumberingType.MID: 'mid'>) -> Union[int, float]:
def cn2num(
    chinese_string: str,
    numbering_type: Union[str, NumberingType] = NumberingType.MID,
) -> Union[int, float]:
    """Convert a Chinese number to an `int` or `float` value.

    Example:
    ```python

    >>> cn2num("负零点五")
    -0.5
    >>> cn2num("一百八")
    180
    >>> cn2num("一百八十")
    180
    >>> cn2num("一百八点五六七")
    180.567
    >>> cn2num("两千万一百八十")
    20000180
    >>> cn2num("正两千万一百八十")
    20000180

    ```

    Args:
        chinese_string (str): Chinese number.
        numbering_type (Union[str, NumberingType], optional): numbering type. Defaults to `NumberingType.MID`.

    Raises:
        ValueError: Raised when a character is not in the numbering system, e.g. '你' is not a number nor a unit

    Returns:
        Union[int, float]: `int` or `float` value
    """

    if isinstance(numbering_type, str):
        numbering_type = NumberingType(numbering_type)

    system = NumberingSystem(numbering_type)
    integer_symbols, decimal_symbols = system.cn2symbols(chinese_string)
    magnitude: Union[int, float] = system.get_int_value(integer_symbols)

    if decimal_symbols:
        # only digits count behind the point, units are skipped
        fraction = "".join(str(symbol.int_value) for symbol in decimal_symbols if isinstance(symbol, CNDigit))
        magnitude = float(f"{magnitude}.{fraction}")

    if integer_symbols and integer_symbols[0] == system.math.negative:
        return -magnitude
    return magnitude

Convert a Chinese number to an int or float value.

Example:

>>> cn2num("负零点五")
-0.5
>>> cn2num("一百八")
180
>>> cn2num("一百八十")
180
>>> cn2num("一百八点五六七")
180.567
>>> cn2num("两千万一百八十")
20000180
>>> cn2num("正两千万一百八十")
20000180
Arguments:
  • chinese_string (str): Chinese number.
  • numbering_type (Union[str, NumberingType], optional): numbering type. Defaults to NumberingType.MID.
Raises:
  • ValueError: Raised when a character is not in the numbering system, e.g. '你' is not a number nor a unit
Returns:

Union[int, float]: int or float value

def num2cn( num: Union[int, float, str], numbering_type: Union[str, NumberingType] = <NumberingType.MID: 'mid'>, upper: bool = False, traditional: bool = False, alt_0: bool = False, alt_2: bool = False) -> str:
def _split_number(num: Union[int, float, str]) -> Tuple[bool, int, str]:
    """Split a number into sign, exact integer magnitude and decimal digit string.

    The integer part is parsed from the decimal text instead of going through
    `float`, so large integers keep every digit.

    Args:
        num (Union[int, float, str]): `int`, `float` or `str` value

    Returns:
        Tuple[bool, int, str]: is negative, integer magnitude, digits behind the point
    """
    from decimal import Decimal  # local import: only needed to expand scientific notation

    text = str(int(num)) if isinstance(num, int) else str(num).strip()

    negative = text.startswith("-")
    if text[:1] in ("+", "-"):
        text = text[1:]

    # expand scientific notation exactly, e.g. "1e+20" or "1.5e-07"
    if re.fullmatch(r"(\d+\.?\d*|\.\d+)[eE][+-]?\d+", text):
        text = format(Decimal(text), "f")

    int_text, _, dec_text = text.partition(".")
    try:
        int_value = int(int_text)
    except ValueError:
        # not a plain decimal literal (e.g. ".5", "" or "inf"): keep the float based behaviour and errors
        int_value = abs(int(float(num)))

    # negative zero is written without a sign
    negative = negative and (int_value != 0 or dec_text.strip("0") != "")
    return negative, int_value, dec_text


def num2cn(
    num: Union[int, float, str],
    numbering_type: Union[str, NumberingType] = NumberingType.MID,
    upper: bool = False,
    traditional: bool = False,
    alt_0: bool = False,
    alt_2: bool = False,
) -> str:
    """Integer or float value to Chinese string

    Negative values are prefixed with the negative symbol of the numbering system.
    Integers of any size are converted without loss of precision.

    Example:
    ```python

    >>> num2cn(16)
    '十六'
    >>> num2cn(1)
    '一'
    >>> num2cn(116)
    '一百一十六'
    >>> num2cn(2401, alt_2=True)
    '两千四百零一'
    >>> num2cn(101)
    '一百零一'
    >>> num2cn(float("3.4"), numbering_type=NumberingType.HIGH, alt_2=True, upper=False, traditional=False)
    '三点四'
    >>> num2cn("3.4", numbering_type=NumberingType.HIGH, alt_2=True, upper=False, traditional=False)
    '三点四'
    >>> num2cn(23232.005184132423423423300, numbering_type=NumberingType.HIGH, alt_2=True, upper=False, traditional=True)
    '兩萬叁仟兩佰叁拾贰點零零伍壹捌肆壹叁贰肆贰肆'
    >>> num2cn("23232.005184132423423423300", numbering_type=NumberingType.HIGH, alt_2=True, upper=False, traditional=True)
    '兩萬叁仟兩佰叁拾贰點零零伍壹捌肆壹叁贰肆贰叁肆贰叁肆贰叁叁零零'
    >>> num2cn('023232.005184132423423423300', numbering_type=NumberingType.HIGH, alt_2=False, upper=False, traditional=False)
    '二万三千二百三十二点零零五一八四一三二四二三四二三四二三三零零'
    >>> num2cn(111180000)
    '一亿一千一百一十八万'
    >>> num2cn(1821010)
    '一百八十二万一千零一十'
    >>> num2cn(182.1)
    '一百八十二点一'
    >>> num2cn('3.4')
    '三点四'
    >>> num2cn(16)
    '十六'
    >>> num2cn(10600)
    '一万零六百'
    >>> num2cn(110)
    '一百一'
    >>> num2cn(1600)
    '一千六'
    >>> num2cn(-16)
    '负十六'
    >>> num2cn(-0.5)
    '负零点五'
    >>> num2cn(10**16 + 1)
    '一京零一'

    ```

    Args:
        num (Union[int, float, str]): `int`, `float` or `str` value
        numbering_type (Union[str, NumberingType], optional): Numbering type. Defaults to `NumberingType.MID`.
        upper (bool, optional): Capitalized numbers. Defaults to `False`.
        traditional (bool, optional): Traditional Chinese. Defaults to `False`.
        alt_0 (bool, optional): Use alternative form of zero. Defaults to `False`.
        alt_2 (bool, optional): Use alternative form of two. Defaults to `False`.

    Raises:
        ValueError: `alt_0` or `alt_2` is combined with `upper`, or `num` is not a valid number

    Returns:
        str: Chinese string
    """

    if alt_2 and upper:
        raise ValueError("alt_2 and upper cannot be True at the same time.")

    if alt_0 and upper:
        raise ValueError("alt_0 and upper cannot be True at the same time.")

    numbering_type = NumberingType(numbering_type) if isinstance(numbering_type, str) else numbering_type
    system = NumberingSystem(numbering_type)

    # the sign is handled separately, int2symbols only understands digits
    negative, int_value, dec_string = _split_number(num)

    int_symbols = system.int2symbols(int_value)
    dec_symbols = [system.digits[int(c)] for c in dec_string]

    # e.g. "二百二" to "两百二", "二千二" to "两千二", "三亿零二万二" to "三亿零两万二"
    if alt_2:
        int_symbols = system.alt_two_symbols(int_symbols)

    # attribute name for simplified or traditional with upper case or not
    attr_name = "traditional" if traditional else "simplified"
    if upper:
        attr_name = "upper_" + attr_name

    # remove leading '一' for '十', e.g. 一十六 to 十六 in integer part
    if len(int_symbols) > 1 and (int_symbols[0] == system.digits[1]) and (getattr(int_symbols[1], "power", -1) == 1):
        int_symbols = int_symbols[1:]

    # remove trailing units, 1600 -> 一千六, 10600 -> 一萬零六百, 101600 -> 十萬一千六 in integer part
    if len(int_symbols) > 3 and isinstance(int_symbols[-1], CNUnit):
        if getattr(int_symbols[-3], "power", None) == (int_symbols[-1].power + 1):
            int_symbols = int_symbols[:-1]

    int_string = "".join(getattr(s, attr_name) for s in int_symbols)
    int_string = re.sub(r"零+", "零", int_string)  # remove multiple zeros in integer part only
    if not int_string:
        int_string = "零"

    dec_string = "".join(getattr(s, attr_name) for s in dec_symbols)

    result = int_string

    if dec_string:
        result += getattr(system.math.point, attr_name) + dec_string

    if alt_0:
        result = result.replace(getattr(system.digits[0], attr_name), str(system.digits[0].alt_s))

    # the sign goes in front of the finished string so that none of the steps above touches it
    if negative:
        result = getattr(system.math.negative, attr_name) + result

    return result

Integer or float value to Chinese string

Example:

>>> num2cn(16)
'十六'
>>> num2cn(1)
'一'
>>> num2cn(116)
'一百一十六'
>>> num2cn(2401, alt_2=True)
'两千四百零一'
>>> num2cn(101)
'一百零一'
>>> num2cn(float("3.4"), numbering_type=NumberingType.HIGH, alt_2=True, upper=False, traditional=False)
'三点四'
>>> num2cn("3.4", numbering_type=NumberingType.HIGH, alt_2=True, upper=False, traditional=False)
'三点四'
>>> num2cn(23232.005184132423423423300, numbering_type=NumberingType.HIGH, alt_2=True, upper=False, traditional=True)
'兩萬叁仟兩佰叁拾贰點零零伍壹捌肆壹叁贰肆贰肆'
>>> num2cn("23232.005184132423423423300", numbering_type=NumberingType.HIGH, alt_2=True, upper=False, traditional=True)
'兩萬叁仟兩佰叁拾贰點零零伍壹捌肆壹叁贰肆贰叁肆贰叁肆贰叁叁零零'
>>> num2cn('023232.005184132423423423300', numbering_type=NumberingType.HIGH, alt_2=False, upper=False, traditional=False)
'二万三千二百三十二点零零五一八四一三二四二三四二三四二三三零零'
>>> num2cn(111180000)
'一亿一千一百一十八万'
>>> num2cn(1821010)
'一百八十二万一千零一十'
>>> num2cn(182.1)
'一百八十二点一'
>>> num2cn('3.4')
'三点四'
>>> num2cn(16)
'十六'
>>> num2cn(10600)
'一万零六百'
>>> num2cn(110)
'一百一'
>>> num2cn(1600)
'一千六'
Arguments:
  • num (Union[int, float, str]): int, float or str value
  • numbering_type (Union[str, NumberingType], optional): Numbering type. Defaults to NumberingType.MID.
  • upper (bool, optional): Capitalized numbers. Defaults to False.
  • traditional (bool, optional): Traditional Chinese. Defaults to False.
  • alt_0 (bool, optional): Use alternative form of zero. Defaults to False.
  • alt_2 (bool, optional): Use alternative form of two. Defaults to False.
Returns:

str: Chinese string