mgplot.run_plot
run_plot.py This code contains a function to plot and highlight the 'runs' in a series.
1""" 2run_plot.py 3This code contains a function to plot and highlighted 4the 'runs' in a series. 5""" 6 7# --- imports 8from typing import Final 9from collections.abc import Sequence 10from pandas import Series, concat 11from matplotlib.pyplot import Axes 12from matplotlib import patheffects as pe 13 14from mgplot.settings import DataT 15from mgplot.line_plot import line_plot, LINE_KW_TYPES 16from mgplot.kw_type_checking import ( 17 limit_kwargs, 18 ExpectedTypeDict, 19 validate_kwargs, 20 validate_expected, 21 report_kwargs, 22) 23from mgplot.utilities import constrain_data, check_clean_timeseries 24from mgplot.keyword_names import ( 25 COLOR, 26 THRESHOLD, 27 ROUNDING, 28 HIGHLIGHT, 29 DIRECTION, 30 DRAWSTYLE, 31) 32 33# --- constants 34 35RUN_KW_TYPES: Final[ExpectedTypeDict] = LINE_KW_TYPES | { 36 THRESHOLD: float, 37 HIGHLIGHT: (str, Sequence, (str,)), # colors for highlighting the runs 38 DIRECTION: str, # "up", "down" or "both" 39} 40validate_expected(RUN_KW_TYPES, __name__) 41 42# --- functions 43 44 45def _identify_runs( 46 series: Series, 47 threshold: float, 48 up: bool, # False means down 49) -> tuple[Series, Series]: 50 """Identify monotonic increasing/decreasing runs.""" 51 52 diffed = series.diff() 53 change_points = concat( 54 [diffed[diffed.gt(threshold)], diffed[diffed.lt(-threshold)]] 55 ).sort_index() 56 if series.index[0] not in change_points.index: 57 starting_point = Series([0], index=[series.index[0]]) 58 change_points = concat([change_points, starting_point]).sort_index() 59 facing = change_points > 0 if up else change_points < 0 60 cycles = (facing & ~facing.shift().astype(bool)).cumsum() 61 return cycles[facing], change_points 62 63 64def _plot_runs( 65 axes: Axes, 66 series: Series, 67 up: bool, 68 **kwargs, 69) -> None: 70 """Highlight the runs of a series.""" 71 72 threshold = kwargs[THRESHOLD] 73 match kwargs.get(HIGHLIGHT): # make sure highlight is a color string 74 case str(): 75 highlight = kwargs.get(HIGHLIGHT) 76 case 
Sequence(): 77 highlight = kwargs[HIGHLIGHT][0] if up else kwargs[HIGHLIGHT][1] 78 case _: 79 raise ValueError( 80 f"Invalid type for highlight: {type(kwargs.get(HIGHLIGHT))}. " 81 "Expected str or Sequence." 82 ) 83 84 # highlight the runs 85 stretches, change_points = _identify_runs(series, threshold, up=up) 86 for k in range(1, stretches.max() + 1): 87 stretch = stretches[stretches == k] 88 axes.axvspan( 89 stretch.index.min(), 90 stretch.index.max(), 91 color=highlight, 92 zorder=-1, 93 ) 94 space_above = series.max() - series[stretch.index].max() 95 space_below = series[stretch.index].min() - series.min() 96 y_pos, vert_align = ( 97 (series.max(), "top") 98 if space_above > space_below 99 else (series.min(), "bottom") 100 ) 101 text = axes.text( 102 x=stretch.index.min(), 103 y=y_pos, 104 s=( 105 change_points[stretch.index].sum().round(kwargs[ROUNDING]).astype(str) 106 + " pp" 107 ), 108 va=vert_align, 109 ha="left", 110 fontsize="x-small", 111 rotation=90, 112 ) 113 text.set_path_effects([pe.withStroke(linewidth=5, foreground="w")]) 114 115 116def run_plot(data: DataT, **kwargs) -> Axes: 117 """Plot a series of percentage rates, highlighting the increasing runs. 118 119 Arguments 120 - data - ordered pandas Series of percentages, with PeriodIndex 121 - **kwargs 122 - threshold - float - used to ignore micro noise near zero 123 (for example, threshhold=0.01) 124 - round - int - rounding for highlight text 125 - highlight - str or Sequence[str] - color(s) for highlighting the 126 runs, two colors can be specified in a list if direction is "both" 127 - direction - str - whether the highlight is for an upward 128 or downward or both runs. Options are "up", "down" or "both". 129 - in addition the **kwargs for line_plot are accepted. 
130 131 Return 132 - matplotlib Axes object""" 133 134 # --- check the kwargs 135 me = "run_plot" 136 report_kwargs(called_from=me, **kwargs) 137 kwargs = validate_kwargs(RUN_KW_TYPES, me, **kwargs) 138 139 # --- check the data 140 series = check_clean_timeseries(data, me) 141 if not isinstance(series, Series): 142 raise TypeError("series must be a pandas Series for run_plot()") 143 series, kwargs = constrain_data(series, **kwargs) 144 145 # --- default arguments - in **kwargs 146 kwargs[THRESHOLD] = kwargs.get(THRESHOLD, 0.1) 147 kwargs[DIRECTION] = kwargs.get(DIRECTION, "both") 148 kwargs[ROUNDING] = kwargs.get(ROUNDING, 2) 149 kwargs[HIGHLIGHT] = ( 150 kwargs.get(HIGHLIGHT, ("gold", "skyblue") if kwargs[DIRECTION] == "both" else "gold") 151 ) 152 kwargs[COLOR] = kwargs.get(COLOR, "darkblue") 153 154 # --- plot the line 155 kwargs[DRAWSTYLE] = kwargs.get(DRAWSTYLE, "steps-post") 156 lp_kwargs = limit_kwargs(LINE_KW_TYPES, **kwargs) 157 axes = line_plot(series, **lp_kwargs) 158 159 # plot the runs 160 match kwargs[DIRECTION]: 161 case "up": 162 _plot_runs(axes, series, up=True, **kwargs) 163 case "down": 164 _plot_runs(axes, series, up=False, **kwargs) 165 case "both": 166 _plot_runs(axes, series, up=True, **kwargs) 167 _plot_runs(axes, series, up=False, **kwargs) 168 case _: 169 raise ValueError( 170 f"Invalid value for direction: {kwargs[DIRECTION]}. " 171 "Expected 'up', 'down', or 'both'." 172 ) 173 return axes
RUN_KW_TYPES: Final[ExpectedTypeDict] =
{'ax': (<class 'matplotlib.axes._axes.Axes'>, <class 'NoneType'>), 'style': (<class 'str'>, <class 'collections.abc.Sequence'>, (<class 'str'>,)), 'width': (<class 'float'>, <class 'int'>, <class 'collections.abc.Sequence'>, (<class 'float'>, <class 'int'>)), 'color': (<class 'str'>, <class 'collections.abc.Sequence'>, (<class 'str'>,)), 'alpha': (<class 'float'>, <class 'collections.abc.Sequence'>, (<class 'float'>,)), 'drawstyle': (<class 'str'>, <class 'collections.abc.Sequence'>, (<class 'str'>,), <class 'NoneType'>), 'marker': (<class 'str'>, <class 'collections.abc.Sequence'>, (<class 'str'>,), <class 'NoneType'>), 'markersize': (<class 'float'>, <class 'collections.abc.Sequence'>, (<class 'float'>,), <class 'int'>, <class 'NoneType'>), 'dropna': (<class 'bool'>, <class 'collections.abc.Sequence'>, (<class 'bool'>,)), 'annotate': (<class 'bool'>, <class 'collections.abc.Sequence'>, (<class 'bool'>,)), 'rounding': (<class 'collections.abc.Sequence'>, (<class 'bool'>, <class 'int'>), <class 'int'>, <class 'bool'>, <class 'NoneType'>), 'fontsize': (<class 'collections.abc.Sequence'>, (<class 'str'>, <class 'int'>), <class 'str'>, <class 'int'>, <class 'NoneType'>), 'fontname': (<class 'str'>, <class 'collections.abc.Sequence'>, (<class 'str'>,)), 'rotation': (<class 'int'>, <class 'float'>, <class 'collections.abc.Sequence'>, (<class 'int'>, <class 'float'>)), 'annotate_color': (<class 'str'>, <class 'collections.abc.Sequence'>, (<class 'str'>,), <class 'bool'>, <class 'collections.abc.Sequence'>, (<class 'bool'>,), <class 'NoneType'>), 'plot_from': (<class 'int'>, <class 'pandas._libs.tslibs.period.Period'>, <class 'NoneType'>), 'label_series': (<class 'bool'>, <class 'collections.abc.Sequence'>, (<class 'bool'>,), <class 'NoneType'>), 'threshold': <class 'float'>, 'highlight': (<class 'str'>, <class 'collections.abc.Sequence'>, (<class 'str'>,)), 'direction': <class 'str'>}
def
run_plot(data: ~DataT, **kwargs) -> matplotlib.axes._axes.Axes:
def run_plot(data: DataT, **kwargs) -> Axes:
    """Plot a series of percentage rates, highlighting the runs.

    Arguments
    - data - ordered pandas Series of percentages, with PeriodIndex
    - **kwargs
        - threshold - float - used to ignore micro noise near zero
          (for example, threshold=0.01); defaults to 0.1
        - rounding - int - rounding for highlight text; defaults to 2
        - highlight - str or Sequence[str] - color(s) for highlighting the
          runs, two colors can be specified in a list if direction is "both"
        - direction - str - whether the highlight is for an upward
          or downward or both runs. Options are "up", "down" or "both"
          (the default).
        - in addition the **kwargs for line_plot are accepted.

    Return
    - matplotlib Axes object

    Raises
    - TypeError if the checked data is not a pandas Series
    - ValueError if direction is not "up", "down" or "both"."""

    # --- check the kwargs
    me = "run_plot"
    report_kwargs(called_from=me, **kwargs)
    kwargs = validate_kwargs(RUN_KW_TYPES, me, **kwargs)

    # --- check the data
    series = check_clean_timeseries(data, me)
    if not isinstance(series, Series):
        raise TypeError("series must be a pandas Series for run_plot()")
    series, kwargs = constrain_data(series, **kwargs)

    # --- default arguments - in **kwargs
    kwargs[THRESHOLD] = kwargs.get(THRESHOLD, 0.1)
    kwargs[DIRECTION] = kwargs.get(DIRECTION, "both")
    kwargs[ROUNDING] = kwargs.get(ROUNDING, 2)
    kwargs[HIGHLIGHT] = (
        kwargs.get(HIGHLIGHT, ("gold", "skyblue") if kwargs[DIRECTION] == "both" else "gold")
    )
    kwargs[COLOR] = kwargs.get(COLOR, "darkblue")

    # --- reject a bad direction before anything is drawn
    direction = kwargs[DIRECTION]
    if direction not in ("up", "down", "both"):
        raise ValueError(
            f"Invalid value for direction: {direction}. "
            "Expected 'up', 'down', or 'both'."
        )

    # --- plot the line
    kwargs[DRAWSTYLE] = kwargs.get(DRAWSTYLE, "steps-post")
    lp_kwargs = limit_kwargs(LINE_KW_TYPES, **kwargs)
    axes = line_plot(series, **lp_kwargs)

    # plot the runs (upward runs first, as before, when direction is "both")
    if direction in ("up", "both"):
        _plot_runs(axes, series, up=True, **kwargs)
    if direction in ("down", "both"):
        _plot_runs(axes, series, up=False, **kwargs)
    return axes
Plot a series of percentage rates, highlighting the increasing runs.
Arguments
- data - ordered pandas Series of percentages, with PeriodIndex
- **kwargs
- threshold - float - used to ignore micro noise near zero (for example, threshold=0.01)
- rounding - int - rounding for highlight text
- highlight - str or Sequence[str] - color(s) for highlighting the runs, two colors can be specified in a list if direction is "both"
- direction - str - whether the highlight is for an upward or downward or both runs. Options are "up", "down" or "both".
- in addition the **kwargs for line_plot are accepted.
Return
- matplotlib Axes object