mgplot.run_plot
Plot and highlight the 'runs' in a series.
1"""Plot and highlight the 'runs' in a series.""" 2 3from collections.abc import Sequence 4from typing import NotRequired, Unpack 5 6from matplotlib import patheffects as pe 7from matplotlib.axes import Axes 8from pandas import Series, concat 9 10from mgplot.axis_utils import map_periodindex, set_labels 11from mgplot.keyword_checking import ( 12 limit_kwargs, 13 report_kwargs, 14 validate_kwargs, 15) 16from mgplot.line_plot import LineKwargs, line_plot 17from mgplot.settings import DataT, get_setting 18from mgplot.utilities import check_clean_timeseries, constrain_data 19 20# --- constants 21ME = "run_plot" 22STROKE_LINEWIDTH = 5 23DEFAULT_THRESHOLD = 0.1 24DEFAULT_ROUNDING = 2 25UP_COLOR = "gold" 26DOWN_COLOR = "skyblue" 27UNKNOWN_COLOR = "gray" # should never be needed 28LINE_COLOR = "darkblue" 29 30 31class RunKwargs(LineKwargs): 32 """Keyword arguments for the run_plot function.""" 33 34 threshold: NotRequired[float] 35 direction: NotRequired[str] 36 highlight_color: NotRequired[str | Sequence[str]] 37 highlight_label: NotRequired[str | Sequence[str]] 38 39 40# --- functions 41 42 43def _identify_runs( 44 series: Series, 45 threshold: float, 46 *, 47 up: bool, # False means down 48) -> tuple[Series, Series]: 49 """Identify monotonic increasing/decreasing runs.""" 50 if threshold <= 0: 51 raise ValueError("Threshold must be positive") 52 53 diffed = series.diff() 54 change_points = concat([diffed[diffed.gt(threshold)], diffed[diffed.lt(-threshold)]]).sort_index() 55 if series.index[0] not in change_points.index: 56 starting_point = Series([0], index=[series.index[0]]) 57 change_points = concat([change_points, starting_point]).sort_index() 58 facing = change_points > 0 if up else change_points < 0 59 cycles = (facing & ~facing.shift().astype(bool)).cumsum() 60 return cycles[facing], change_points 61 62 63def _get_highlight_color(highlight_config: str | Sequence[str], *, up: bool) -> str: 64 """Extract highlight color based on direction.""" 65 match 
highlight_config: 66 case str(): 67 return highlight_config 68 case Sequence(): 69 return highlight_config[0] if up else highlight_config[1] 70 case _: 71 raise ValueError( 72 f"Invalid type for highlight: {type(highlight_config)}. Expected str or Sequence.", 73 ) 74 75 76def _resolve_labels(label: str | Sequence[str] | None, direction: str) -> tuple[str | None, str | None]: 77 """Resolve labels for up and down directions.""" 78 if direction == "both": 79 if isinstance(label, Sequence) and not isinstance(label, str): 80 return label[0], label[1] 81 return label, label 82 if direction == "up": 83 single_label = label[0] if isinstance(label, Sequence) and not isinstance(label, str) else label 84 return single_label, None 85 if direction == "down": 86 single_label = label[1] if isinstance(label, Sequence) and not isinstance(label, str) else label 87 return None, single_label 88 return None, None 89 90 91def _configure_defaults(kwargs_d: dict, direction: str) -> None: 92 """Set default values for run plot configuration.""" 93 kwargs_d.setdefault("threshold", DEFAULT_THRESHOLD) 94 kwargs_d.setdefault("direction", "both") 95 kwargs_d.setdefault("rounding", DEFAULT_ROUNDING) 96 kwargs_d.setdefault("color", LINE_COLOR) 97 kwargs_d.setdefault("drawstyle", "steps-post") 98 kwargs_d.setdefault("label_series", True) 99 100 # Set default highlight colors based on direction 101 if "highlight_color" not in kwargs_d: 102 if direction == "both": 103 kwargs_d["highlight_color"] = (UP_COLOR, DOWN_COLOR) 104 elif direction == "up": 105 kwargs_d["highlight_color"] = UP_COLOR 106 else: # direction == "down" 107 kwargs_d["highlight_color"] = DOWN_COLOR 108 109 110def _plot_runs( 111 axes: Axes, 112 series: Series, 113 *, 114 run_label: str | None, 115 up: bool, 116 **kwargs: Unpack[RunKwargs], 117) -> None: 118 """Highlight the runs of a series.""" 119 threshold = kwargs.get("threshold", 0) 120 high_color = _get_highlight_color(kwargs.get("highlight_color", UNKNOWN_COLOR), up=up) 121 122 
stretches, change_points = _identify_runs(series, threshold, up=up) 123 if stretches.empty: 124 return 125 126 max_stretch = int(stretches.max()) 127 for k in range(1, max_stretch + 1): 128 stretch = stretches[stretches == k] 129 axes.axvspan( 130 stretch.index.min(), 131 stretch.index.max(), 132 color=high_color, 133 zorder=-1, 134 label=run_label, 135 ) 136 run_label = "_" # only label the first run 137 138 # Calculate text position 139 space_above = series.max() - series[stretch.index].max() 140 space_below = series[stretch.index].min() - series.min() 141 y_pos, vert_align = (series.max(), "top") if space_above > space_below else (series.min(), "bottom") 142 143 # Create annotation text 144 rounding = kwargs.get("rounding", DEFAULT_ROUNDING) 145 total_change = change_points[stretch.index].sum() 146 annotation_text = f"{total_change.round(rounding)} pp" 147 148 text = axes.text( 149 x=stretch.index.min(), 150 y=y_pos, 151 s=annotation_text, 152 va=vert_align, 153 ha="left", 154 fontsize="x-small", 155 rotation=90, 156 ) 157 text.set_path_effects([pe.withStroke(linewidth=STROKE_LINEWIDTH, foreground="w")]) 158 159 160def run_plot(data: DataT, **kwargs: Unpack[RunKwargs]) -> Axes: 161 """Plot a series of percentage rates, highlighting the increasing runs. 162 163 Arguments: 164 data: Series - ordered pandas Series of percentages, with PeriodIndex. 165 kwargs: RunKwargs - keyword arguments for the run_plot function. 
166 167 Return: 168 - matplotlib Axes object 169 170 """ 171 # --- validate inputs 172 report_kwargs(caller=ME, **kwargs) 173 validate_kwargs(schema=RunKwargs, caller=ME, **kwargs) 174 175 series = check_clean_timeseries(data, ME) 176 if not isinstance(series, Series): 177 raise TypeError("series must be a pandas Series for run_plot()") 178 series, kwargs_d = constrain_data(series, **kwargs) 179 180 # --- configure defaults and validate 181 direction = kwargs_d.get("direction", "both") 182 _configure_defaults(kwargs_d, direction) 183 184 threshold = kwargs_d["threshold"] 185 if threshold <= 0: 186 raise ValueError("Threshold must be positive") 187 188 # --- handle PeriodIndex conversion 189 saved_pi = map_periodindex(series) 190 if saved_pi is not None: 191 series = saved_pi[0] 192 193 # --- plot the line 194 lp_kwargs = limit_kwargs(LineKwargs, **kwargs_d) 195 axes = line_plot(series, **lp_kwargs) 196 197 # --- plot runs based on direction 198 run_label = kwargs_d.pop("highlight_label", None) 199 up_label, down_label = _resolve_labels(run_label, direction) 200 201 if direction in ("up", "both"): 202 _plot_runs(axes, series, run_label=up_label, up=True, **kwargs_d) 203 if direction in ("down", "both"): 204 _plot_runs(axes, series, run_label=down_label, up=False, **kwargs_d) 205 206 if direction not in ("up", "down", "both"): 207 raise ValueError(f"Invalid direction: {direction}. Expected 'up', 'down', or 'both'.") 208 209 # --- set axis labels 210 if saved_pi is not None: 211 set_labels(axes, saved_pi[1], kwargs.get("max_ticks", get_setting("max_ticks"))) 212 213 return axes
ME =
'run_plot'
STROKE_LINEWIDTH =
5
DEFAULT_THRESHOLD =
0.1
DEFAULT_ROUNDING =
2
UP_COLOR =
'gold'
DOWN_COLOR =
'skyblue'
UNKNOWN_COLOR =
'gray'
LINE_COLOR =
'darkblue'
class RunKwargs(LineKwargs):
    """Keyword arguments for the run_plot function.

    Extends LineKwargs with the options that control run highlighting.
    """

    # Minimum absolute period-on-period change that counts as a move; must be positive.
    threshold: NotRequired[float]
    # Which runs to highlight: "up", "down" or "both".
    direction: NotRequired[str]
    # One color for every highlight, or an (up, down) sequence of colors.
    highlight_color: NotRequired[str | Sequence[str]]
    # One legend label for every highlight, or an (up, down) sequence of labels.
    highlight_label: NotRequired[str | Sequence[str]]
Keyword arguments for the run_plot function.
def run_plot(data: DataT, **kwargs: Unpack[RunKwargs]) -> Axes:
    """Plot a series of percentage rates, highlighting the increasing and/or decreasing runs.

    Arguments:
        data: Series - ordered pandas Series of percentages, with PeriodIndex.
        kwargs: RunKwargs - keyword arguments for the run_plot function.

    Return:
        - matplotlib Axes object

    Raises:
        TypeError: if the cleaned data is not a pandas Series.
        ValueError: if direction is not 'up', 'down' or 'both', or if the
            threshold is not positive.

    """
    # --- validate inputs
    report_kwargs(caller=ME, **kwargs)
    validate_kwargs(schema=RunKwargs, caller=ME, **kwargs)

    series = check_clean_timeseries(data, ME)
    if not isinstance(series, Series):
        raise TypeError("series must be a pandas Series for run_plot()")
    series, kwargs_d = constrain_data(series, **kwargs)

    # --- configure defaults and validate
    direction = kwargs_d.get("direction", "both")
    # Fail fast: reject a bad direction before anything is drawn and before
    # the defaults are derived from it.
    if direction not in ("up", "down", "both"):
        raise ValueError(f"Invalid direction: {direction}. Expected 'up', 'down', or 'both'.")
    _configure_defaults(kwargs_d, direction)

    threshold = kwargs_d["threshold"]
    if threshold <= 0:
        raise ValueError("Threshold must be positive")

    # --- handle PeriodIndex conversion
    saved_pi = map_periodindex(series)
    if saved_pi is not None:
        series = saved_pi[0]

    # --- plot the line
    lp_kwargs = limit_kwargs(LineKwargs, **kwargs_d)
    axes = line_plot(series, **lp_kwargs)

    # --- plot runs based on direction
    run_label = kwargs_d.pop("highlight_label", None)
    up_label, down_label = _resolve_labels(run_label, direction)

    if direction in ("up", "both"):
        _plot_runs(axes, series, run_label=up_label, up=True, **kwargs_d)
    if direction in ("down", "both"):
        _plot_runs(axes, series, run_label=down_label, up=False, **kwargs_d)

    # --- set axis labels
    if saved_pi is not None:
        set_labels(axes, saved_pi[1], kwargs.get("max_ticks", get_setting("max_ticks")))

    return axes
Plot a series of percentage rates, highlighting the increasing and/or decreasing runs.
Arguments: data: Series - ordered pandas Series of percentages, with PeriodIndex. kwargs: RunKwargs - keyword arguments for the run_plot function.
Return:
- matplotlib Axes object