mgplot.summary_plot
summary_plot.py:
Produce a summary plot for the data in a given DataFrame. The data is normalised to z-scores and scaled.
"""
summary_plot.py:

Produce a summary plot for the data in a given DataFrame.
The data is normalised to z-scores and scaled.
"""

# --- imports
# system imports
from typing import Any, NotRequired, Unpack

# analytic third-party imports
from numpy import ndarray, array
from matplotlib.pyplot import Axes
from pandas import DataFrame, Period

# local imports
from mgplot.settings import DataT
from mgplot.utilities import get_axes
from mgplot.finalise_plot import make_legend
from mgplot.utilities import constrain_data, check_clean_timeseries
from mgplot.keyword_checking import (
    report_kwargs,
    validate_kwargs,
    BaseKwargs,
)


# --- constants
ME = "summary_plot"
ZSCORES = "zscores"
ZSCALED = "zscaled"


class SummaryKwargs(BaseKwargs):
    """Keyword arguments for the summary_plot function."""

    # Axes to draw on - presumably consumed by get_axes(); confirm there.
    ax: NotRequired[Axes | None]
    # When True, _calculate_z() prints a per-column summary table.
    verbose: NotRequired[bool]
    # Middle proportion of the data to shade (summary_plot defaults to 0.8).
    middle: NotRequired[float]
    # ZSCORES or ZSCALED (summary_plot defaults to ZSCORES).
    plot_type: NotRequired[str]
    # Passed through to constrain_data() - presumably the first period plotted.
    plot_from: NotRequired[int | Period | None]
    # Legend settings handed to make_legend().
    legend: NotRequired[dict[str, Any]]


# --- functions
def _calc_quantiles(middle: float) -> ndarray:
    """Calculate the quantiles for the middle of the data.

    For middle=0.8 this returns array([0.1, 0.9])."""
    return array([(1 - middle) / 2.0, 1 - (1 - middle) / 2.0])


def _calculate_z(
    original: DataFrame,  # only contains the data points of interest
    middle: float,  # middle proportion of data to highlight (eg. 0.8)
    verbose: bool = False,  # print the summary data
) -> tuple[DataFrame, DataFrame]:
    """Calculate z-scores and scaled z-scores.

    Return a tuple of (z_scores, z_scaled). The middle quantiles are
    only used for the optional verbose report; they are not returned."""

    # calculate z-scores and scaled scores (column by column)
    z_scores: DataFrame = (original - original.mean()) / original.std()
    z_scaled: DataFrame = (
        # scale z-scores between -1 and +1
        (((z_scores - z_scores.min()) / (z_scores.max() - z_scores.min())) - 0.5) * 2
    )

    if verbose:
        q_middle = _calc_quantiles(middle)
        frame = DataFrame(
            {
                "count": original.count(),
                "mean": original.mean(),
                "median": original.median(),
                "min shaded": original.quantile(q=q_middle[0]),
                "max shaded": original.quantile(q=q_middle[1]),
                "z-scores": z_scores.iloc[-1],
                "scaled": z_scaled.iloc[-1],
            }
        )
        print(frame)

    return DataFrame(z_scores), DataFrame(z_scaled)  # syntactic sugar for type hinting


def _plot_middle_bars(
    adjusted: DataFrame,
    middle: float,
    kwargs: dict[str, Any],  # must be a dictionary, not a splat
) -> Axes:
    """Plot the middle (typically 80%) of the data as a bar.
    Note: also sets the x-axis limits in kwargs.
    Return the matplotlib Axes object."""

    q = _calc_quantiles(middle)
    lo_hi: DataFrame = adjusted.quantile(q=q).T  # get the middle section of data
    span = 1.15  # minimum half-width of the x-axis
    space = 0.2  # padding added beyond the widest point on each side
    low = min(adjusted.iloc[-1].min(), lo_hi.min().min(), -span) - space
    high = max(adjusted.iloc[-1].max(), lo_hi.max().max(), span) + space
    kwargs["xlim"] = (low, high)  # update the kwargs with the xlim
    ax, _ = get_axes(**kwargs)
    ax.barh(
        y=lo_hi.index,
        width=lo_hi[q[1]] - lo_hi[q[0]],
        left=lo_hi[q[0]],
        color="#bbbbbb",
        label=f"Middle {middle * 100:0.0f}% of prints",
    )
    return ax


def _plot_latest_datapoint(
    ax: Axes,
    original: DataFrame,
    adjusted: DataFrame,
    f_size: int | str,
) -> None:
    """Add the latest datapoints to the summary plot.

    The marker is placed at the adjusted (z-score or scaled) value, while
    the text label shows the original value. f_size is the font size of
    that text label."""

    ax.scatter(adjusted.iloc[-1], adjusted.columns, color="darkorange", label="Latest")
    row = adjusted.index[-1]
    for col_num, col_name in enumerate(original.columns):
        ax.text(
            x=adjusted.at[row, col_name],
            y=col_num,
            s=f"{original.at[row, col_name]:.1f}",
            ha="center",
            va="center",
            size=f_size,
        )


def _label_extremes(
    ax: Axes,
    data: tuple[DataFrame, DataFrame],
    plot_type: str,
    f_size: int | str,
    kwargs: dict[str, Any],  # must be a dictionary, not a splat
) -> None:
    """Label the extremes in the scaled plots.

    data is the (original, adjusted) pair of DataFrames. kwargs must
    already contain the "xlim" entry set by _plot_middle_bars()."""

    original, adjusted = data
    low, high = kwargs["xlim"]
    ax.set_xlim(low, high)  # set the x-axis limits
    if plot_type == ZSCALED:
        ax.axvline(-1, color="#555555", linewidth=0.5, linestyle="--")
        ax.axvline(1, color="#555555", linewidth=0.5, linestyle="--")
        ax.scatter(
            adjusted.median(),
            adjusted.columns,
            color="darkorchid",
            marker="x",
            s=5,
            label="Median",
        )
        # NOTE(review): the extracted source lost its indentation; this loop
        # is placed inside the ZSCALED branch to match the docstring - confirm.
        for col_num, col_name in enumerate(original.columns):
            ax.text(
                low,
                col_num,
                f" {original[col_name].min():.2f}",
                ha="left",
                va="center",
                size=f_size,
            )
            ax.text(
                high,
                col_num,
                f"{original[col_name].max():.2f} ",
                ha="right",
                va="center",
                size=f_size,
            )


def _horizontal_bar_plot(
    original: DataFrame,
    adjusted: DataFrame,
    middle: float,
    plot_type: str,
    kwargs: dict[str, Any],  # must be a dictionary, not a splat
) -> Axes:
    """Plot horizontal bars for the middle of the data."""

    # kwargs is a dictionary, not a splat
    # so that we can pass it to the Axes object and
    # set the x-axis limits.

    ax = _plot_middle_bars(adjusted, middle, kwargs)
    # The latest-value labels have always been drawn at size 10 and the
    # extreme labels at "x-small"; both sizes are now passed explicitly
    # rather than being overridden inside _plot_latest_datapoint().
    _plot_latest_datapoint(ax, original, adjusted, f_size=10)
    _label_extremes(
        ax,
        data=(original, adjusted),
        plot_type=plot_type,
        f_size="x-small",
        kwargs=kwargs,
    )

    return ax


# public
def summary_plot(data: DataT, **kwargs: Unpack[SummaryKwargs]) -> Axes:
    """Plot a summary of historical data for a given DataFrame.

    Args:
    - data: DataFrame containing the data to summarise. The column names
      are used as labels for the plot.
    - kwargs: additional arguments for the plot, including:
        - verbose: print a table of summary statistics (default False)
        - middle: middle proportion of the data to shade (default 0.8)
        - plot_type: ZSCORES (the default); any other value is plotted
          as ZSCALED
        - legend: legend settings; by default the legend is placed
          below the x-axis label
        - the remaining arguments are passed on to constrain_data()

    Returns Axes.

    Raises TypeError if the checked data is not a pandas DataFrame.
    """

    # --- check the kwargs
    report_kwargs(caller=ME, **kwargs)
    validate_kwargs(schema=SummaryKwargs, caller=ME, **kwargs)

    # --- check the data
    data = check_clean_timeseries(data, ME)
    if not isinstance(data, DataFrame):
        raise TypeError("data must be a pandas DataFrame for summary_plot()")
    df = DataFrame(data)  # syntactic sugar for type hinting

    # --- optional arguments
    verbose = kwargs.pop("verbose", False)
    middle = float(kwargs.pop("middle", 0.8))
    plot_type = kwargs.pop("plot_type", ZSCORES)
    kwargs["legend"] = kwargs.get(
        "legend",
        {
            # put the legend below the x-axis label
            "loc": "upper center",
            "fontsize": "xx-small",
            "bbox_to_anchor": (0.5, -0.125),
            "ncol": 4,
        },
    )

    # get the data, calculate z-scores and scaled scores based on the start period
    subset, kwargsd = constrain_data(df, **kwargs)
    z_scores, z_scaled = _calculate_z(subset, middle, verbose=verbose)

    # plot as required by the plot_types argument
    adjusted = z_scores if plot_type == ZSCORES else z_scaled
    ax = _horizontal_bar_plot(subset, adjusted, middle, plot_type, kwargsd)
    ax.tick_params(axis="y", labelsize="small")
    make_legend(ax, kwargsd["legend"])
    ax.set_xlim(kwargsd.get("xlim"))  # provide space for the labels

    return ax
ME =
'summary_plot'
ZSCORES =
'zscores'
ZSCALED =
'zscaled'
class
SummaryKwargs(mgplot.keyword_checking.BaseKwargs):
class SummaryKwargs(BaseKwargs):
    """Keyword arguments for the summary_plot function."""

    # Axes to draw on - presumably consumed by get_axes(); confirm there.
    ax: NotRequired[Axes | None]
    # When True, summary_plot() prints a per-column summary table.
    verbose: NotRequired[bool]
    # Middle proportion of the data to shade (summary_plot defaults to 0.8).
    middle: NotRequired[float]
    # ZSCORES or ZSCALED (summary_plot defaults to ZSCORES).
    plot_type: NotRequired[str]
    # Passed through to constrain_data() - presumably the first period plotted.
    plot_from: NotRequired[int | Period | None]
    # Legend settings handed to make_legend().
    legend: NotRequired[dict[str, Any]]
Keyword arguments for the summary_plot function.
def summary_plot(data: DataT, **kwargs: Unpack[SummaryKwargs]) -> Axes:
    """Plot a summary of historical data for a given DataFrame.

    Args:
    - data: DataFrame containing the data to summarise. The column names
      are used as labels for the plot.
    - kwargs: additional arguments for the plot, including:
        - verbose: print a table of summary statistics (default False)
        - middle: middle proportion of the data to shade (default 0.8)
        - plot_type: ZSCORES (the default); any other value is plotted
          as ZSCALED
        - legend: legend settings; by default the legend is placed
          below the x-axis label
        - the remaining arguments are passed on to constrain_data()

    Returns Axes.

    Raises TypeError if the checked data is not a pandas DataFrame.
    """

    # --- check the kwargs
    report_kwargs(caller=ME, **kwargs)
    validate_kwargs(schema=SummaryKwargs, caller=ME, **kwargs)

    # --- check the data
    data = check_clean_timeseries(data, ME)
    if not isinstance(data, DataFrame):
        raise TypeError("data must be a pandas DataFrame for summary_plot()")
    df = DataFrame(data)  # syntactic sugar for type hinting

    # --- optional arguments
    verbose = kwargs.pop("verbose", False)
    middle = float(kwargs.pop("middle", 0.8))
    plot_type = kwargs.pop("plot_type", ZSCORES)
    kwargs["legend"] = kwargs.get(
        "legend",
        {
            # put the legend below the x-axis label
            "loc": "upper center",
            "fontsize": "xx-small",
            "bbox_to_anchor": (0.5, -0.125),
            "ncol": 4,
        },
    )

    # get the data, calculate z-scores and scaled scores based on the start period
    subset, kwargsd = constrain_data(df, **kwargs)
    z_scores, z_scaled = _calculate_z(subset, middle, verbose=verbose)

    # plot as required by the plot_types argument
    adjusted = z_scores if plot_type == ZSCORES else z_scaled
    ax = _horizontal_bar_plot(subset, adjusted, middle, plot_type, kwargsd)
    ax.tick_params(axis="y", labelsize="small")
    make_legend(ax, kwargsd["legend"])
    ax.set_xlim(kwargsd.get("xlim"))  # provide space for the labels

    return ax
Plot a summary of historical data for a given DataFrame.
Args:
- data: DataFrame containing the data to summarise. The column names are used as labels for the plot.
- kwargs: additional arguments for the plot, including ax, verbose, middle, plot_type, plot_from and legend (see SummaryKwargs).
Returns Axes.