mgplot.summary_plot
summary_plot.py: Produce a summary plot for the data in a given DataFrame. The data is normalised to z-scores and scaled.
"""
summary_plot.py:
Produce a summary plot for the data in a given DataFrame.
The data is normalised to z-scores and scaled.
"""

# --- imports
# system imports
from typing import Any

# from collections.abc import Sequence

# analytic third-party imports
from numpy import ndarray, array
from matplotlib.pyplot import Axes, subplots
from pandas import DataFrame, Period

# local imports
from mgplot.settings import DataT
from mgplot.finalise_plot import make_legend
from mgplot.utilities import constrain_data, check_clean_timeseries
from mgplot.kw_type_checking import (
    report_kwargs,
    ExpectedTypeDict,
    validate_expected,
    validate_kwargs,
)
from mgplot.keyword_names import (
    AX,
    VERBOSE,
    MIDDLE,
    PLOT_TYPE,
    PLOT_FROM
)


# --- constants
# the two supported values for the plot_type keyword argument
ZSCORES = "zscores"
ZSCALED = "zscaled"

# keyword arguments accepted by summary_plot() and their expected types
SUMMARY_KW_TYPES: ExpectedTypeDict = {
    AX: (Axes, type(None)),
    VERBOSE: bool,
    MIDDLE: float,
    PLOT_TYPE: str,
    PLOT_FROM: (int, Period, type(None)),
}
validate_expected(SUMMARY_KW_TYPES, "summary_plot")


# --- functions
def _calc_quantiles(middle: float) -> ndarray:
    """Return the lower and upper quantiles that bound the middle
    proportion of the data (e.g. middle=0.8 gives [0.1, 0.9])."""

    tail = (1 - middle) / 2.0  # proportion left in each tail
    return array([tail, 1 - tail])


def _calculate_z(
    original: DataFrame,  # only contains the data points of interest
    middle: float,  # middle proportion of data to highlight (eg. 0.8)
    verbose: bool = False,  # print the summary data
) -> tuple[DataFrame, DataFrame]:
    """Calculate z-scores and scaled z-scores for each column.

    Args:
    -   original: the data to be summarised, one series per column.
    -   middle: the middle proportion of data to highlight; only used
        for the verbose report.
    -   verbose: if True, print a per-column summary table.

    Returns a tuple (z_scores, z_scaled), where z_scaled is the
    z-scores rescaled column-by-column to the range -1 to +1."""

    # NOTE(review): a column with no variation has a zero standard
    # deviation (and a zero max-min range), so its scores presumably
    # come out as NaN rather than raising -- confirm this is acceptable.
    z_scores: DataFrame = (original - original.mean()) / original.std()
    z_scaled: DataFrame = (
        # scale z-scores between -1 and +1
        (((z_scores - z_scores.min()) / (z_scores.max() - z_scores.min())) - 0.5)
        * 2
    )

    if verbose:
        q_middle = _calc_quantiles(middle)
        frame = DataFrame(
            {
                "count": original.count(),
                "mean": original.mean(),
                "median": original.median(),
                "min shaded": original.quantile(q=q_middle[0]),
                "max shaded": original.quantile(q=q_middle[1]),
                "z-scores": z_scores.iloc[-1],
                "scaled": z_scaled.iloc[-1],
            }
        )
        print(frame)

    return DataFrame(z_scores), DataFrame(z_scaled)  # syntactic sugar for type hinting


def _plot_middle_bars(
    adjusted: DataFrame,
    middle: float,
    kwargs: dict[str, Any],  # must be a dictionary, not a splat
) -> Axes:
    """Plot the middle (typically 80%) of the data as a bar.

    Draws on the Axes supplied under the ax keyword in kwargs, if
    there is one; otherwise a new figure and Axes are created.

    Note: also sets the x-axis limits in kwargs (under "xlim").
    Return the matplotlib Axes object."""

    q = _calc_quantiles(middle)
    lo_hi: DataFrame = adjusted.quantile(q=q).T  # get the middle section of data

    # the x-axis must cover the bars and the latest data points, and
    # never be narrower than +/- span; space leaves room for the labels
    span = 1.15
    space = 0.2
    low = min(adjusted.iloc[-1].min(), lo_hi.min().min(), -span) - space
    high = max(adjusted.iloc[-1].max(), lo_hi.max().max(), span) + space
    kwargs["xlim"] = (low, high)  # remember the x-axis limits

    # ax is an accepted keyword argument (see SUMMARY_KW_TYPES), so
    # honour it rather than always creating a new figure
    ax = kwargs.get(AX)
    if ax is None:
        _fig, ax = subplots()
    ax.barh(
        y=lo_hi.index,
        width=lo_hi[q[1]] - lo_hi[q[0]],
        left=lo_hi[q[0]],
        color="#bbbbbb",
        label=f"Middle {middle*100:0.0f}% of prints",
    )
    return ax


def _plot_latest_datapoint(
    ax: Axes,
    original: DataFrame,
    adjusted: DataFrame,
    f_size: int,
) -> None:
    """Add the latest datapoints to the summary plot.

    The marker is placed at the adjusted (z-score or scaled) value in
    the final row, and is labelled with the matching original value.
    f_size is the font size for those labels."""

    ax.scatter(adjusted.iloc[-1], adjusted.columns, color="darkorange", label="Latest")
    row = adjusted.index[-1]
    for col_num, col_name in enumerate(original.columns):
        ax.text(
            x=adjusted.at[row, col_name],
            y=col_num,
            s=f"{original.at[row, col_name]:.1f}",
            ha="center",
            va="center",
            size=f_size,
        )


def _label_extremes(
    ax: Axes,
    data: tuple[DataFrame, DataFrame],
    plot_type: str,
    f_size: int,
    kwargs: dict[str, Any],  # must be a dictionary, not a splat
) -> None:
    """Label the extremes in the scaled plots.

    Does nothing unless plot_type is ZSCALED. For a scaled plot it
    draws guide lines at -1 and +1, marks each column's median, and
    writes each column's original minimum and maximum at the left and
    right x-axis limits (read from kwargs["xlim"])."""

    original, adjusted = data
    low, high = kwargs["xlim"]
    if plot_type != ZSCALED:
        return

    ax.axvline(-1, color="#555555", linewidth=0.5, linestyle="--")
    ax.axvline(1, color="#555555", linewidth=0.5, linestyle="--")
    ax.scatter(
        adjusted.median(),
        adjusted.columns,
        color="darkorchid",
        marker="x",
        s=5,
        label="Median",
    )
    for col_num, col_name in enumerate(original.columns):
        ax.text(
            low,
            col_num,
            f" {original[col_name].min():.1f}",
            ha="left",
            va="center",
            size=f_size,
        )
        ax.text(
            high,
            col_num,
            f"{original[col_name].max():.1f} ",
            ha="right",
            va="center",
            size=f_size,
        )


def _horizontal_bar_plot(
    original: DataFrame,
    adjusted: DataFrame,
    middle: float,
    plot_type: str,
    kwargs: dict[str, Any],  # must be a dictionary, not a splat
) -> Axes:
    """Plot horizontal bars for the middle of the data, then add the
    latest data points and (for scaled plots) the extremes.
    Return the matplotlib Axes object."""

    # kwargs is a dictionary, not a splat, so that the x-axis limits
    # set by _plot_middle_bars() are visible to the caller.

    ax = _plot_middle_bars(adjusted, middle, kwargs)
    f_size = 10
    _plot_latest_datapoint(ax, original, adjusted, f_size)
    _label_extremes(
        ax, data=(original, adjusted), plot_type=plot_type, f_size=f_size, kwargs=kwargs
    )

    return ax


# public
def summary_plot(
    data: DataT,  # summary data
    **kwargs,
) -> Axes:
    """Plot a summary of historical data for a given DataFrame.

    Args:
    -   data: DataFrame containing the summary data. The column names are
        used as labels for the plot.
    -   kwargs: additional arguments for the plot, including:
        -   ax: Axes | None - an existing Axes to draw on.
        -   plot_from: int | Period | None
        -   verbose: if True, print the summary data.
        -   middle: proportion of data to highlight (default is 0.8).
        -   plot_type: "zscores" (the default) or "zscaled".

    Returns Axes.

    Raises TypeError if data is not a pandas DataFrame.
    """

    # --- check the kwargs
    me = "summary_plot"
    report_kwargs(called_from=me, **kwargs)
    kwargs = validate_kwargs(SUMMARY_KW_TYPES, me, **kwargs)

    # --- check the data
    data = check_clean_timeseries(data, me)
    if not isinstance(data, DataFrame):
        raise TypeError("data must be a pandas DataFrame for summary_plot()")
    df = DataFrame(data)  # syntactic sugar for type hinting

    # --- optional arguments
    verbose = kwargs.pop(VERBOSE, False)
    middle = float(kwargs.pop(MIDDLE, 0.8))
    plot_type = kwargs.pop(PLOT_TYPE, ZSCORES)
    kwargs.setdefault(
        "legend",
        {
            # put the legend below the x-axis label
            "loc": "upper center",
            "fontsize": "xx-small",
            "bbox_to_anchor": (0.5, -0.125),
            "ncol": 4,
        },
    )

    # get the data, calculate z-scores and scaled scores based on the start period
    subset, kwargs = constrain_data(df, **kwargs)
    z_scores, z_scaled = _calculate_z(subset, middle, verbose=verbose)

    # plot as required by the plot_type argument
    # NOTE: any plot_type other than ZSCORES plots the scaled scores
    adjusted = z_scores if plot_type == ZSCORES else z_scaled
    ax = _horizontal_bar_plot(subset, adjusted, middle, plot_type, kwargs)
    ax.tick_params(axis="y", labelsize="small")
    make_legend(ax, kwargs["legend"])
    ax.set_xlim(kwargs.get("xlim", None))  # provide space for the labels

    return ax
ZSCORES =
'zscores'
ZSCALED =
'zscaled'
SUMMARY_KW_TYPES: mgplot.kw_type_checking.ExpectedTypeDict =
{'ax': (<class 'matplotlib.axes._axes.Axes'>, <class 'NoneType'>), 'verbose': <class 'bool'>, 'middle': <class 'float'>, 'plot_type': <class 'str'>, 'plot_from': (<class 'int'>, <class 'pandas._libs.tslibs.period.Period'>, <class 'NoneType'>)}
def
summary_plot(data: ~DataT, **kwargs) -> matplotlib.axes._axes.Axes:
def summary_plot(
    data: DataT,  # summary data
    **kwargs,
) -> Axes:
    """Plot a summary of historical data for a given DataFrame.

    Args:
    -   data: DataFrame containing the summary data. The column names are
        used as labels for the plot.
    -   kwargs: additional arguments for the plot, including:
        -   plot_from: int | Period | None
        -   verbose: if True, print the summary data.
        -   middle: proportion of data to highlight (default is 0.8).
        -   plot_type: "zscores" (the default) or "zscaled".

    Returns Axes.

    Raises TypeError if data is not a pandas DataFrame.
    """

    # --- check the kwargs
    me = "summary_plot"
    report_kwargs(called_from=me, **kwargs)
    kwargs = validate_kwargs(SUMMARY_KW_TYPES, me, **kwargs)

    # --- check the data
    data = check_clean_timeseries(data, me)
    if not isinstance(data, DataFrame):
        raise TypeError("data must be a pandas DataFrame for summary_plot()")
    df = DataFrame(data)  # syntactic sugar for type hinting

    # --- optional arguments
    # use the shared keyword-name constants, matching SUMMARY_KW_TYPES
    verbose = kwargs.pop(VERBOSE, False)
    middle = float(kwargs.pop(MIDDLE, 0.8))
    plot_type = kwargs.pop(PLOT_TYPE, ZSCORES)
    kwargs.setdefault(
        "legend",
        {
            # put the legend below the x-axis label
            "loc": "upper center",
            "fontsize": "xx-small",
            "bbox_to_anchor": (0.5, -0.125),
            "ncol": 4,
        },
    )

    # get the data, calculate z-scores and scaled scores based on the start period
    subset, kwargs = constrain_data(df, **kwargs)
    z_scores, z_scaled = _calculate_z(subset, middle, verbose=verbose)

    # plot as required by the plot_type argument
    # NOTE: any plot_type other than ZSCORES plots the scaled scores
    adjusted = z_scores if plot_type == ZSCORES else z_scaled
    ax = _horizontal_bar_plot(subset, adjusted, middle, plot_type, kwargs)
    ax.tick_params(axis="y", labelsize="small")
    make_legend(ax, kwargs["legend"])
    ax.set_xlim(kwargs.get("xlim", None))  # provide space for the labels

    return ax
Plot a summary of historical data for a given DataFrame.
Args:
- data: DataFrame containing the summary data. The column names are used as labels for the plot.
- kwargs: additional arguments for the plot, including:
- plot_from: int | Period | None
- verbose: if True, print the summary data.
- middle: proportion of data to highlight (default is 0.8).
- plot_type: which plot to generate, either 'zscores' (the default) or 'zscaled'.
Returns Axes.