mgplot.summary_plot

summary_plot.py:

Produce a summary plot for the data in a given DataFrame. The data is normalised to z-scores and scaled.

  1"""
  2summary_plot.py:
  3
  4Produce a summary plot for the data in a given DataFrame.
  5The data is normalised to z-scores and scaled.
  6"""
  7
  8# --- imports
  9# system imports
 10from typing import Any, NotRequired, Unpack
 11
 12# analytic third-party imports
 13from numpy import ndarray, array
 14from matplotlib.pyplot import Axes
 15from pandas import DataFrame, Period
 16
 17# local imports
 18from mgplot.settings import DataT
 19from mgplot.utilities import get_axes
 20from mgplot.finalise_plot import make_legend
 21from mgplot.utilities import constrain_data, check_clean_timeseries
 22from mgplot.keyword_checking import (
 23    report_kwargs,
 24    validate_kwargs,
 25    BaseKwargs,
 26)
 27
 28
# --- constants
# The name of the public plotting function defined in this module.
ME = "summary_plot"
# The two values recognised for the plot_type keyword argument.
ZSCORES = "zscores"  # plot the z-scores (the default plot_type)
ZSCALED = "zscaled"  # plot the z-scores rescaled to lie between -1 and +1
 33
 34
class SummaryKwargs(BaseKwargs):
    """Keyword arguments for the summary_plot function.

    Every key declared here is optional (NotRequired).
    """

    # Axes to draw on.
    # NOTE(review): not read directly in this module; presumably consumed
    # by get_axes() - confirm there.
    ax: NotRequired[Axes | None]
    # if True, print a per-column table of summary statistics (default False)
    verbose: NotRequired[bool]
    # middle proportion of the data to shade as a bar (default 0.8)
    middle: NotRequired[float]
    # which scores to plot: "zscores" (the default) or "zscaled"
    plot_type: NotRequired[str]
    # where the plotted data should start.
    # NOTE(review): presumably applied by constrain_data() - confirm there.
    plot_from: NotRequired[int | Period | None]
    # legend settings handed to make_legend(); summary_plot() supplies a
    # default (legend below the x-axis label) when this key is absent
    legend: NotRequired[dict[str, Any]]
 44
 45
 46# --- functions
 47def _calc_quantiles(middle: float) -> ndarray:
 48    """Calculate the quantiles for the middle of the data."""
 49    return array([(1 - middle) / 2.0, 1 - (1 - middle) / 2.0])
 50
 51
 52def _calculate_z(
 53    original: DataFrame,  # only contains the data points of interest
 54    middle: float,  # middle proportion of data to highlight (eg. 0.8)
 55    verbose: bool = False,  # print the summary data
 56) -> tuple[DataFrame, DataFrame]:
 57    """Calculate z-scores, scaled z-scores and middle quantiles.
 58    Return z_scores, z_scaled, q (which are the quantiles for the
 59    start/end of the middle proportion of data to highlight)."""
 60
 61    # calculate z-scores, scaled scores and middle quantiles
 62    z_scores: DataFrame = (original - original.mean()) / original.std()
 63    z_scaled: DataFrame = (
 64        # scale z-scores between -1 and +1
 65        (((z_scores - z_scores.min()) / (z_scores.max() - z_scores.min())) - 0.5) * 2
 66    )
 67    q_middle = _calc_quantiles(middle)
 68
 69    if verbose:
 70        frame = DataFrame(
 71            {
 72                "count": original.count(),
 73                "mean": original.mean(),
 74                "median": original.median(),
 75                "min shaded": original.quantile(q=q_middle[0]),
 76                "max shaded": original.quantile(q=q_middle[1]),
 77                "z-scores": z_scores.iloc[-1],
 78                "scaled": z_scaled.iloc[-1],
 79            }
 80        )
 81        print(frame)
 82
 83    return DataFrame(z_scores), DataFrame(z_scaled)  # syntactic sugar for type hinting
 84
 85
 86def _plot_middle_bars(
 87    adjusted: DataFrame,
 88    middle: float,
 89    kwargs: dict[str, Any],  # must be a dictionary, not a splat
 90) -> Axes:
 91    """Plot the middle (typically 80%) of the data as a bar.
 92    Note: also sets the x-axis limits in kwargs.
 93    Return the matplotlib Axes object."""
 94
 95    q = _calc_quantiles(middle)
 96    lo_hi: DataFrame = adjusted.quantile(q=q).T  # get the middle section of data
 97    span = 1.15
 98    space = 0.2
 99    low = min(adjusted.iloc[-1].min(), lo_hi.min().min(), -span) - space
100    high = max(adjusted.iloc[-1].max(), lo_hi.max().max(), span) + space
101    kwargs["xlim"] = (low, high)  # update the kwargs with the xlim
102    ax, _ = get_axes(**kwargs)
103    ax.barh(
104        y=lo_hi.index,
105        width=lo_hi[q[1]] - lo_hi[q[0]],
106        left=lo_hi[q[0]],
107        color="#bbbbbb",
108        label=f"Middle {middle * 100:0.0f}% of prints",
109    )
110    return ax
111
112
def _plot_latest_datapoint(
    ax: Axes,
    original: DataFrame,
    adjusted: DataFrame,
    f_size: int | str,
) -> None:
    """Add the latest datapoints to the summary plot.

    Args:
    - ax: the Axes to draw on.
    - original: the unadjusted data; its last row supplies the label text.
    - adjusted: the adjusted data; its last row supplies the x positions.
    - f_size: font size for the value labels, either a number or a
      matplotlib named size such as "x-small".
    """

    ax.scatter(adjusted.iloc[-1], adjusted.columns, color="darkorange", label="Latest")
    # Note: f_size is used as given. It was previously overwritten with a
    # hard-coded 10 here, which silently ignored the caller's argument.
    row = adjusted.index[-1]
    for col_num, col_name in enumerate(original.columns):
        # write the original (unadjusted) value over the marker
        ax.text(
            x=adjusted.at[row, col_name],
            y=col_num,
            s=f"{original.at[row, col_name]:.1f}",
            ha="center",
            va="center",
            size=f_size,
        )
133
134
def _label_extremes(
    ax: Axes,
    data: tuple[DataFrame, DataFrame],
    plot_type: str,
    f_size: int | str,
    kwargs: dict[str, Any],  # must be a dictionary, not a splat
) -> None:
    """Label the extremes in the scaled plots.

    Args:
    - ax: the Axes to draw on.
    - data: a tuple of (original, adjusted) DataFrames.
    - plot_type: the extremes are only labelled when this is ZSCALED.
    - f_size: font size for the labels, either a number or a matplotlib
      named size such as "x-small".
    - kwargs: must contain "xlim", a (low, high) pair of x-axis limits.

    The x-axis limits are applied for every plot_type.
    """

    original, adjusted = data
    low, high = kwargs["xlim"]
    ax.set_xlim(low, high)  # set the x-axis limits
    if plot_type != ZSCALED:
        return  # only the scaled plot has its extremes labelled

    # mark the -1 and +1 bounds of the scaled data
    for bound in (-1, 1):
        ax.axvline(bound, color="#555555", linewidth=0.5, linestyle="--")
    ax.scatter(
        adjusted.median(),
        adjusted.columns,
        color="darkorchid",
        marker="x",
        s=5,
        label="Median",
    )
    # write each column's original minimum and maximum at the plot edges
    for col_num, col_name in enumerate(original.columns):
        ax.text(
            low,
            col_num,
            f" {original[col_name].min():.2f}",
            ha="left",
            va="center",
            size=f_size,
        )
        ax.text(
            high,
            col_num,
            f"{original[col_name].max():.2f} ",
            ha="right",
            va="center",
            size=f_size,
        )
175
176
def _horizontal_bar_plot(
    original: DataFrame,
    adjusted: DataFrame,
    middle: float,
    plot_type: str,
    kwargs: dict[str, Any],  # must be a dictionary, not a splat
) -> Axes:
    """Build the horizontal bar plot: middle bars, latest points, extremes.

    Returns the matplotlib Axes object that was drawn on."""

    # kwargs is passed as a dictionary (not splatted) so that the x-axis
    # limits stored in it by _plot_middle_bars() can be read back by
    # _label_extremes().

    label_size = "x-small"
    axes = _plot_middle_bars(adjusted, middle, kwargs)
    _plot_latest_datapoint(axes, original, adjusted, label_size)
    _label_extremes(
        axes,
        data=(original, adjusted),
        plot_type=plot_type,
        f_size=label_size,
        kwargs=kwargs,
    )
    return axes
196
197
198# public
def summary_plot(data: DataT, **kwargs: Unpack[SummaryKwargs]) -> Axes:
    """Plot a summary of historical data for a given DataFrame.

    Args:
    - data: DataFrame containing the data to summarise. The column names
      are used as labels for the plot.
    - kwargs: additional arguments for the plot, including:
        - verbose: if True, print a table of summary statistics
          (default False).
        - middle: the middle proportion of the data to shade as a bar
          (default 0.8).
        - plot_type: "zscores" (the default) to plot z-scores, or
          "zscaled" to plot z-scores rescaled to lie between -1 and +1.
        - legend: a dictionary of legend settings; by default the legend
          is placed below the x-axis label.

    Returns Axes.

    Raises:
    - TypeError if data is not a pandas DataFrame.
    - ValueError if plot_type is not "zscores" or "zscaled".
    """

    # --- check the kwargs
    report_kwargs(caller=ME, **kwargs)
    validate_kwargs(schema=SummaryKwargs, caller=ME, **kwargs)

    # --- check the data
    data = check_clean_timeseries(data, ME)
    if not isinstance(data, DataFrame):
        raise TypeError("data must be a pandas DataFrame for summary_plot()")
    df = DataFrame(data)  # syntactic sugar for type hinting

    # --- optional arguments
    verbose = kwargs.pop("verbose", False)
    middle = float(kwargs.pop("middle", 0.8))
    plot_type = kwargs.pop("plot_type", ZSCORES)
    if plot_type not in (ZSCORES, ZSCALED):
        # previously any unrecognised value was silently plotted as scaled
        # data, but without the labels that the scaled plot normally gets
        raise ValueError(
            f"plot_type must be '{ZSCORES}' or '{ZSCALED}', not {plot_type!r}"
        )
    kwargs.setdefault(
        "legend",
        {
            # put the legend below the x-axis label
            "loc": "upper center",
            "fontsize": "xx-small",
            "bbox_to_anchor": (0.5, -0.125),
            "ncol": 4,
        },
    )

    # get the data, calculate z-scores and scaled scores based on the start period
    subset, kwargsd = constrain_data(df, **kwargs)
    z_scores, z_scaled = _calculate_z(subset, middle, verbose=verbose)

    # plot as required by the plot_type argument
    adjusted = z_scores if plot_type == ZSCORES else z_scaled
    ax = _horizontal_bar_plot(subset, adjusted, middle, plot_type, kwargsd)
    ax.tick_params(axis="y", labelsize="small")
    make_legend(ax, kwargsd["legend"])
    ax.set_xlim(kwargsd.get("xlim"))  # provide space for the labels

    return ax
ME = 'summary_plot'  # the name of the public plotting function in this module
ZSCORES = 'zscores'  # plot_type value: plot the z-scores (the default)
ZSCALED = 'zscaled'  # plot_type value: plot z-scores rescaled to between -1 and +1
class SummaryKwargs(mgplot.keyword_checking.BaseKwargs):
class SummaryKwargs(BaseKwargs):
    """Keyword arguments for the summary_plot function.

    Every key declared here is optional (NotRequired).
    """

    # Axes to draw on.
    # NOTE(review): not read directly in this module; presumably consumed
    # by get_axes() - confirm there.
    ax: NotRequired[Axes | None]
    # if True, print a per-column table of summary statistics (default False)
    verbose: NotRequired[bool]
    # middle proportion of the data to shade as a bar (default 0.8)
    middle: NotRequired[float]
    # which scores to plot: "zscores" (the default) or "zscaled"
    plot_type: NotRequired[str]
    # where the plotted data should start.
    # NOTE(review): presumably applied by constrain_data() - confirm there.
    plot_from: NotRequired[int | Period | None]
    # legend settings handed to make_legend(); summary_plot() supplies a
    # default (legend below the x-axis label) when this key is absent
    legend: NotRequired[dict[str, Any]]

Keyword arguments for the summary_plot function.

ax: NotRequired[matplotlib.axes._axes.Axes | None]
verbose: NotRequired[bool]
middle: NotRequired[float]
plot_type: NotRequired[str]
plot_from: NotRequired[int | pandas._libs.tslibs.period.Period | None]
legend: NotRequired[dict[str, Any]]
def summary_plot( data: ~DataT, **kwargs: Unpack[SummaryKwargs]) -> matplotlib.axes._axes.Axes:
def summary_plot(data: DataT, **kwargs: Unpack[SummaryKwargs]) -> Axes:
    """Plot a summary of historical data for a given DataFrame.

    Args:
    - data: DataFrame containing the data to summarise. The column names
      are used as labels for the plot.
    - kwargs: additional arguments for the plot, including:
        - verbose: if True, print a table of summary statistics
          (default False).
        - middle: the middle proportion of the data to shade as a bar
          (default 0.8).
        - plot_type: "zscores" (the default) to plot z-scores, or
          "zscaled" to plot z-scores rescaled to lie between -1 and +1.
        - legend: a dictionary of legend settings; by default the legend
          is placed below the x-axis label.

    Returns Axes.

    Raises:
    - TypeError if data is not a pandas DataFrame.
    - ValueError if plot_type is not "zscores" or "zscaled".
    """

    # --- check the kwargs
    report_kwargs(caller=ME, **kwargs)
    validate_kwargs(schema=SummaryKwargs, caller=ME, **kwargs)

    # --- check the data
    data = check_clean_timeseries(data, ME)
    if not isinstance(data, DataFrame):
        raise TypeError("data must be a pandas DataFrame for summary_plot()")
    df = DataFrame(data)  # syntactic sugar for type hinting

    # --- optional arguments
    verbose = kwargs.pop("verbose", False)
    middle = float(kwargs.pop("middle", 0.8))
    plot_type = kwargs.pop("plot_type", ZSCORES)
    if plot_type not in (ZSCORES, ZSCALED):
        # previously any unrecognised value was silently plotted as scaled
        # data, but without the labels that the scaled plot normally gets
        raise ValueError(
            f"plot_type must be '{ZSCORES}' or '{ZSCALED}', not {plot_type!r}"
        )
    kwargs.setdefault(
        "legend",
        {
            # put the legend below the x-axis label
            "loc": "upper center",
            "fontsize": "xx-small",
            "bbox_to_anchor": (0.5, -0.125),
            "ncol": 4,
        },
    )

    # get the data, calculate z-scores and scaled scores based on the start period
    subset, kwargsd = constrain_data(df, **kwargs)
    z_scores, z_scaled = _calculate_z(subset, middle, verbose=verbose)

    # plot as required by the plot_type argument
    adjusted = z_scores if plot_type == ZSCORES else z_scaled
    ax = _horizontal_bar_plot(subset, adjusted, middle, plot_type, kwargsd)
    ax.tick_params(axis="y", labelsize="small")
    make_legend(ax, kwargsd["legend"])
    ax.set_xlim(kwargsd.get("xlim"))  # provide space for the labels

    return ax

Plot a summary of historical data for a given DataFrame.

Args:

  • data: DataFrame containing the data to summarise. The column names are used as labels for the plot.
  • kwargs: additional keyword arguments for the plot, as declared in SummaryKwargs (ax, verbose, middle, plot_type, plot_from, legend).

Returns Axes.