mgplot.postcovid_plot

covid_recovery_plot.py Plot the pre-COVID trajectory against the current trend.

  1"""
  2covid_recovery_plot.py
  3Plot the pre-COVID trajectory against the current trend.
  4"""
  5
  6# --- imports
  7from collections.abc import Sequence
  8from pandas import DataFrame, Series, Period, PeriodIndex
  9from matplotlib.pyplot import Axes
 10from numpy import arange, polyfit
 11
 12from mgplot.settings import DataT, get_setting
 13from mgplot.line_plot import line_plot, LINE_KW_TYPES
 14from mgplot.utilities import check_clean_timeseries
 15from mgplot.kw_type_checking import (
 16    ExpectedTypeDict,
 17    validate_kwargs,
 18    validate_expected,
 19    report_kwargs,
 20)
 21from mgplot.keyword_names import (
 22    START_R,
 23    END_R,
 24    WIDTH,
 25    STYLE,
 26    PLOT_FROM,
 27    LABEL_SERIES,
 28    ANNOTATE,
 29    COLOR,
 30)
 31
 32
 33# --- constants
 34POSTCOVID_KW_TYPES: ExpectedTypeDict = {
 35    START_R: Period,  # type: ignore[assignment]
 36    END_R: Period,  # type: ignore[assignment]
 37    WIDTH: (int, float),
 38    STYLE: (str, Sequence, (str,)),
 39    LABEL_SERIES: (bool, dict, (str, object), type(None)),
 40    ANNOTATE: (bool, tuple),
 41    COLOR: (str, tuple),
 42} | LINE_KW_TYPES
 43validate_expected(POSTCOVID_KW_TYPES, "postcovid_plot")
 44
 45
 46# --- functions
 47def get_projection(original: Series, to_period: Period) -> Series:
 48    """
 49    Projection based on data from the start of a series
 50    to the to_period (inclusive). Returns projection over the whole
 51    period of the original series.
 52    """
 53
 54    y_regress = original[original.index <= to_period].copy()
 55    x_regress = arange(len(y_regress))
 56    m, b = polyfit(x_regress, y_regress, 1)
 57
 58    x_complete = arange(len(original))
 59    projection = Series((x_complete * m) + b, index=original.index)
 60
 61    return projection
 62
 63
 64def postcovid_plot(data: DataT, **kwargs) -> Axes:
 65    """
 66    Plots a series with a PeriodIndex.
 67
 68    Arguments
 69    - data - the series to be plotted (note that this function
 70      is designed to work with a single series, not a DataFrame).
 71    - **kwargs - same as for line_plot() and finalise_plot().
 72
 73    Raises:
 74    - TypeError if series is not a pandas Series
 75    - TypeError if series does not have a PeriodIndex
 76    - ValueError if series does not have a D, M or Q frequency
 77    - ValueError if regression start is after regression end
 78    """
 79
 80    # --- check the kwargs
 81    me = "postcovid_plot"
 82    report_kwargs(called_from=me, **kwargs)
 83    kwargs = validate_kwargs(POSTCOVID_KW_TYPES, me, **kwargs)
 84
 85    # --- check the data
 86    data = check_clean_timeseries(data, me)
 87    if not isinstance(data, Series):
 88        raise TypeError("The series argument must be a pandas Series")
 89    series: Series = data
 90    series_index = PeriodIndex(series.index)  # syntactic sugar for type hinting
 91    if series_index.freqstr[:1] not in ("Q", "M", "D"):
 92        raise ValueError("The series index must have a D, M or Q freq")
 93    # rely on line_plot() to validate kwargs
 94    if PLOT_FROM in kwargs:
 95        print("Warning: the 'plot_from' argument is ignored in postcovid_plot().")
 96        del kwargs[PLOT_FROM]
 97
 98    # --- plot COVID counterfactural
 99    freq = PeriodIndex(series.index).freqstr  # syntactic sugar for type hinting
100    match freq[0]:
101        case "Q":
102            start_regression = Period("2014Q4", freq=freq)
103            end_regression = Period("2019Q4", freq=freq)
104        case "M":
105            start_regression = Period("2015-01", freq=freq)
106            end_regression = Period("2020-01", freq=freq)
107        case "D":
108            start_regression = Period("2015-01-01", freq=freq)
109            end_regression = Period("2020-01-01", freq=freq)
110
111    start_regression = Period(kwargs.pop(START_R, start_regression), freq=freq)
112    end_regression = Period(kwargs.pop(END_R, end_regression), freq=freq)
113    if start_regression >= end_regression:
114        raise ValueError("Start period must be before end period")
115
116    # --- combine data and projection
117    recent = series[series.index >= start_regression].copy()
118    recent.name = "Series"
119    projection = get_projection(recent, end_regression)
120    projection.name = "Pre-COVID projection"
121    data_set = DataFrame([projection, recent]).T
122
123    # --- activate plot settings
124    kwargs[WIDTH] = kwargs.pop(
125        WIDTH, (get_setting("line_normal"), get_setting("line_wide"))
126    )  # series line is thicker than projection
127    kwargs[STYLE] = kwargs.pop(STYLE, ("--", "-"))  # dashed regression line
128    kwargs[LABEL_SERIES] = kwargs.pop(LABEL_SERIES, True)
129    kwargs[ANNOTATE] = kwargs.pop(ANNOTATE, (False, True))  # annotate series only
130    kwargs[COLOR] = kwargs.pop(COLOR, ("darkblue", "#dd0000"))
131
132    return line_plot(
133        data_set,
134        **kwargs,
135    )
POSTCOVID_KW_TYPES: mgplot.kw_type_checking.ExpectedTypeDict = {'start_r': <class 'pandas._libs.tslibs.period.Period'>, 'end_r': <class 'pandas._libs.tslibs.period.Period'>, 'width': (<class 'float'>, <class 'int'>, <class 'collections.abc.Sequence'>, (<class 'float'>, <class 'int'>)), 'style': (<class 'str'>, <class 'collections.abc.Sequence'>, (<class 'str'>,)), 'label_series': (<class 'bool'>, <class 'collections.abc.Sequence'>, (<class 'bool'>,), <class 'NoneType'>), 'annotate': (<class 'bool'>, <class 'collections.abc.Sequence'>, (<class 'bool'>,)), 'color': (<class 'str'>, <class 'collections.abc.Sequence'>, (<class 'str'>,)), 'ax': (<class 'matplotlib.axes._axes.Axes'>, <class 'NoneType'>), 'alpha': (<class 'float'>, <class 'collections.abc.Sequence'>, (<class 'float'>,)), 'drawstyle': (<class 'str'>, <class 'collections.abc.Sequence'>, (<class 'str'>,), <class 'NoneType'>), 'marker': (<class 'str'>, <class 'collections.abc.Sequence'>, (<class 'str'>,), <class 'NoneType'>), 'markersize': (<class 'float'>, <class 'collections.abc.Sequence'>, (<class 'float'>,), <class 'int'>, <class 'NoneType'>), 'dropna': (<class 'bool'>, <class 'collections.abc.Sequence'>, (<class 'bool'>,)), 'rounding': (<class 'collections.abc.Sequence'>, (<class 'bool'>, <class 'int'>), <class 'int'>, <class 'bool'>, <class 'NoneType'>), 'fontsize': (<class 'collections.abc.Sequence'>, (<class 'str'>, <class 'int'>), <class 'str'>, <class 'int'>, <class 'NoneType'>), 'fontname': (<class 'str'>, <class 'collections.abc.Sequence'>, (<class 'str'>,)), 'rotation': (<class 'int'>, <class 'float'>, <class 'collections.abc.Sequence'>, (<class 'int'>, <class 'float'>)), 'annotate_color': (<class 'str'>, <class 'collections.abc.Sequence'>, (<class 'str'>,), <class 'bool'>, <class 'collections.abc.Sequence'>, (<class 'bool'>,), <class 'NoneType'>), 'plot_from': (<class 'int'>, <class 'pandas._libs.tslibs.period.Period'>, <class 'NoneType'>)}
def get_projection( original: pandas.core.series.Series, to_period: pandas._libs.tslibs.period.Period) -> pandas.core.series.Series:
def get_projection(original: Series, to_period: Period) -> Series:
    """
    Linear projection of a series, fitted on its early observations.

    A straight line is fitted by least squares to the observations of
    original whose index is at or before to_period (inclusive). The line
    is then evaluated over the whole of original.

    Arguments
    - original - the series to project. Its index must be comparable
      with to_period, and is expected to be in ascending order.
    - to_period - the last period (inclusive) used in the regression.

    Returns
    - a Series with the same index as original, holding the fitted
      straight line.

    Raises
    - ValueError if fewer than two non-missing observations fall in
      the regression window (a line cannot be fitted).
    """

    x_complete = arange(len(original))

    # Fit only on observed points inside the window. A single NaN passed
    # to polyfit() would otherwise turn the whole projection into NaN.
    in_window = original.index <= to_period
    usable = in_window & original.notna().to_numpy()
    if int(usable.sum()) < 2:
        raise ValueError(
            "At least two non-missing observations are needed "
            "in the regression window to fit a projection"
        )

    # Regress against each point's position in the full series, so gaps
    # left by missing values do not distort the slope.
    y_values = original.to_numpy(dtype=float)
    slope, intercept = polyfit(x_complete[usable], y_values[usable], 1)

    return Series((x_complete * slope) + intercept, index=original.index)

Projection based on data from the start of a series to the to_period (inclusive). Returns projection over the whole period of the original series.

def postcovid_plot(data: ~DataT, **kwargs) -> matplotlib.axes._axes.Axes:
 65def postcovid_plot(data: DataT, **kwargs) -> Axes:
 66    """
 67    Plots a series with a PeriodIndex.
 68
 69    Arguments
 70    - data - the series to be plotted (note that this function
 71      is designed to work with a single series, not a DataFrame).
 72    - **kwargs - same as for line_plot() and finalise_plot().
 73
 74    Raises:
 75    - TypeError if series is not a pandas Series
 76    - TypeError if series does not have a PeriodIndex
 77    - ValueError if series does not have a D, M or Q frequency
 78    - ValueError if regression start is after regression end
 79    """
 80
 81    # --- check the kwargs
 82    me = "postcovid_plot"
 83    report_kwargs(called_from=me, **kwargs)
 84    kwargs = validate_kwargs(POSTCOVID_KW_TYPES, me, **kwargs)
 85
 86    # --- check the data
 87    data = check_clean_timeseries(data, me)
 88    if not isinstance(data, Series):
 89        raise TypeError("The series argument must be a pandas Series")
 90    series: Series = data
 91    series_index = PeriodIndex(series.index)  # syntactic sugar for type hinting
 92    if series_index.freqstr[:1] not in ("Q", "M", "D"):
 93        raise ValueError("The series index must have a D, M or Q freq")
 94    # rely on line_plot() to validate kwargs
 95    if PLOT_FROM in kwargs:
 96        print("Warning: the 'plot_from' argument is ignored in postcovid_plot().")
 97        del kwargs[PLOT_FROM]
 98
 99    # --- plot COVID counterfactural
100    freq = PeriodIndex(series.index).freqstr  # syntactic sugar for type hinting
101    match freq[0]:
102        case "Q":
103            start_regression = Period("2014Q4", freq=freq)
104            end_regression = Period("2019Q4", freq=freq)
105        case "M":
106            start_regression = Period("2015-01", freq=freq)
107            end_regression = Period("2020-01", freq=freq)
108        case "D":
109            start_regression = Period("2015-01-01", freq=freq)
110            end_regression = Period("2020-01-01", freq=freq)
111
112    start_regression = Period(kwargs.pop(START_R, start_regression), freq=freq)
113    end_regression = Period(kwargs.pop(END_R, end_regression), freq=freq)
114    if start_regression >= end_regression:
115        raise ValueError("Start period must be before end period")
116
117    # --- combine data and projection
118    recent = series[series.index >= start_regression].copy()
119    recent.name = "Series"
120    projection = get_projection(recent, end_regression)
121    projection.name = "Pre-COVID projection"
122    data_set = DataFrame([projection, recent]).T
123
124    # --- activate plot settings
125    kwargs[WIDTH] = kwargs.pop(
126        WIDTH, (get_setting("line_normal"), get_setting("line_wide"))
127    )  # series line is thicker than projection
128    kwargs[STYLE] = kwargs.pop(STYLE, ("--", "-"))  # dashed regression line
129    kwargs[LABEL_SERIES] = kwargs.pop(LABEL_SERIES, True)
130    kwargs[ANNOTATE] = kwargs.pop(ANNOTATE, (False, True))  # annotate series only
131    kwargs[COLOR] = kwargs.pop(COLOR, ("darkblue", "#dd0000"))
132
133    return line_plot(
134        data_set,
135        **kwargs,
136    )

Plots a series with a PeriodIndex against a linear projection of its pre-COVID trend.

Arguments

  • data - the series to be plotted (note that this function is designed to work with a single series, not a DataFrame).
  • **kwargs - same as for line_plot() and finalise_plot().

Raises:

  • TypeError if series is not a pandas Series
  • TypeError if series does not have a PeriodIndex
  • ValueError if series does not have a D, M or Q frequency
  • ValueError if regression start is not before regression end