Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/statsmodels/tsa/seasonal.py : 18%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2Seasonal Decomposition by Moving Averages
3"""
4from statsmodels.compat.pandas import deprecate_kwarg
6import numpy as np
7import pandas as pd
8from pandas.core.nanops import nanmean as pd_nanmean
9from statsmodels.tsa._stl import STL
11from statsmodels.tools.validation import array_like, PandasWrapper
12from statsmodels.tsa.tsatools import freq_to_period
13from .filters.filtertools import convolution_filter
15__all__ = ['STL', 'seasonal_decompose', 'seasonal_mean', 'DecomposeResult']
def _extrapolate_trend(trend, npoints):
    """
    Fill the NaN rows at both ends of ``trend`` by linear extrapolation.

    A straight line is fitted by least squares to up to ``npoints`` defined
    rows next to each end, and the leading / trailing NaN rows are replaced
    with the values of that line.

    Parameters
    ----------
    trend : ndarray
        1d or 2d array (series in columns). It is modified in place.
    npoints : int
        Maximum number of rows used for each of the two regressions.

    Returns
    -------
    ndarray
        The same ``trend`` object, with its end rows filled in.
    """
    nrows = trend.shape[0]

    # Position of the first and of the last row that contain no NaN at all.
    front = next(pos for pos, row in enumerate(trend)
                 if not np.any(np.isnan(row)))
    offset_from_end = next(pos for pos, row in enumerate(trend[::-1])
                           if not np.any(np.isnan(row)))
    back = nrows - 1 - offset_from_end

    # Half-open row windows [front, front_last) and [back_first, back) that
    # feed the two regressions; note the back window stops before ``back``.
    front_last = min(front + npoints, back)
    back_first = max(front, back - npoints)

    def _line_values(fit_start, fit_stop, positions):
        # Regress the rows [fit_start, fit_stop) on their index plus a
        # constant, then evaluate the fitted line at ``positions``.
        design = np.c_[np.arange(fit_start, fit_stop),
                       np.ones(fit_stop - fit_start)]
        slope, intercept = np.linalg.lstsq(
            design, trend[fit_start:fit_stop], rcond=-1)[0]
        values = (positions * np.c_[slope] + np.c_[intercept]).T
        if trend.ndim == 1:
            values = values.squeeze()
        return values

    trend[:front] = _line_values(front, front_last, np.arange(0, front))
    trend[back + 1:] = _line_values(back_first, back,
                                    np.arange(back + 1, nrows))

    return trend
@deprecate_kwarg('freq', 'period')
def seasonal_mean(x, period):
    """
    Compute the NaN-ignoring mean of x for every position within a cycle.

    Parameters
    ----------
    x : array_like
        Data to average; observations run along the first axis.
    period : int
        Number of periods per cycle, e.g., 12 for monthly data.

    Returns
    -------
    ndarray
        Array with ``period`` entries along its first axis; entry ``i`` is
        the mean over ``x[i::period]`` taken along axis 0.
    """
    phase_means = []
    for phase in range(period):
        # Every period-th observation, starting at this phase of the cycle.
        phase_means.append(pd_nanmean(x[phase::period], axis=0))
    return np.array(phase_means)
@deprecate_kwarg('freq', 'period')
def seasonal_decompose(x, model="additive", filt=None, period=None,
                       two_sided=True, extrapolate_trend=0):
    """
    Seasonal decomposition using moving averages.

    Parameters
    ----------
    x : array_like
        Time series. If 2d, individual series are in columns. x must contain
        at least 2 complete cycles.
    model : {"additive", "multiplicative"}, optional
        Type of seasonal component. Only the first letter is inspected, so
        any string starting with "m" selects the multiplicative model and
        everything else the additive one.
    filt : array_like, optional
        The filter coefficients for filtering out the seasonal component.
        When omitted, a moving average spanning one period is used (with the
        two end weights halved when the period is even). The concrete moving
        average method used in filtering is determined by two_sided.
    period : int, optional
        Period of the series. Must be used if x is not a pandas object or if
        the index of x does not have a frequency. Overrides default
        periodicity of x if x is a pandas object with a timeseries index.
    two_sided : bool, optional
        The moving average method used in filtering.
        If True (default), a centered moving average is computed using the
        filt. If False, the filter coefficients are for past values only.
    extrapolate_trend : int or 'freq', optional
        If set to > 0, the trend resulting from the convolution is
        linear least-squares extrapolated on both ends (or the single one
        if two_sided is False) considering this many (+1) closest points.
        If set to 'freq', `period` closest points are used. Setting this
        parameter results in no NaN values in trend or resid components.

    Returns
    -------
    DecomposeResult
        An object with seasonal, trend, and resid attributes.

    Raises
    ------
    ValueError
        If x contains non-finite values, if the multiplicative model is
        requested for data with zero or negative values, if no period is
        given and none can be inferred from the index of x, or if x has
        fewer than two complete cycles.

    See Also
    --------
    statsmodels.tsa.filters.bk_filter.bkfilter
        Baxter-King filter.
    statsmodels.tsa.filters.cf_filter.cffilter
        Christiano-Fitzgerald asymmetric, random walk filter.
    statsmodels.tsa.filters.hp_filter.hpfilter
        Hodrick-Prescott filter.
    statsmodels.tsa.filters.convolution_filter
        Linear filtering via convolution.
    statsmodels.tsa.seasonal.STL
        Season-Trend decomposition using LOESS.

    Notes
    -----
    This is a naive decomposition. More sophisticated methods should
    be preferred.

    The additive model is Y[t] = T[t] + S[t] + e[t]

    The multiplicative model is Y[t] = T[t] * S[t] * e[t]

    The trend is first estimated by applying a convolution filter to the
    data and removed from the series. The average of the detrended series
    for each period is the returned seasonal component.
    """
    wrapper = PandasWrapper(x)
    # The index is lost once x becomes an ndarray, so read its inferred
    # frequency now; it is only needed when no explicit period was given.
    inferred_freq = None
    if period is None:
        inferred_freq = getattr(getattr(x, 'index', None),
                                'inferred_freq', None)

    x = array_like(x, 'x', maxdim=2)
    nobs = len(x)

    if not np.all(np.isfinite(x)):
        raise ValueError("This function does not handle missing values")
    multiplicative = model.startswith('m')
    if multiplicative and np.any(x <= 0):
        raise ValueError("Multiplicative seasonality is not appropriate "
                         "for zero and negative values")

    if period is None:
        if inferred_freq is None:
            raise ValueError("You must specify a period or x must be a "
                             "pandas object with a DatetimeIndex with "
                             "a freq not set to None")
        period = freq_to_period(inferred_freq)
    min_nobs = 2 * period
    if x.shape[0] < min_nobs:
        raise ValueError('x must have 2 complete cycles requires {0} '
                         'observations. x only has {1} '
                         'observation(s)'.format(min_nobs, x.shape[0]))

    if filt is None:
        if period % 2:
            filt = np.repeat(1. / period, period)
        else:
            # Even period: split the weights at the ends.
            filt = np.array([.5] + [1] * (period - 1) + [.5]) / period

    trend = convolution_filter(x, filt, int(two_sided) + 1)

    if extrapolate_trend == 'freq':
        extrapolate_trend = period - 1
    if extrapolate_trend > 0:
        trend = _extrapolate_trend(trend, extrapolate_trend + 1)

    detrended = x / trend if multiplicative else x - trend

    # Normalise the per-period averages so that they average to one
    # (multiplicative) or to zero (additive) over a cycle.
    period_averages = seasonal_mean(detrended, period)
    if multiplicative:
        period_averages /= np.mean(period_averages, axis=0)
    else:
        period_averages -= np.mean(period_averages, axis=0)

    # Repeat one cycle often enough to cover the sample, then trim.
    seasonal = np.tile(period_averages.T, nobs // period + 1).T[:nobs]

    if multiplicative:
        resid = x / seasonal / trend
    else:
        resid = detrended - seasonal

    return DecomposeResult(
        seasonal=wrapper.wrap(seasonal.squeeze(), columns='seasonal'),
        trend=wrapper.wrap(trend.squeeze(), columns='trend'),
        resid=wrapper.wrap(resid.squeeze(), columns='resid'),
        observed=wrapper.wrap(x.squeeze(), columns=None))
class DecomposeResult(object):
    """
    Results class for seasonal decompositions

    Parameters
    ----------
    observed : array_like
        The data series that has been decomposed.
    seasonal : array_like
        The seasonal component of the data series.
    trend : array_like
        The trend component of the data series.
    resid : array_like
        The residual component of the data series.
    weights : array_like, optional
        The weights used to reduce outlier influence. When omitted, weights
        of one with the shape of ``observed`` are stored.
    """
    def __init__(self, observed, seasonal, trend, resid, weights=None):
        self._seasonal = seasonal
        self._trend = trend
        if weights is None:
            weights = np.ones_like(observed)
            # Give the default weights the same index as the observed data.
            if isinstance(observed, pd.Series):
                weights = pd.Series(weights, index=observed.index,
                                    name='weights')
        self._weights = weights
        self._resid = resid
        self._observed = observed

    @property
    def observed(self):
        """Observed data"""
        return self._observed

    @property
    def seasonal(self):
        """The estimated seasonal component"""
        return self._seasonal

    @property
    def trend(self):
        """The estimated trend component"""
        return self._trend

    @property
    def resid(self):
        """The estimated residuals"""
        return self._resid

    @property
    def weights(self):
        """The weights used in the robust estimation"""
        return self._weights

    @property
    def nobs(self):
        """Shape of the observed data (a tuple, not a scalar count)"""
        # NOTE(review): the name suggests a number of observations, but the
        # full shape tuple is returned; kept as-is for compatibility.
        return self._observed.shape

    def plot(self, observed=True, seasonal=True, trend=True, resid=True,
             weights=False):
        """
        Plot estimated components

        Parameters
        ----------
        observed : bool
            Include the observed series in the plot
        seasonal : bool
            Include the seasonal component in the plot
        trend : bool
            Include the trend component in the plot
        resid : bool
            Include the residual in the plot
        weights : bool
            Include the weights in the plot (if any)

        Returns
        -------
        matplotlib.figure.Figure
            The figure instance that contains the plot.
        """
        from statsmodels.graphics.utils import _import_mpl
        from pandas.plotting import register_matplotlib_converters
        plt = _import_mpl()
        register_matplotlib_converters()
        panels = [(self._observed, 'Observed')] if observed else []
        panels += [(self.trend, 'trend')] if trend else []
        panels += [(self.seasonal, 'seasonal')] if seasonal else []
        panels += [(self.resid, 'residual')] if resid else []
        panels += [(self.weights, 'weights')] if weights else []

        if isinstance(self._observed, (pd.DataFrame, pd.Series)):
            nobs = self._observed.shape[0]
            xlim = self._observed.index[0], self._observed.index[nobs - 1]
        else:
            xlim = (0, self._observed.shape[0] - 1)

        # squeeze=False always yields a 2d array of axes; without it a
        # single selected component returns a bare Axes that cannot be
        # iterated over.
        fig, axs = plt.subplots(len(panels), 1, squeeze=False)
        for i, (ax, (component, def_name)) in enumerate(zip(axs[:, 0],
                                                            panels)):
            if def_name != 'residual':
                ax.plot(component)
            else:
                # Residuals are drawn as markers around a zero line.
                ax.plot(component, marker='o', linestyle='none')
                ax.plot(xlim, (0, 0), color='#000000', zorder=-3)
            # Prefer the component's own name, but fall back to the default
            # label when it has none (e.g. an unnamed Series).
            name = getattr(component, 'name', None)
            if name is None:
                name = def_name
            if def_name != 'Observed' and isinstance(name, str):
                name = name.capitalize()
            # The observed panel, when first, is labelled with a title; all
            # other panels get a y-axis label.
            title = ax.set_title if i == 0 and observed else ax.set_ylabel
            title(name)
            ax.set_xlim(xlim)

        fig.tight_layout()
        return fig