Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/statsmodels/tsa/arima/specification.py : 10%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2SARIMAX specification class.
4Author: Chad Fulton
5License: BSD-3
6"""
import numpy as np
import pandas as pd

from statsmodels.tools.data import _is_using_pandas
from statsmodels.tsa.base.tsa_model import TimeSeriesModel
from statsmodels.tsa.statespace.tools import (
    is_invertible, constrain_stationary_univariate as constrain,
    unconstrain_stationary_univariate as unconstrain,
    prepare_exog, prepare_trend_spec, prepare_trend_data)

from statsmodels.tsa.arima.tools import standardize_lag_order, validate_basic
20class SARIMAXSpecification(object):
21 """
22 SARIMAX specification.
24 Parameters
25 ----------
26 endog : array_like, optional
27 The observed time-series process :math:`y`.
28 exog : array_like, optional
29 Array of exogenous regressors.
30 order : tuple, optional
31 The (p,d,q) order of the model for the autoregressive, differences, and
32 moving average components. d is always an integer, while p and q may
33 either be integers or lists of integers. May not be used in combination
34 with the arguments `ar_order`, `diff`, or `ma_order`.
35 seasonal_order : tuple, optional
36 The (P,D,Q,s) order of the seasonal component of the model for the
37 AR parameters, differences, MA parameters, and periodicity. Default
38 is (0, 0, 0, 0). D and s are always integers, while P and Q
39 may either be integers or lists of positive integers. May not be used
40 in combination with the arguments `seasonal_ar_order`, `seasonal_diff`,
41 or `seasonal_ma_order`.
42 ar_order : int or list of int
43 The autoregressive order of the model. May be an integer, in which case
44 all autoregressive lags up to and including it will be included.
45 Alternatively, may be a list of integers specifying which lag orders
46 are included. May not be used in combination with `order`.
47 diff : int
48 The order of integration of the model. May not be used in combination
49 with `order`.
50 ma_order : int or list of int
51 The moving average order of the model. May be an integer or
52 list of integers. See the documentation for `ar_order` for details.
53 May not be used in combination with `order`.
54 seasonal_ar_order : int or list of int
55 The seasonal autoregressive order of the model. May be an integer or
56 list of integers. See the documentation for `ar_order` for examples.
57 Note that if `seasonal_periods = 4` and `seasonal_ar_order = 2`, then
58 this implies that the overall model will include lags 4 and 8.
59 May not be used in combination with `seasonal_order`.
60 seasonal_diff : int
61 The order of seasonal integration of the model. May not be used in
62 combination with `seasonal_order`.
63 seasonal_ma_order : int or list of int
64 The moving average order of the model. May be an integer or
65 list of integers. See the documentation for `ar_order` and
66 `seasonal_ar_order` for additional details. May not be used in
67 combination with `seasonal_order`.
68 seasonal_periods : int
69 Number of periods in a season. May not be used in combination with
70 `seasonal_order`.
71 enforce_stationarity : bool, optional
72 Whether or not to require the autoregressive parameters to correspond
73 to a stationarity process. This is only possible in estimation by
74 numerical maximum likelihood.
75 enforce_invertibility : bool, optional
76 Whether or not to require the moving average parameters to correspond
77 to an invertible process. This is only possible in estimation by
78 numerical maximum likelihood.
79 concentrate_scale : bool, optional
80 Whether or not to concentrate the scale (variance of the error term)
81 out of the likelihood. This reduces the number of parameters by one.
82 This is only applicable when considering estimation by numerical
83 maximum likelihood.
84 dates : array-like of datetime, optional
85 If no index is given by `endog` or `exog`, an array-like object of
86 datetime objects can be provided.
87 freq : str, optional
88 If no index is given by `endog` or `exog`, the frequency of the
89 time-series may be specified here as a Pandas offset or offset string.
90 missing : str
91 Available options are 'none', 'drop', and 'raise'. If 'none', no nan
92 checking is done. If 'drop', any observations with nans are dropped.
93 If 'raise', an error is raised. Default is 'none'.
95 Attributes
96 ----------
97 order : tuple, optional
98 The (p,d,q) order of the model for the autoregressive, differences, and
99 moving average components. d is always an integer, while p and q may
100 either be integers or lists of integers.
101 seasonal_order : tuple, optional
102 The (P,D,Q,s) order of the seasonal component of the model for the
103 AR parameters, differences, MA parameters, and periodicity. Default
104 is (0, 0, 0, 0). D and s are always integers, while P and Q
105 may either be integers or lists of positive integers.
106 ar_order : int or list of int
107 The autoregressive order of the model. May be an integer, in which case
108 all autoregressive lags up to and including it will be included. For
109 example, if `ar_order = 3`, then the model will include lags 1, 2,
110 and 3. Alternatively, may be a list of integers specifying exactly
111 which lag orders are included. For example, if `ar_order = [1, 3]`,
112 then the model will include lags 1 and 3 but will exclude lag 2.
113 diff : int
114 The order of integration of the model.
115 ma_order : int or list of int
116 The moving average order of the model. May be an integer or
117 list of integers. See the documentation for `ar_order` for examples.
118 seasonal_ar_order : int or list of int
119 The seasonal autoregressive order of the model. May be an integer or
120 list of integers. See the documentation for `ar_order` for examples.
121 Note that if `seasonal_periods = 4` and `seasonal_ar_order = 2`, then
122 this implies that the overall model will include lags 4 and 8.
123 seasonal_diff : int
124 The order of seasonal integration of the model.
125 seasonal_ma_order : int or list of int
126 The moving average order of the model. May be an integer or
127 list of integers. See the documentation for `ar_order` and
128 `seasonal_ar_order` for additional details.
129 seasonal_periods : int
130 Number of periods in a season.
131 trend : str{'n','c','t','ct'} or iterable, optional
132 Parameter controlling the deterministic trend polynomial :math:`A(t)`.
133 Can be specified as a string where 'c' indicates a constant (i.e. a
134 degree zero component of the trend polynomial), 't' indicates a
135 linear trend with time, and 'ct' is both. Can also be specified as an
136 iterable defining the polynomial as in `numpy.poly1d`, where
137 `[1,1,0,1]` would denote :math:`a + bt + ct^3`. Default is to not
138 include a trend component.
139 ar_lags : list of int
140 List of included autoregressive lags. If `ar_order` is a list, then
141 `ar_lags == ar_order`. If `ar_lags = [1, 2]`, then the overall model
142 will include the 1st and 2nd autoregressive lags.
143 ma_lags : list of int
144 List of included moving average lags. If `ma_order` is a list, then
145 `ma_lags == ma_order`. If `ma_lags = [1, 2]`, then the overall model
146 will include the 1st and 2nd moving average lags.
147 seasonal_ar_lags : list of int
148 List of included seasonal autoregressive lags. If `seasonal_ar_order`
149 is a list, then `seasonal_ar_lags == seasonal_ar_order`. If
150 `seasonal_periods = 4` and `seasonal_ar_lags = [1, 2]`, then the
151 overall model will include the 4th and 8th autoregressive lags.
152 seasonal_ma_lags : list of int
153 List of included seasonal moving average lags. If `seasonal_ma_order`
154 is a list, then `seasonal_ma_lags == seasonal_ma_order`. See the
155 documentation to `seasonal_ar_lags` for examples.
156 max_ar_order : int
157 Largest included autoregressive lag.
158 max_ma_order : int
159 Largest included moving average lag.
160 max_seasonal_ar_order : int
161 Largest included seasonal autoregressive lag.
162 max_seasonal_ma_order : int
163 Largest included seasonal moving average lag.
164 max_reduced_ar_order : int
165 Largest lag in the reduced autoregressive polynomial. Equal to
166 `max_ar_order + max_seasonal_ar_order * seasonal_periods`.
167 max_reduced_ma_order : int
168 Largest lag in the reduced moving average polynomial. Equal to
169 `max_ma_order + max_seasonal_ma_order * seasonal_periods`.
170 enforce_stationarity : bool
171 Whether or not to transform the AR parameters to enforce stationarity
172 in the autoregressive component of the model. This is only possible
173 in estimation by numerical maximum likelihood.
174 enforce_invertibility : bool
175 Whether or not to transform the MA parameters to enforce invertibility
176 in the moving average component of the model. This is only possible
177 in estimation by numerical maximum likelihood.
178 concentrate_scale : bool
179 Whether or not to concentrate the variance (scale term) out of the
180 log-likelihood function. This is only applicable when considering
181 estimation by numerical maximum likelihood.
182 is_ar_consecutive
183 is_ma_consecutive
184 is_integrated
185 is_seasonal
186 k_exog_params
187 k_ar_params
188 k_ma_params
189 k_seasonal_ar_params
190 k_seasonal_ma_params
191 k_params
192 exog_names
193 ar_names
194 ma_names
195 seasonal_ar_names
196 seasonal_ma_names
197 param_names
199 Examples
200 --------
201 >>> SARIMAXSpecification(order=(1, 0, 2))
202 SARIMAXSpecification(endog=y, order=(1, 0, 2))
204 >>> spec = SARIMAXSpecification(ar_order=1, ma_order=2)
205 SARIMAXSpecification(endog=y, order=(1, 0, 2))
207 >>> spec = SARIMAXSpecification(ar_order=1, seasonal_order=(1, 0, 0, 4))
208 SARIMAXSpecification(endog=y, order=(1, 0, 0), seasonal_order=(1, 0, 0, 4))
209 """
def __init__(self, endog=None, exog=None, order=None,
             seasonal_order=None, ar_order=None, diff=None, ma_order=None,
             seasonal_ar_order=None, seasonal_diff=None,
             seasonal_ma_order=None, seasonal_periods=None, trend=None,
             enforce_stationarity=None, enforce_invertibility=None,
             concentrate_scale=None, trend_offset=1, dates=None, freq=None,
             missing='none'):
    """
    Validate the specification arguments and store the derived attributes.

    The orders may be given either jointly (`order`, `seasonal_order`) or
    piecewise (`ar_order`, `diff`, `ma_order`, `seasonal_ar_order`,
    `seasonal_diff`, `seasonal_ma_order`, `seasonal_periods`), but not both
    ways at once. See the class docstring for the meaning of the individual
    arguments.

    Parameters
    ----------
    trend_offset : int, optional
        Stored as `self.trend_offset` and forwarded to
        `construct_trend_data` when a trend is included. Default is 1.

    Raises
    ------
    ValueError
        If joint and piecewise orders are mixed, if `order` /
        `seasonal_order` do not have three / four elements, if a
        differencing order or the seasonal periodicity is negative or
        fractional, if the seasonal periodicity is 1, if seasonal terms are
        requested with zero periodicity, if a lag appears in both the
        seasonal and non-seasonal polynomial of the same kind, or if
        `endog` has more than one column.
    """
    # Basic parameters
    self.enforce_stationarity = enforce_stationarity
    self.enforce_invertibility = enforce_invertibility
    self.concentrate_scale = concentrate_scale
    self.trend_offset = trend_offset

    # Validate that we were not given conflicting specifications
    has_order = order is not None
    has_specific_order = (ar_order is not None or diff is not None or
                          ma_order is not None)
    has_seasonal_order = seasonal_order is not None
    has_specific_seasonal_order = (seasonal_ar_order is not None or
                                   seasonal_diff is not None or
                                   seasonal_ma_order is not None or
                                   seasonal_periods is not None)
    # The messages name every argument that can trigger the conflict,
    # including `diff` and `seasonal_diff`.
    if has_order and has_specific_order:
        raise ValueError('Cannot specify both `order` and any of'
                         ' `ar_order`, `diff`, or `ma_order`.')
    if has_seasonal_order and has_specific_seasonal_order:
        raise ValueError('Cannot specify both `seasonal_order` and any of'
                         ' `seasonal_ar_order`, `seasonal_diff`,'
                         ' `seasonal_ma_order`, or `seasonal_periods`.')

    # Compute `order`
    if has_specific_order:
        ar_order = 0 if ar_order is None else ar_order
        diff = 0 if diff is None else diff
        ma_order = 0 if ma_order is None else ma_order
        order = (ar_order, diff, ma_order)
    elif not has_order:
        order = (0, 0, 0)

    # Compute `seasonal_order`
    if has_specific_seasonal_order:
        seasonal_ar_order = (
            0 if seasonal_ar_order is None else seasonal_ar_order)
        seasonal_diff = 0 if seasonal_diff is None else seasonal_diff
        seasonal_ma_order = (
            0 if seasonal_ma_order is None else seasonal_ma_order)
        seasonal_periods = (
            0 if seasonal_periods is None else seasonal_periods)
        seasonal_order = (seasonal_ar_order, seasonal_diff,
                          seasonal_ma_order, seasonal_periods)
    elif not has_seasonal_order:
        seasonal_order = (0, 0, 0, 0)

    # Validate shapes of `order`, `seasonal_order`
    if len(order) != 3:
        raise ValueError('`order` argument must be an iterable with three'
                         ' elements.')
    if len(seasonal_order) != 4:
        raise ValueError('`seasonal_order` argument must be an iterable'
                         ' with four elements.')

    # Validate differencing parameters
    if order[1] < 0:
        raise ValueError('Cannot specify negative differencing.')
    if order[1] != int(order[1]):
        raise ValueError('Cannot specify fractional differencing.')
    if seasonal_order[1] < 0:
        raise ValueError('Cannot specify negative seasonal differencing.')
    if seasonal_order[1] != int(seasonal_order[1]):
        raise ValueError('Cannot specify fractional seasonal'
                         ' differencing.')
    if seasonal_order[3] < 0:
        raise ValueError('Cannot specify negative seasonal periodicity.')
    # Reject e.g. 4.5 here; otherwise the `int()` call below would silently
    # truncate it to a different periodicity.
    if seasonal_order[3] != int(seasonal_order[3]):
        raise ValueError('Cannot specify fractional seasonal periodicity.')

    # Standardize to integers or lists of integers
    order = (
        standardize_lag_order(order[0], 'AR'),
        int(order[1]),
        standardize_lag_order(order[2], 'MA'))
    seasonal_order = (
        standardize_lag_order(seasonal_order[0], 'seasonal AR'),
        int(seasonal_order[1]),
        standardize_lag_order(seasonal_order[2], 'seasonal MA'),
        int(seasonal_order[3]))

    # Validate seasonals
    if seasonal_order[3] == 1:
        raise ValueError('Seasonal periodicity must be greater than 1.')
    if ((seasonal_order[0] != 0 or seasonal_order[1] != 0 or
            seasonal_order[2] != 0) and seasonal_order[3] == 0):
        raise ValueError('Must include nonzero seasonal periodicity if'
                         ' including seasonal AR, MA, or differencing.')

    # Basic order
    self.order = order
    self.ar_order, self.diff, self.ma_order = order

    self.seasonal_order = seasonal_order
    (self.seasonal_ar_order, self.seasonal_diff, self.seasonal_ma_order,
     self.seasonal_periods) = seasonal_order

    # Lists of included lags: a list order is used as given, an integer
    # order `k` expands to lags 1..k.
    if isinstance(self.ar_order, list):
        self.ar_lags = self.ar_order
    else:
        self.ar_lags = np.arange(1, self.ar_order + 1).tolist()
    if isinstance(self.ma_order, list):
        self.ma_lags = self.ma_order
    else:
        self.ma_lags = np.arange(1, self.ma_order + 1).tolist()

    if isinstance(self.seasonal_ar_order, list):
        self.seasonal_ar_lags = self.seasonal_ar_order
    else:
        self.seasonal_ar_lags = (
            np.arange(1, self.seasonal_ar_order + 1).tolist())
    if isinstance(self.seasonal_ma_order, list):
        self.seasonal_ma_lags = self.seasonal_ma_order
    else:
        self.seasonal_ma_lags = (
            np.arange(1, self.seasonal_ma_order + 1).tolist())

    # Maximum lag orders (taken from the last element of each lag list)
    self.max_ar_order = self.ar_lags[-1] if self.ar_lags else 0
    self.max_ma_order = self.ma_lags[-1] if self.ma_lags else 0

    self.max_seasonal_ar_order = (
        self.seasonal_ar_lags[-1] if self.seasonal_ar_lags else 0)
    self.max_seasonal_ma_order = (
        self.seasonal_ma_lags[-1] if self.seasonal_ma_lags else 0)

    self.max_reduced_ar_order = (
        self.max_ar_order +
        self.max_seasonal_ar_order * self.seasonal_periods)
    self.max_reduced_ma_order = (
        self.max_ma_order +
        self.max_seasonal_ma_order * self.seasonal_periods)

    # Check that we don't have duplicate AR or MA lags from the seasonal
    # component
    ar_lags = set(self.ar_lags)
    seasonal_ar_lags = set(np.array(self.seasonal_ar_lags)
                           * self.seasonal_periods)
    duplicate_ar_lags = ar_lags.intersection(seasonal_ar_lags)
    if len(duplicate_ar_lags) > 0:
        raise ValueError('Invalid model: autoregressive lag(s) %s are'
                         ' in both the seasonal and non-seasonal'
                         ' autoregressive components.'
                         % duplicate_ar_lags)

    ma_lags = set(self.ma_lags)
    seasonal_ma_lags = set(np.array(self.seasonal_ma_lags)
                           * self.seasonal_periods)
    duplicate_ma_lags = ma_lags.intersection(seasonal_ma_lags)
    if len(duplicate_ma_lags) > 0:
        raise ValueError('Invalid model: moving average lag(s) %s are'
                         ' in both the seasonal and non-seasonal'
                         ' moving average components.'
                         % duplicate_ma_lags)

    # Handle trend
    self.trend_poly, _ = prepare_trend_spec(trend)
    # This contains the included exponents of the trend polynomial,
    # where e.g. the constant term has exponent 0, a linear trend has
    # exponent 1, etc.
    self.trend_terms = np.where(self.trend_poly == 1)[0]
    # Trend order is either the degree of the trend polynomial, if all
    # exponents are included, or a list of included exponents. Here we need
    # to make a distinction between a degree zero polynomial (i.e. a
    # constant) and the zero polynomial (i.e. not even a constant). The
    # former has `trend_order = 0`, while the latter has
    # `trend_order = None`.
    self.k_trend = len(self.trend_terms)
    if len(self.trend_terms) == 0:
        self.trend_order = None
        self.trend_degree = None
    elif np.all(self.trend_terms == np.arange(len(self.trend_terms))):
        self.trend_order = self.trend_terms[-1]
        self.trend_degree = self.trend_terms[-1]
    else:
        self.trend_order = self.trend_terms
        self.trend_degree = self.trend_terms[-1]

    # Handle endog / exog
    # Standardize exog
    self.k_exog, exog = prepare_exog(exog)

    # Standardize endog (including creating a faux endog if necessary)
    faux_endog = endog is None
    if endog is None:
        endog = [] if exog is None else np.zeros(len(exog)) * np.nan

    # Add trend data into exog
    nobs = len(endog) if exog is None else len(exog)
    if self.trend_order is not None:
        trend_data = self.construct_trend_data(nobs, trend_offset)
        if exog is None:
            exog = trend_data
        elif _is_using_pandas(exog, None):
            trend_data = pd.DataFrame(trend_data, index=exog.index,
                                      columns=self.construct_trend_names())
            exog = pd.concat([trend_data, exog], axis=1)
        else:
            exog = np.c_[trend_data, exog]

    # Create an underlying time series model, to handle endog / exog,
    # especially validating shapes, retrieving names, and potentially
    # providing us with a time series index
    self._model = TimeSeriesModel(endog, exog=exog, dates=dates, freq=freq,
                                  missing=missing)
    self.endog = None if faux_endog else self._model.endog
    self.exog = self._model.exog

    # Validate endog shape
    if not faux_endog and self.endog.ndim > 1 and self.endog.shape[1] > 1:
        raise ValueError('SARIMAX models require univariate `endog`. Got'
                         ' shape %s.' % str(self.endog.shape))

    # `None` means "unknown" because no real `endog` was supplied.
    self._has_missing = (
        None if faux_endog else np.any(np.isnan(self.endog)))
@property
def is_ar_consecutive(self):
    """
    (bool) Is autoregressive lag polynomial consecutive.

    I.e. does it include all lags up to and including the maximum lag.
    True only when there are no seasonal autoregressive lags and
    `ar_order` was given as an integer rather than a list of lags.
    """
    return (self.max_seasonal_ar_order == 0 and
            not isinstance(self.ar_order, list))
@property
def is_ma_consecutive(self):
    """
    (bool) Is moving average lag polynomial consecutive.

    I.e. does it include all lags up to and including the maximum lag.
    True only when there are no seasonal moving average lags and
    `ma_order` was given as an integer rather than a list of lags.
    """
    return (self.max_seasonal_ma_order == 0 and
            not isinstance(self.ma_order, list))
@property
def is_integrated(self):
    """
    (bool) Is the model integrated.

    I.e. does it have a nonzero `diff` or `seasonal_diff`.
    """
    return self.diff > 0 or self.seasonal_diff > 0
@property
def is_seasonal(self):
    """(bool) Does the model include a seasonal component."""
    # A zero periodicity is the marker for "no seasonal component".
    return self.seasonal_periods != 0
@property
def k_exog_params(self):
    """(int) Number of parameters associated with exogenous variables."""
    # One parameter per name reported by `exog_names`.
    return len(self.exog_names)
@property
def k_ar_params(self):
    """(int) Number of autoregressive (non-seasonal) parameters."""
    # One parameter per included lag, so gaps in `ar_lags` are not counted.
    return len(self.ar_lags)
@property
def k_ma_params(self):
    """(int) Number of moving average (non-seasonal) parameters."""
    # One parameter per included lag, so gaps in `ma_lags` are not counted.
    return len(self.ma_lags)
@property
def k_seasonal_ar_params(self):
    """(int) Number of seasonal autoregressive parameters."""
    # One parameter per included seasonal lag.
    return len(self.seasonal_ar_lags)
@property
def k_seasonal_ma_params(self):
    """(int) Number of seasonal moving average parameters."""
    # One parameter per included seasonal lag.
    return len(self.seasonal_ma_lags)
@property
def k_params(self):
    """
    (int) Total number of model parameters.

    Sum of the exogenous, AR, MA, seasonal AR and seasonal MA parameter
    counts, plus one for the variance unless `concentrate_scale` is truthy.
    """
    k_params = (self.k_exog_params + self.k_ar_params + self.k_ma_params +
                self.k_seasonal_ar_params + self.k_seasonal_ma_params)
    if not self.concentrate_scale:
        k_params += 1
    return k_params
@property
def exog_names(self):
    """
    (list of str) Names associated with exogenous parameters.

    Taken from the underlying model object; an empty list is returned when
    that model reports `None`.
    """
    exog_names = self._model.exog_names
    return [] if exog_names is None else exog_names
@property
def ar_names(self):
    """(list of str) Names of (non-seasonal) autoregressive parameters."""
    # One name per included lag, e.g. lag 3 -> 'ar.L3'.
    return ['ar.L%d' % i for i in self.ar_lags]
@property
def ma_names(self):
    """(list of str) Names of (non-seasonal) moving average parameters."""
    # One name per included lag, e.g. lag 2 -> 'ma.L2'.
    return ['ma.L%d' % i for i in self.ma_lags]
@property
def seasonal_ar_names(self):
    """
    (list of str) Names of seasonal autoregressive parameters.

    The lag in each name is expressed in observations, i.e. the seasonal
    lag multiplied by `seasonal_periods`.
    """
    s = self.seasonal_periods
    return ['ar.S.L%d' % (i * s) for i in self.seasonal_ar_lags]
@property
def seasonal_ma_names(self):
    """
    (list of str) Names of seasonal moving average parameters.

    The lag in each name is expressed in observations, i.e. the seasonal
    lag multiplied by `seasonal_periods`.
    """
    s = self.seasonal_periods
    return ['ma.S.L%d' % (i * s) for i in self.seasonal_ma_lags]
@property
def param_names(self):
    """
    (list of str) Names of all model parameters.

    Ordered as exogenous, AR, MA, seasonal AR, seasonal MA, followed by
    'sigma2' unless `concentrate_scale` is truthy.
    """
    # The concatenation builds a new list, so the append below does not
    # modify any of the component name lists.
    names = (self.exog_names + self.ar_names + self.ma_names +
             self.seasonal_ar_names + self.seasonal_ma_names)
    if not self.concentrate_scale:
        names.append('sigma2')
    return names
@property
def valid_estimators(self):
    """
    (set of str) Estimators that could be used with specification.

    Note: does not consider the presence of `exog` in determining valid
    estimators. If there are exogenous variables, then feasible Generalized
    Least Squares should be used through the `gls` estimator, and the
    `valid_estimators` are the estimators that could be passed as the
    `arma_estimator` argument to `gls`.
    """
    estimators = {'yule_walker', 'burg', 'innovations',
                  'hannan_rissanen', 'innovations_mle', 'statespace'}

    # Properties
    has_ar = self.max_ar_order != 0
    has_ma = self.max_ma_order != 0
    has_seasonal = self.seasonal_periods != 0

    # Only state space can handle missing data
    if self._has_missing:
        estimators.intersection_update(['statespace'])

    # Only numerical MLE estimators can enforce restrictions
    if ((self.enforce_stationarity and self.max_ar_order > 0) or
            (self.enforce_invertibility and self.max_ma_order > 0)):
        estimators.intersection_update(['innovations_mle', 'statespace'])

    # Innovations: no AR, non-consecutive MA, seasonal
    if has_ar or not self.is_ma_consecutive or has_seasonal:
        estimators.discard('innovations')
    # Yule-Walker/Burg: no MA, non-consecutive AR, seasonal
    if has_ma or not self.is_ar_consecutive or has_seasonal:
        estimators.discard('yule_walker')
        estimators.discard('burg')
    # Hannan-Rissanen: no seasonal
    if has_seasonal:
        estimators.discard('hannan_rissanen')
    # Innovations MLE: cannot have enforce_stationarity=False or
    # concentrate_scale=True. The `is False` test is deliberate: the
    # default `None` does not disqualify the estimator.
    if self.enforce_stationarity is False or self.concentrate_scale:
        estimators.discard('innovations_mle')

    return estimators
def validate_estimator(self, estimator):
    """
    Validate an SARIMA estimator.

    Parameters
    ----------
    estimator : str
        Name of the estimator to validate against the current state of
        the specification. Possible values are: 'yule_walker', 'burg',
        'innovations', 'hannan_rissanen', 'innovations_mle', 'statespace'.

    Raises
    ------
    ValueError
        If `estimator` is not one of the names above, or if it cannot be
        used with this specification.

    Notes
    -----
    This method will raise a `ValueError` if an invalid method is passed,
    and otherwise will return None.

    This method does not consider the presence of `exog` in determining
    valid estimators. If there are exogenous variables, then feasible
    Generalized Least Squares should be used through the `gls` estimator,
    and a "valid" estimator is one that could be passed as the
    `arma_estimator` argument to `gls`.

    This method only uses the attributes `enforce_stationarity` and
    `concentrate_scale` to determine the validity of numerical maximum
    likelihood estimators. These only include 'innovations_mle' (which
    does not support `enforce_stationarity=False` or
    `concentrate_scale=True`) and 'statespace' (which supports all
    combinations of each).

    Examples
    --------
    >>> spec = SARIMAXSpecification(order=(1, 0, 2))

    >>> spec.validate_estimator('yule_walker')
    ValueError: Yule-Walker estimator does not support moving average
    components.

    >>> spec.validate_estimator('burg')
    ValueError: Burg estimator does not support moving average components.

    >>> spec.validate_estimator('innovations')
    ValueError: Innovations estimator does not support autoregressive
    components.

    >>> spec.validate_estimator('hannan_rissanen')  # returns None
    >>> spec.validate_estimator('innovations_mle')  # returns None
    >>> spec.validate_estimator('statespace')  # returns None

    >>> spec.validate_estimator('not_an_estimator')
    ValueError: "not_an_estimator" is not a valid estimator.
    """
    has_ar = self.max_ar_order != 0
    has_ma = self.max_ma_order != 0
    has_seasonal = self.seasonal_periods != 0
    has_missing = self._has_missing

    titles = {
        'yule_walker': 'Yule-Walker',
        'burg': 'Burg',
        'innovations': 'Innovations',
        'hannan_rissanen': 'Hannan-Rissanen',
        'innovations_mle': 'Innovations MLE',
        'statespace': 'State space'
    }

    # Reject unknown names first. Otherwise the `titles[estimator]` lookups
    # below would raise a KeyError instead of the documented ValueError.
    # Membership is tested against a tuple so that an unhashable argument
    # also ends up here rather than raising a TypeError.
    if estimator not in tuple(titles):
        raise ValueError('"%s" is not a valid estimator.' % estimator)

    # Only state space form can support missing data
    if estimator != 'statespace':
        if has_missing:
            raise ValueError('%s estimator does not support missing'
                             ' values in `endog`.' % titles[estimator])

    # Only state space and innovations MLE can enforce parameter
    # restrictions
    if estimator not in ['innovations_mle', 'statespace']:
        if self.max_ar_order > 0 and self.enforce_stationarity:
            raise ValueError('%s estimator cannot enforce a stationary'
                             ' autoregressive lag polynomial.'
                             % titles[estimator])
        if self.max_ma_order > 0 and self.enforce_invertibility:
            raise ValueError('%s estimator cannot enforce an invertible'
                             ' moving average lag polynomial.'
                             % titles[estimator])

    # Now go through specific disqualifications for each estimator.
    # State space form supports all variations of SARIMAX, so it has no
    # branch of its own.
    if estimator in ['yule_walker', 'burg']:
        if has_seasonal:
            raise ValueError('%s estimator does not support seasonal'
                             ' components.' % titles[estimator])
        if not self.is_ar_consecutive:
            raise ValueError('%s estimator does not support'
                             ' non-consecutive autoregressive lags.'
                             % titles[estimator])
        if has_ma:
            raise ValueError('%s estimator does not support moving average'
                             ' components.' % titles[estimator])
    elif estimator == 'innovations':
        if has_seasonal:
            raise ValueError('Innovations estimator does not support'
                             ' seasonal components.')
        if not self.is_ma_consecutive:
            raise ValueError('Innovations estimator does not support'
                             ' non-consecutive moving average lags.')
        if has_ar:
            raise ValueError('Innovations estimator does not support'
                             ' autoregressive components.')
    elif estimator == 'hannan_rissanen':
        if has_seasonal:
            raise ValueError('Hannan-Rissanen estimator does not support'
                             ' seasonal components.')
    elif estimator == 'innovations_mle':
        if self.enforce_stationarity is False:
            raise ValueError('Innovations MLE estimator does not support'
                             ' non-stationary autoregressive components,'
                             ' but `enforce_stationarity` is set to False')
        if self.concentrate_scale:
            raise ValueError('Innovations MLE estimator does not support'
                             ' concentrating the scale out of the'
                             ' log-likelihood function')
def split_params(self, params, allow_infnan=False):
    """
    Split parameter array by type into dictionary.

    Parameters
    ----------
    params : array_like
        Array of model parameters.
    allow_infnan : bool, optional
        Whether or not to allow `params` to contain -np.Inf, np.Inf, and
        np.nan. Default is False.

    Returns
    -------
    split_params : dict
        Dictionary with keys 'exog_params', 'ar_params', 'ma_params',
        'seasonal_ar_params', 'seasonal_ma_params', and (unless
        `concentrate_scale=True`) 'sigma2'. Values are the parameters
        associated with the key, based on the `params` argument.

    Examples
    --------
    >>> spec = SARIMAXSpecification(ar_order=1)
    >>> spec.split_params([0.5, 4])
    {'exog_params': array([], dtype=float64),
     'ar_params': array([0.5]),
     'ma_params': array([], dtype=float64),
     'seasonal_ar_params': array([], dtype=float64),
     'seasonal_ma_params': array([], dtype=float64),
     'sigma2': 4.0}
    """
    params = validate_basic(params, self.k_params,
                            allow_infnan=allow_infnan,
                            title='joint parameters')

    ix = [self.k_exog_params, self.k_ar_params, self.k_ma_params,
          self.k_seasonal_ar_params, self.k_seasonal_ma_params]
    names = ['exog_params', 'ar_params', 'ma_params',
             'seasonal_ar_params', 'seasonal_ma_params']
    if not self.concentrate_scale:
        ix.append(1)
        names.append('sigma2')
    # Turn the per-group counts into split positions. `np.split` returns
    # one more section than there are positions; `zip` stops at the last
    # name and so drops that extra trailing section.
    ix = np.cumsum(ix)

    out = dict(zip(names, np.split(params, ix)))
    if 'sigma2' in out:
        # Report the variance as a scalar rather than a length-1 array.
        out['sigma2'] = out['sigma2'].item()

    return out
def join_params(self, exog_params=None, ar_params=None, ma_params=None,
                seasonal_ar_params=None, seasonal_ma_params=None,
                sigma2=None):
    """
    Join parameters into a single vector.

    Parameters
    ----------
    exog_params : array_like, optional
        Parameters associated with exogenous regressors. Required if
        `exog` is part of specification.
    ar_params : array_like, optional
        Parameters associated with (non-seasonal) autoregressive component.
        Required if this component is part of the specification.
    ma_params : array_like, optional
        Parameters associated with (non-seasonal) moving average component.
        Required if this component is part of the specification.
    seasonal_ar_params : array_like, optional
        Parameters associated with seasonal autoregressive component.
        Required if this component is part of the specification.
    seasonal_ma_params : array_like, optional
        Parameters associated with seasonal moving average component.
        Required if this component is part of the specification.
    sigma2 : array_like, optional
        Innovation variance parameter. Required unless
        `concentrate_scale=True`.

    Returns
    -------
    params : ndarray
        Array of parameters. Empty if the specification has no parameters.

    Raises
    ------
    ValueError
        If a component is part of the specification but its parameters are
        missing or have the wrong shape.

    Notes
    -----
    Arguments for components that are not part of the specification are
    ignored.

    Examples
    --------
    >>> spec = SARIMAXSpecification(ar_order=1)
    >>> spec.join_params(ar_params=0.5, sigma2=4)
    array([0.5, 4. ])
    """
    definitions = [
        ('exogenous variables', self.k_exog_params, exog_params),
        ('AR terms', self.k_ar_params, ar_params),
        ('MA terms', self.k_ma_params, ma_params),
        ('seasonal AR terms', self.k_seasonal_ar_params,
         seasonal_ar_params),
        ('seasonal MA terms', self.k_seasonal_ma_params,
         seasonal_ma_params),
        ('variance', int(not self.concentrate_scale), sigma2)]

    params_list = []
    for title, k, params in definitions:
        if k > 0:
            # Validate
            if params is None:
                raise ValueError('Specification includes %s, but no'
                                 ' parameters were provided.' % title)
            # Accept scalars and arrays with singleton dimensions alike.
            params = np.atleast_1d(np.squeeze(params))
            if not params.shape == (k,):
                raise ValueError('Specification included %d %s, but'
                                 ' parameters with shape %s were provided.'
                                 % (k, title, params.shape))

            # Otherwise add to the list
            params_list.append(params)

    # `np.concatenate` raises on an empty sequence, which happens when the
    # specification has no parameters at all (no components and a
    # concentrated scale).
    if not params_list:
        return np.zeros(0)

    return np.concatenate(params_list)
def validate_params(self, params):
    """
    Validate parameter vector by raising ValueError on invalid values.

    Parameters
    ----------
    params : array_like
        Array of model parameters.

    Raises
    ------
    ValueError
        If `enforce_stationarity` is set and an autoregressive (or seasonal
        autoregressive) polynomial fails the `is_invertible` check; if
        `enforce_invertibility` is set and a moving average (or seasonal
        moving average) polynomial fails it; or if the variance term is
        not strictly positive while `concentrate_scale` is not set.

    Notes
    -----
    The vector is first passed through `split_params`, which is relied on
    for the basic validation of the input. The remaining checks are
    applied in a fixed order (AR, seasonal AR, MA, seasonal MA, variance)
    and the first failure is the one reported.

    Examples
    --------
    >>> spec = SARIMAXSpecification(ar_order=1)
    >>> spec.validate_params([-0.5, 4.])  # returns None
    >>> spec.validate_params([-0.5, -2])
    ValueError: Non-positive variance term.
    >>> spec.validate_params([-1.5, 4.])
    ValueError: Non-stationary autoregressive polynomial.
    """
    def _require_invertible(coefficients, message):
        # Prepend the leading 1 to form the lag polynomial, then test it.
        if not is_invertible(np.r_[1, coefficients]):
            raise ValueError(message)

    # Note: split_params includes basic validation
    pieces = self.split_params(params)

    # AR polynomials are built from the negated coefficients.
    if self.enforce_stationarity:
        if self.k_ar_params:
            _require_invertible(
                -pieces['ar_params'],
                'Non-stationary autoregressive polynomial.')
        if self.k_seasonal_ar_params:
            _require_invertible(
                -pieces['seasonal_ar_params'],
                'Non-stationary seasonal autoregressive polynomial.')

    # MA polynomials use the coefficients as given.
    if self.enforce_invertibility:
        if self.k_ma_params:
            _require_invertible(
                pieces['ma_params'],
                'Non-invertible moving average polynomial.')
        if self.k_seasonal_ma_params:
            _require_invertible(
                pieces['seasonal_ma_params'],
                'Non-invertible seasonal moving average polynomial.')

    # The variance is only part of the vector when it is not concentrated
    # out of the likelihood.
    if not self.concentrate_scale and pieces['sigma2'] <= 0:
        raise ValueError('Non-positive variance term.')
def constrain_params(self, unconstrained):
    """
    Constrain parameter values to be valid through transformations.

    Parameters
    ----------
    unconstrained : array_like
        Array of model unconstrained parameters.

    Returns
    -------
    constrained : ndarray
        Array of model parameters transformed to produce a valid model.

    Notes
    -----
    This is usually only used when performing numerical minimization
    of the log-likelihood function. This function is necessary because
    the minimizers consider values over the entire real space, while
    SARIMAX models require parameters in subspaces (for example positive
    variances).

    Component by component: exogenous parameters pass through unchanged;
    (seasonal) AR parameters go through `constrain` when
    `enforce_stationarity` is set; (seasonal) MA parameters are replaced
    by the negated output of `constrain` when `enforce_invertibility` is
    set; and the variance term is squared. Components that are absent from
    the specification are skipped.

    Examples
    --------
    >>> spec = SARIMAXSpecification(ar_order=1)
    >>> spec.constrain_params([10, -2])
    array([-0.99504,  4.     ])
    """
    raw = self.split_params(unconstrained)
    transformed = {}

    if self.k_exog_params:
        transformed['exog_params'] = raw['exog_params']

    if self.k_ar_params:
        ar = raw['ar_params']
        transformed['ar_params'] = (
            constrain(ar) if self.enforce_stationarity else ar)

    if self.k_ma_params:
        ma = raw['ma_params']
        transformed['ma_params'] = (
            (-constrain(ma)) if self.enforce_invertibility else ma)

    if self.k_seasonal_ar_params:
        seasonal_ar = raw['seasonal_ar_params']
        transformed['seasonal_ar_params'] = (
            constrain(seasonal_ar) if self.enforce_stationarity
            else seasonal_ar)

    if self.k_seasonal_ma_params:
        seasonal_ma = raw['seasonal_ma_params']
        transformed['seasonal_ma_params'] = (
            (-constrain(seasonal_ma)) if self.enforce_invertibility
            else seasonal_ma)

    if not self.concentrate_scale:
        # Squaring maps any real value onto a non-negative variance.
        transformed['sigma2'] = raw['sigma2']**2

    return self.join_params(**transformed)
def unconstrain_params(self, constrained):
    """
    Reverse transformations used to constrain parameter values to be valid.

    Parameters
    ----------
    constrained : array_like
        Array of model parameters.

    Returns
    -------
    unconstrained : ndarray
        Array of parameters with constraining transformations reversed.

    Notes
    -----
    This is usually only used when performing numerical minimization
    of the log-likelihood function. This function is the (approximate)
    inverse of `constrain_params`.

    Component by component: exogenous parameters pass through unchanged;
    (seasonal) AR parameters go through `unconstrain` when
    `enforce_stationarity` is set; (seasonal) MA parameters are negated
    and then passed to `unconstrain` when `enforce_invertibility` is set;
    and the variance term is raised to the power 0.5. Components that are
    absent from the specification are skipped.

    Examples
    --------
    >>> spec = SARIMAXSpecification(ar_order=1)
    >>> spec.unconstrain_params([-0.5, 4.])
    array([0.57735, 2.     ])
    """
    given = self.split_params(constrained)
    reverted = {}

    if self.k_exog_params:
        reverted['exog_params'] = given['exog_params']

    if self.k_ar_params:
        ar = given['ar_params']
        reverted['ar_params'] = (
            unconstrain(ar) if self.enforce_stationarity else ar)

    if self.k_ma_params:
        ma = given['ma_params']
        reverted['ma_params'] = (
            unconstrain(-ma) if self.enforce_invertibility else ma)

    if self.k_seasonal_ar_params:
        seasonal_ar = given['seasonal_ar_params']
        reverted['seasonal_ar_params'] = (
            unconstrain(seasonal_ar) if self.enforce_stationarity
            else seasonal_ar)

    if self.k_seasonal_ma_params:
        seasonal_ma = given['seasonal_ma_params']
        reverted['seasonal_ma_params'] = (
            unconstrain(-seasonal_ma) if self.enforce_invertibility
            else seasonal_ma)

    if not self.concentrate_scale:
        # Square root undoes the squaring applied when constraining.
        reverted['sigma2'] = given['sigma2']**0.5

    return self.join_params(**reverted)
def construct_trend_data(self, nobs, offset=1):
    """
    Build the trend data for this specification, if it has a trend.

    Parameters
    ----------
    nobs : int
        Forwarded unchanged to `prepare_trend_data`.
    offset : int, optional
        Forwarded unchanged to `prepare_trend_data`. Default is 1.

    Returns
    -------
    trend_data : object or None
        None when `self.trend_order` is None; otherwise whatever
        `prepare_trend_data` returns for `self.trend_poly`.
        NOTE(review): presumably an array with one row per observation and
        one column per trend term -- confirm against `prepare_trend_data`.
    """
    if self.trend_order is None:
        return None

    # The number of trend terms is passed as the integer sum of the entries
    # of the trend polynomial.
    k_trend = int(np.sum(self.trend_poly))
    return prepare_trend_data(self.trend_poly, k_trend, nobs, offset)
def construct_trend_names(self):
    """
    Build a list of names for the entries of `self.trend_terms`.

    Returns
    -------
    names : list of str
        One name per entry, in order: 'const' for 0, 'drift' for 1, and
        'trend.<i>' for any other value ``i``.
    """
    def _label(term):
        if term == 0:
            return 'const'
        if term == 1:
            return 'drift'
        return 'trend.%d' % term

    return [_label(term) for term in self.trend_terms]
def __repr__(self):
    """
    Represent SARIMAXSpecification object as a string.

    The result has the form ``SARIMAXSpecification(key=value, ...)``.
    `order` is always listed; `endog`, `exog`, `seasonal_order` and the
    three option flags are listed only when present / not None.
    """
    parts = []

    if self.endog is not None:
        parts.append('endog=%s' % self._model.endog_names)
    if self.k_exog_params:
        parts.append('exog=%s' % self.exog_names)

    # str() keeps the tuple from being unpacked as %-format arguments.
    parts.append('order=%s' % str(self.order))
    if self.seasonal_periods > 0:
        parts.append('seasonal_order=%s' % str(self.seasonal_order))

    # Option flags are reported only when they have been set.
    for flag in ('enforce_stationarity', 'enforce_invertibility',
                 'concentrate_scale'):
        value = getattr(self, flag)
        if value is not None:
            parts.append(flag + ('=%s' % value))

    return 'SARIMAXSpecification(%s)' % ', '.join(parts)