Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/statsmodels/tsa/x13.py : 18%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2Run x12/x13-arima specs in a subprocess from Python and curry results back
3into python.
5Notes
6-----
7Many of the functions are called x12. However, they are also intended to work
8for x13. If this is not the case, it's a bug.
9"""
10from statsmodels.compat.pandas import deprecate_kwarg
12import os
13import subprocess
14import tempfile
15import re
16from warnings import warn
18import pandas as pd
20from statsmodels.compat.python import iteritems
21from statsmodels.tools.tools import Bunch
22from statsmodels.tools.sm_exceptions import (X13NotFoundError,
23 IOWarning, X13Error,
24 X13Warning)
# Names exported as the public API of this module.
__all__ = ["x13_arima_select_order", "x13_arima_analysis"]

# Executable names tried when looking for the program, in default order:
# the x13as names come before the x12a names.
_binary_names = ('x13as.exe', 'x13as', 'x12a.exe', 'x12a')
class _freq_to_period:
    """
    Mapping-like helper turning a frequency string into a seasonal period.

    Lookup is by prefix: keys starting with 'M' give 12, 'Q' gives 4 and
    'W' gives 52. Any other string key yields None.
    """
    _prefix_periods = (('M', 12), ('Q', 4), ('W', 52))

    def __getitem__(self, key):
        for prefix, period in self._prefix_periods:
            if key.startswith(prefix):
                return period
        return None


# The class name is deliberately rebound to a single shared instance so the
# rest of the module can index it like a dictionary.
_freq_to_period = _freq_to_period()
# Seasonal period -> frequency alias.
_period_to_freq = dict([(12, 'M'), (4, 'Q')])

# Value of the ``log`` argument -> value written for ``function`` in the
# transform spec.
_log_to_x12 = {None: 'auto', True: 'log', False: 'none'}


def _bool_to_yes_no(x):
    """Return 'yes' when ``x`` is truthy and 'no' otherwise."""
    if x:
        return 'yes'
    return 'no'
def _find_x12(x12path=None, prefer_x13=True):
    """
    Locate a runnable x13as / x12a executable.

    Parameters
    ----------
    x12path : str or None
        Directory to search, or the path of the binary itself (in which case
        the file name is stripped and its directory is searched). If None,
        the directory is read from the X13PATH / X12PATH environment
        variables; if neither is set, the bare binary names are tried.
    prefer_x13 : bool
        If True, the x13as names are tried first and X13PATH is consulted
        before X12PATH. If False, both orders are reversed.

    Returns
    -------
    str or False
        The first candidate path that could be started, or False when none
        of the candidates could.
    """
    # Derive the search order locally. Reversing the module-level tuple in
    # place would flip the order for every later call as well.
    if prefer_x13:
        candidates = _binary_names
        env_vars = ("X13PATH", "X12PATH")
    else:
        candidates = _binary_names[::-1]
        env_vars = ("X12PATH", "X13PATH")

    if x12path is None:
        # preferred variable first, then fall back to the other one
        x12path = os.getenv(env_vars[0], "") or os.getenv(env_vars[1], "")
    elif x12path.endswith(_binary_names):
        # remove binary from path if given
        x12path = os.path.dirname(x12path)

    for binary in candidates:
        x12 = os.path.join(x12path, binary)
        try:
            subprocess.check_call(x12, stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE)
        except OSError:
            # candidate could not be started; try the next name
            continue
        return x12

    return False
def _check_x12(x12path=None):
    """
    Return the path found by ``_find_x12`` for ``x12path``.

    Raises
    ------
    X13NotFoundError
        If ``_find_x12`` returns a falsy value, i.e. no binary was found.
    """
    located = _find_x12(x12path)
    if located:
        return located
    raise X13NotFoundError("x12a and x13as not found on path. Give the "
                           "path, put them on PATH, or set the "
                           "X12PATH or X13PATH environmental variable.")
def _clean_order(order):
    """
    Parse an order string into ``(arma_order, sarma_order)`` tuples of ints.

    Takes something like "(1 1 0)(0 1 1)" and returns
    ``((1, 1, 0), (0, 1, 1))``. A single group such as "(1 1 0)" is returned
    together with a seasonal order of ``(0, 0, 0)``.

    Raises
    ------
    ValueError
        If ``order`` contains no parenthesised group of digits.
    """
    groups = re.findall(r"\([0-9 ]*?\)", order)
    if not groups:
        raise ValueError("Could not find an ARIMA order in "
                         "{0!r}".format(order))

    def clean(x):
        # split() without an argument tolerates padded or repeated spaces,
        # which split(" ") would turn into empty, non-integer tokens
        return tuple(map(int, re.sub("[()]", "", x).split()))

    if len(groups) > 1:
        order, sorder = map(clean, groups)
    else:
        order = clean(groups[0])
        sorder = (0, 0, 0)

    return order, sorder
def run_spec(x12path, specpath, outname=None, meta=False, datameta=False):
    """
    Start the X12/X13 binary on a spec file and return the process.

    Parameters
    ----------
    x12path : str
        Path of the executable to run.
    specpath : str
        Path argument handed to the executable.
    outname : str, optional
        If given, appended as a further argument.
    meta : bool
        If True, ``specpath`` is preceded by the ``-m`` flag.
    datameta : bool
        If True, ``specpath`` is preceded by the ``-d`` flag.

    Returns
    -------
    subprocess.Popen
        The started process. Its stdout is a pipe and stderr is redirected
        into stdout.

    Raises
    ------
    ValueError
        If both ``meta`` and ``datameta`` are set.
    """
    if meta and datameta:
        raise ValueError("Cannot specify both meta and datameta.")

    # The flag and the path must be separate argv entries. Glued together
    # as "-m path" the program receives a single argument.
    args = [x12path]
    if meta:
        args.append("-m")
    elif datameta:
        args.append("-d")
    args.append(specpath)

    if outname:
        args.append(outname)

    return subprocess.Popen(args, stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT)
def _make_automdl_options(maxorder, maxdiff, diff):
    """
    Build the option text placed inside the ``automdl`` spec.

    ``maxorder`` is always written. ``maxdiff`` is written when it is not
    None; only when it is None is ``diff`` written instead.
    """
    lines = ["", "maxorder = ({0} {1})".format(maxorder[0], maxorder[1])]
    if maxdiff is None:
        lines.append("diff = ({0} {1})".format(diff[0], diff[1]))
    else:  # maxdiff always takes precedence
        lines.append("maxdiff = ({0} {1})".format(maxdiff[0], maxdiff[1]))
    return "\n".join(lines) + "\n"
def _make_var_names(exog):
    """
    Return the space-separated variable names for ``exog``.

    The names come from ``exog.name`` if that attribute exists, otherwise
    from ``exog.columns``. Names that cannot be joined as strings are
    replaced by generated ones: "x1" for one-dimensional input, otherwise
    the names produced by statsmodels' ``_make_exog_names``.

    Raises
    ------
    ValueError
        If ``exog`` has neither a ``name`` nor a ``columns`` attribute.
    """
    if hasattr(exog, "name"):
        var_names = exog.name
    elif hasattr(exog, "columns"):
        var_names = exog.columns
    else:
        raise ValueError("exog is not a Series or DataFrame or is unnamed.")

    if isinstance(var_names, str):
        # A single name is already complete; joining it would put a space
        # between every character ("price" -> "p r i c e").
        return var_names

    try:
        return " ".join(var_names)
    except TypeError:  # cannot have names that are numbers, pandas default
        if exog.ndim == 1:
            return "x1"
        # imported only where it is actually needed
        from statsmodels.base.data import _make_exog_names
        return " ".join(_make_exog_names(exog))
def _make_regression_options(trading, exog):
    """
    Build the ``regression`` spec text.

    Returns an empty string when ``trading`` is falsy and ``exog`` is None.
    Otherwise the spec holds a ``td`` variables line when ``trading`` is set
    and, when ``exog`` is given, its variable names and flattened values.
    """
    has_exog = exog is not None
    if not (trading or has_exog):
        return ""

    pieces = ["regression{\n"]  # start regression spec
    if trading:
        pieces.append(" variables = (td)\n")
    if has_exog:
        names = _make_var_names(exog)
        values = "\n".join(str(v) for v in exog.values.ravel().tolist())
        pieces.append(" user = ({0})\n".format(names))
        pieces.append(" data = ({0})\n".format(values))
    pieces.append("}\n")  # close out regression spec
    return "".join(pieces)
def _make_forecast_options(forecast_periods):
    """
    Build the ``forecast`` spec text.

    Returns an empty string when ``forecast_periods`` is None, otherwise a
    spec whose ``maxlead`` is ``forecast_periods``.
    """
    if forecast_periods is not None:
        return "forecast{\n" + "maxlead = ({0})\n}}\n".format(forecast_periods)
    return ""
def _check_errors(errors):
    """
    Inspect the text of an error file and raise or warn accordingly.

    Everything up to and including the first "spc:" is discarded when that
    marker is present. If the remaining text contains "ERROR" an X13Error
    is raised; otherwise, if it contains "WARNING", an X13Warning is issued.

    Raises
    ------
    X13Error
        If the remaining text contains "ERROR".
    """
    marker = errors.find("spc:")
    if marker != -1:
        # Cut the prefix only when the marker exists. With find() == -1 a
        # blind slice would drop the first three characters of the message.
        errors = errors[marker + 4:]
    errors = errors.strip()
    if not errors:
        return
    if 'ERROR' in errors:
        raise X13Error(errors)
    if 'WARNING' in errors:
        warn(errors, X13Warning)
def _convert_out_to_series(x, dates, name):
    """
    Parse tab-separated output text into a Series.

    The first two lines of ``x`` are skipped. The remaining rows are
    indexed by ``dates`` and the second column is returned as a Series
    called ``name``.
    """
    import io
    import pandas

    table = pandas.read_csv(io.StringIO(x), sep='\t', header=None,
                            skiprows=2, engine='python')
    table = table.set_index(dates)
    table = table.rename(columns={1: name})
    return table[name]
def _open_and_read(fname):
    """Return the full text of ``fname``; the file is closed afterwards."""
    with open(fname, 'r') as handle:
        return handle.read()
class Spec(object):
    """
    Base class for one named section of a spec file.

    Subclasses are expected to be named ``<Section>Spec``; the section name
    is derived from the class name. ``set_options`` must be called before
    ``create_spec`` because the latter reads ``self.options``.
    """
    @property
    def spec_name(self):
        """The class name with every "Spec" removed."""
        return self.__class__.__name__.replace("Spec", "")

    def create_spec(self, **kwargs):
        """
        Return the section text: the spec name followed by the stored
        options inside braces. ``kwargs`` is accepted but not used.
        """
        spec = """{name} {{
        {options}
        }}
        """
        return spec.format(name=self.spec_name,
                           options=self.options)

    def set_options(self, **kwargs):
        """
        Store each keyword as an instance attribute and record all of them
        in ``self.options`` as one "key=value" line per keyword.
        """
        lines = []
        # plain dict iteration; no Python 2 compatibility shim is required
        for key, value in kwargs.items():
            lines.append("{0}={1}\n".format(key, value))
            self.__dict__[key] = value
        self.options = "".join(lines)
class SeriesSpec(Spec):
    """
    The ``series`` section of a spec file.

    Only ``data``, ``name``, ``appendbcst``, ``appendfcst``, ``period``,
    ``start`` and ``title`` are currently written to the options; the other
    arguments are accepted but not used.

    Parameters
    ----------
    data
    name
        Converted to a string and trimmed to 64 characters.
    appendbcst : bool
    appendfcst : bool
    comptype
    compwt
    decimals
    modelspan
    period
    precision
    to_print
    to_save
    span
    start
    title
        Converted to a string and trimmed to 79 characters.
    series_type

    Notes
    -----
    Rarely used arguments

    divpower
    missingcode
    missingval
    saveprecision
    trimzero
    """
    # NOTE: the list defaults below are never read or mutated here.
    def __init__(self, data, name='Unnamed Series', appendbcst=False,
                 appendfcst=False,
                 comptype=None, compwt=1, decimals=0, modelspan=(),
                 period=12, precision=0, to_print=[], to_save=[], span=(),
                 start=(1, 1), title='', series_type=None, divpower=None,
                 missingcode=-99999, missingval=1000000000):

        appendbcst = _bool_to_yes_no(appendbcst)
        appendfcst = _bool_to_yes_no(appendfcst)

        # str() first: names taken from pandas objects may be integers,
        # which cannot be sliced.
        series_name = "\"{0}\"".format(str(name)[:64])  # trim to 64 characters
        title = "\"{0}\"".format(str(title)[:79])  # trim to 79 characters
        self.set_options(data=data, appendbcst=appendbcst,
                         appendfcst=appendfcst, period=period, start=start,
                         title=title, name=series_name,
                         )
def pandas_to_series_spec(x):
    """
    Build a ``SeriesSpec`` from a pandas Series or one-column DataFrame.

    The seasonal period is taken from the index frequency, which is
    inferred from the index when it is not set. The start value is written
    as "<year>.<month>" for monthly and "<year>.<quarter>" for quarterly
    data.

    Raises
    ------
    ValueError
        If ``x`` is a DataFrame with more than one column, if no frequency
        can be inferred from the index, or if the data is neither monthly
        nor quarterly.
    """
    # from statsmodels.tools.data import _check_period_index
    # check_period_index(x)
    if hasattr(x, 'columns'):  # convert to series
        if len(x.columns) > 1:
            raise ValueError("Does not handle DataFrame with more than one "
                             "column")
        x = x[x.columns[0]]

    data = "({0})".format("\n".join(map(str, x.values.tolist())))

    # get periodicity
    try:
        period = _freq_to_period[x.index.freqstr]
    except (AttributeError, ValueError):
        from pandas.tseries.api import infer_freq
        freq = infer_freq(x.index)
        if freq is None:
            # fail with a clear message instead of an AttributeError on None
            raise ValueError("Could not infer the frequency of the index. "
                             "Use an index with a monthly or quarterly "
                             "frequency.")
        period = _freq_to_period[freq]

    # get start / first data
    start_date = x.index[0]
    if period == 12:
        year, stperiod = start_date.year, start_date.month
    elif period == 4:
        year, stperiod = start_date.year, start_date.quarter
    else:  # pragma: no cover
        raise ValueError("Only monthly and quarterly periods are supported."
                         " Please report or send a pull request if you want "
                         "this extended.")

    # give it a title; pandas names may be non-strings, so coerce them
    name = getattr(x, 'name', None)
    name = str(name) if name else 'Unnamed Series'

    series_spec = SeriesSpec(data=data, name=name, period=period,
                             title=name, start="{0}.{1}".format(year,
                                                                stperiod))
    return series_spec
@deprecate_kwarg('forecast_years', 'forecast_periods')
def x13_arima_analysis(endog, maxorder=(2, 1), maxdiff=(2, 1), diff=None,
                       exog=None, log=None, outlier=True, trading=False,
                       forecast_periods=None, retspec=False,
                       speconly=False, start=None, freq=None,
                       print_stdout=False, x12path=None, prefer_x13=True):
    """
    Perform x13-arima analysis for monthly or quarterly data.

    Parameters
    ----------
    endog : array_like, pandas.Series
        The series to model. It is best to use a pandas object with a
        DatetimeIndex or PeriodIndex. However, you can pass an array-like
        object. If your object does not have a dates index then ``start`` and
        ``freq`` are not optional.
    maxorder : tuple
        The maximum order of the regular and seasonal ARMA polynomials to
        examine during the model identification. The order for the regular
        polynomial must be greater than zero and no larger than 4. The
        order for the seasonal polynomial may be 1 or 2.
    maxdiff : tuple
        The maximum orders for regular and seasonal differencing in the
        automatic differencing procedure. Acceptable inputs for regular
        differencing are 1 and 2. The maximum order for seasonal differencing
        is 1. If ``diff`` is specified then ``maxdiff`` should be None.
        Otherwise, ``diff`` will be ignored. See also ``diff``.
    diff : tuple
        Fixes the orders of differencing for the regular and seasonal
        differencing. Regular differencing may be 0, 1, or 2. Seasonal
        differencing may be 0 or 1. ``maxdiff`` must be None, otherwise
        ``diff`` is ignored.
    exog : array_like
        Exogenous variables.
    log : bool or None
        If None, it is automatically determined whether to log the series or
        not. If False, logs are not taken. If True, logs are taken.
    outlier : bool
        Whether or not outliers are tested for and corrected, if detected.
    trading : bool
        Whether or not trading day effects are tested for.
    forecast_periods : int
        Number of forecasts produced. The default is None.
    retspec : bool
        Whether to return the created specification file. Can be useful for
        debugging.
    speconly : bool
        Whether to create the specification file and then return it without
        performing the analysis. Can be useful for debugging.
    start : str, datetime
        Must be given if ``endog`` does not have date information in its index.
        Anything accepted by pandas.date_range for the start value.
    freq : str
        Must be given if ``endog`` does not have date information in its
        index. Anything accepted by pandas.date_range for the freq value.
    print_stdout : bool
        The stdout from X12/X13 is suppressed. To print it out, set this
        to True. Default is False.
    x12path : str or None
        The path to x12 or x13 binary. If None, the program will attempt
        to find x13as or x12a on the PATH or by looking at X13PATH or
        X12PATH depending on the value of prefer_x13.
    prefer_x13 : bool
        If True, will look for x13as first and will fallback to the X13PATH
        environmental variable. If False, will look for x12a first and will
        fallback to the X12PATH environmental variable. If x12path points
        to the path for the X12/X13 binary, it does nothing.

    Returns
    -------
    X13ArimaAnalysisResult
        An object containing the listed attributes.

        - observed : pandas.Series
            The ``endog`` that was analysed.
        - results : str
            The full output from the X12/X13 run.
        - seasadj : pandas.Series
            The final seasonally adjusted ``endog``.
        - trend : pandas.Series
            The trend-cycle component of ``endog``.
        - irregular : pandas.Series
            The final irregular component of ``endog``.
        - stdout : str
            The captured stdout produced by x12/x13.
        - spec : str, optional
            Returned if ``retspec`` is True. The only thing returned if
            ``speconly`` is True.

    Raises
    ------
    X13NotFoundError
        If no x12a / x13as binary can be found.
    ValueError
        If ``endog`` is not a pandas object and ``start`` or ``freq`` is
        missing.

    Notes
    -----
    This works by creating a specification file, writing it to a temporary
    directory, invoking X12/X13 in a subprocess, and reading the output
    back in.
    """
    # Search here so that ``prefer_x13`` is honoured.
    x12path = _find_x12(x12path, prefer_x13=prefer_x13)
    if not x12path:
        raise X13NotFoundError("x12a and x13as not found on path. Give the "
                               "path, put them on PATH, or set the "
                               "X12PATH or X13PATH environmental variable.")

    if not isinstance(endog, (pd.DataFrame, pd.Series)):
        if start is None or freq is None:
            raise ValueError("start and freq cannot be none if endog is not "
                             "a pandas object")
        # date_range is the supported way to build a regular index; the
        # DatetimeIndex constructor no longer accepts start/periods.
        index = pd.date_range(start=start, periods=len(endog), freq=freq)
        endog = pd.Series(endog, index=index)

    spec_obj = pandas_to_series_spec(endog)
    spec = spec_obj.create_spec()
    spec += "transform{{function={0}}}\n".format(_log_to_x12[log])
    if outlier:
        spec += "outlier{}\n"
    options = _make_automdl_options(maxorder, maxdiff, diff)
    spec += "automdl{{{0}}}\n".format(options)
    spec += _make_regression_options(trading, exog)
    spec += _make_forecast_options(forecast_periods)
    spec += "x11{ save=(d11 d12 d13) }"
    if speconly:
        return spec

    # write it to a tempfile
    # TODO: make this more robust - give the user some control?
    ftempin = tempfile.NamedTemporaryFile(delete=False, suffix='.spc')
    ftempout = tempfile.NamedTemporaryFile(delete=False)
    # files this function reads back, all derived from the output name
    out_suffixes = ('.err', '.out', '.d11', '.d12', '.d13')
    try:
        ftempin.write(spec.encode('utf8'))
        ftempin.close()
        ftempout.close()
        # call x12 arima; the spec path is passed without its '.spc' suffix
        p = run_spec(x12path, ftempin.name[:-4], ftempout.name)
        # communicate() drains the pipe while waiting, so a chatty run
        # cannot block on a full pipe buffer
        stdout, _ = p.communicate()
        if print_stdout:
            print(stdout)
        # check for errors
        errors = _open_and_read(ftempout.name + '.err')
        _check_errors(errors)

        # read in results
        results = _open_and_read(ftempout.name + '.out')
        seasadj = _open_and_read(ftempout.name + '.d11')
        trend = _open_and_read(ftempout.name + '.d12')
        irregular = _open_and_read(ftempout.name + '.d13')
    finally:
        leftovers = [ftempin.name, ftempout.name]
        leftovers += [ftempout.name + suffix for suffix in out_suffixes]
        # remove each file on its own so one failure does not skip the rest
        for fname in leftovers:
            try:  # sometimes this gives a permission denied error?
                #   not sure why. no process should have these open
                os.remove(fname)
            except OSError:
                # a file that was never created is not worth a warning
                if os.path.exists(fname):
                    warn("Failed to delete resource {0}".format(fname),
                         IOWarning)

    seasadj = _convert_out_to_series(seasadj, endog.index, 'seasadj')
    trend = _convert_out_to_series(trend, endog.index, 'trend')
    irregular = _convert_out_to_series(irregular, endog.index, 'irregular')

    # NOTE: there is not likely anything in stdout that's not in results
    #       so may be safe to just suppress and remove it
    fields = dict(observed=endog, results=results, seasadj=seasadj,
                  trend=trend, irregular=irregular, stdout=stdout)
    if retspec:
        fields['spec'] = spec
    return X13ArimaAnalysisResult(**fields)
@deprecate_kwarg('forecast_years', 'forecast_periods')
def x13_arima_select_order(endog, maxorder=(2, 1), maxdiff=(2, 1), diff=None,
                           exog=None, log=None, outlier=True, trading=False,
                           forecast_periods=None,
                           start=None, freq=None, print_stdout=False,
                           x12path=None, prefer_x13=True):
    """
    Perform automatic seasonal ARIMA order identification using x12/x13 ARIMA.

    Parameters
    ----------
    endog : array_like, pandas.Series
        The series to model. It is best to use a pandas object with a
        DatetimeIndex or PeriodIndex. However, you can pass an array-like
        object. If your object does not have a dates index then ``start`` and
        ``freq`` are not optional.
    maxorder : tuple
        The maximum order of the regular and seasonal ARMA polynomials to
        examine during the model identification. The order for the regular
        polynomial must be greater than zero and no larger than 4. The
        order for the seasonal polynomial may be 1 or 2.
    maxdiff : tuple
        The maximum orders for regular and seasonal differencing in the
        automatic differencing procedure. Acceptable inputs for regular
        differencing are 1 and 2. The maximum order for seasonal differencing
        is 1. If ``diff`` is specified then ``maxdiff`` should be None.
        Otherwise, ``diff`` will be ignored. See also ``diff``.
    diff : tuple
        Fixes the orders of differencing for the regular and seasonal
        differencing. Regular differencing may be 0, 1, or 2. Seasonal
        differencing may be 0 or 1. ``maxdiff`` must be None, otherwise
        ``diff`` is ignored.
    exog : array_like
        Exogenous variables.
    log : bool or None
        If None, it is automatically determined whether to log the series or
        not. If False, logs are not taken. If True, logs are taken.
    outlier : bool
        Whether or not outliers are tested for and corrected, if detected.
    trading : bool
        Whether or not trading day effects are tested for.
    forecast_periods : int
        Number of forecasts produced. The default is None.
    start : str, datetime
        Must be given if ``endog`` does not have date information in its index.
        Passed on to ``x13_arima_analysis``.
    freq : str
        Must be given if ``endog`` does not have date information in its
        index. Passed on to ``x13_arima_analysis``.
    print_stdout : bool
        The stdout from X12/X13 is suppressed. To print it out, set this
        to True. Default is False.
    x12path : str or None
        The path to x12 or x13 binary. If None, the program will attempt
        to find x13as or x12a on the PATH or by looking at X13PATH or X12PATH
        depending on the value of prefer_x13.
    prefer_x13 : bool
        If True, will look for x13as first and will fallback to the X13PATH
        environmental variable. If False, will look for x12a first and will
        fallback to the X12PATH environmental variable. If x12path points
        to the path for the X12/X13 binary, it does nothing.

    Returns
    -------
    Bunch
        A bunch object containing the listed attributes.

        - order : tuple
            The regular order.
        - sorder : tuple
            The seasonal order.
        - include_mean : bool
            Whether to include a mean or not.
        - results : str
            The full results from the X12/X13 analysis.
        - stdout : str
            The captured stdout from the X12/X13 analysis.

    Raises
    ------
    X13Error
        If the output does not contain a "Final automatic model choice"
        line.

    Notes
    -----
    This works by creating a specification file, writing it to a temporary
    directory, invoking X12/X13 in a subprocess, and reading the output back
    in.
    """
    # print_stdout is forwarded as well, so the flag is not silently dropped
    results = x13_arima_analysis(endog, x12path=x12path, exog=exog, log=log,
                                 outlier=outlier, trading=trading,
                                 forecast_periods=forecast_periods,
                                 maxorder=maxorder, maxdiff=maxdiff, diff=diff,
                                 start=start, freq=freq,
                                 print_stdout=print_stdout,
                                 prefer_x13=prefer_x13)
    output = results.results
    model = re.search("(?<=Final automatic model choice : ).*", output)
    if model is None:
        # report the real problem instead of an AttributeError on None
        raise X13Error("Could not find the final automatic model choice in "
                       "the X12/X13 output.")
    order, sorder = _clean_order(model.group())

    # a mean is included only when a constant is reported and the output
    # does not state that the mean is not significant
    include_mean = ("Mean is not significant" not in output
                    and "Constant" in output)

    res = Bunch(order=order, sorder=sorder, include_mean=include_mean,
                results=output, stdout=results.stdout)
    return res
class X13ArimaAnalysisResult(object):
    """
    Plain container for the pieces of an X12/X13 analysis.

    Every keyword passed to the constructor becomes an attribute of the
    same name. ``plot`` reads the ``observed``, ``seasadj``, ``trend`` and
    ``irregular`` attributes.
    """
    def __init__(self, **kwargs):
        # plain dict iteration; no Python 2 compatibility shim is required
        for key, value in kwargs.items():
            setattr(self, key, value)

    def plot(self):
        """
        Plot the observed, seasonally adjusted, trend and irregular series
        in four stacked panels sharing the x axis.

        Returns
        -------
        The figure created by ``plt.subplots``.
        """
        from statsmodels.graphics.utils import _import_mpl
        plt = _import_mpl()
        fig, axes = plt.subplots(4, 1, sharex=True)
        panels = (
            (self.observed, 'Observed'),
            (self.seasadj, 'Seas. Adjusted'),
            (self.trend, 'Trend'),
            (self.irregular, 'Irregular'),
        )
        for ax, (series, label) in zip(axes, panels):
            series.plot(ax=ax, legend=False)
            ax.set_ylabel(label)

        fig.tight_layout()
        return fig