# -*- coding: utf-8 -*-
"""
Structural Vector Autoregression (SVAR) processes

References
----------
Lütkepohl (2005) New Introduction to Multiple Time Series Analysis
"""

import numpy as np
import numpy.linalg as npl
from numpy.linalg import slogdet

from statsmodels.compat.pandas import deprecate_kwarg

from statsmodels.tools.decorators import deprecated_alias
from statsmodels.tools.numdiff import approx_hess, approx_fprime
from statsmodels.tsa.vector_ar.irf import IRAnalysis
from statsmodels.tsa.vector_ar.var_model import VARProcess, VARResults

import statsmodels.tsa.vector_ar.util as util
import statsmodels.tsa.base.tsa_model as tsbase


def svar_ckerr(svar_type, A, B):
    if A is None and (svar_type == 'A' or svar_type == 'AB'):
        raise ValueError('SVAR of type A or AB but A array not given.')
    if B is None and (svar_type == 'B' or svar_type == 'AB'):
        raise ValueError('SVAR of type B or AB but B array not given.')


class SVAR(tsbase.TimeSeriesModel):
    r"""
    Fit VAR and then estimate structural components of A and B, defined:

    .. math:: Ay_t = A_1 y_{t-1} + \ldots + A_p y_{t-p} + B\epsilon_t

    Parameters
    ----------
    endog : array_like
        2-d array of endogenous (response) variables.
    dates : array_like
        Must match number of rows of endog.
    svar_type : str
        "A" - estimate structural parameters of A matrix, B assumed = I
        "B" - estimate structural parameters of B matrix, A assumed = I
        "AB" - estimate structural parameters indicated in both A and B matrix
    A : array_like
        neqs x neqs with unknown parameters marked with 'E' for estimate
    B : array_like
        neqs x neqs with unknown parameters marked with 'E' for estimate

    References
    ----------
    Hamilton (1994) Time Series Analysis
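
    Examples
    --------
    A minimal sketch; the 2-column array ``data`` and the positions of the
    free parameters below are illustrative assumptions, not fixtures from
    this module. Entries marked ``'E'`` are estimated, numeric entries are
    held fixed:

    >>> import numpy as np
    >>> A = np.array([[1, 0], ['E', 1]], dtype=object)
    >>> B = np.array([['E', 0], [0, 'E']], dtype=object)
    >>> model = SVAR(data, svar_type='AB', A=A, B=B)
    >>> res = model.fit(maxlags=2)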
57 """
59 y = deprecated_alias("y", "endog", remove_version="0.11.0")
61 def __init__(self, endog, svar_type, dates=None,
62 freq=None, A=None, B=None, missing='none'):
63 super(SVAR, self).__init__(endog, None, dates, freq, missing=missing)
64 #(self.endog, self.names,
65 # self.dates) = data_util.interpret_data(endog, names, dates)
67 self.neqs = self.endog.shape[1]
69 types = ['A', 'B', 'AB']
70 if svar_type not in types:
71 raise ValueError('SVAR type not recognized, must be in '
72 + str(types))
73 self.svar_type = svar_type
75 svar_ckerr(svar_type, A, B)
77 self.A_original = A
78 self.B_original = B
80 # initialize A, B as I if not given
81 # Initialize SVAR masks
82 if A is None:
83 A = np.identity(self.neqs)
84 self.A_mask = A_mask = np.zeros(A.shape, dtype=bool)
85 else:
86 A_mask = np.logical_or(A == 'E', A == 'e')
87 self.A_mask = A_mask
88 if B is None:
89 B = np.identity(self.neqs)
90 self.B_mask = B_mask = np.zeros(B.shape, dtype=bool)
91 else:
92 B_mask = np.logical_or(B == 'E', B == 'e')
93 self.B_mask = B_mask
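
        # The masks flag entries given as 'E'/'e'; those positions are the
        # free structural parameters, set to NaN in the numeric copies below.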
        # convert A and B to numeric
        #TODO: change this when masked support is better or with formula
        #integration
        Anum = np.zeros(A.shape, dtype=float)
        Anum[~A_mask] = A[~A_mask]
        Anum[A_mask] = np.nan
        self.A = Anum

        Bnum = np.zeros(B.shape, dtype=float)
        Bnum[~B_mask] = B[~B_mask]
        Bnum[B_mask] = np.nan
        self.B = Bnum

        #LikelihoodModel.__init__(self, endog)
        #super(SVAR, self).__init__(endog)

    def fit(self, A_guess=None, B_guess=None, maxlags=None, method='ols',
            ic=None, trend='c', verbose=False, s_method='mle',
            solver="bfgs", override=False, maxiter=500, maxfun=500):
115 """
116 Fit the SVAR model and solve for structural parameters
118 Parameters
119 ----------
120 A_guess : array_like, optional
121 A vector of starting values for all parameters to be estimated
122 in A.
123 B_guess : array_like, optional
124 A vector of starting values for all parameters to be estimated
125 in B.
126 maxlags : int
127 Maximum number of lags to check for order selection, defaults to
128 12 * (nobs/100.)**(1./4), see select_order function
129 method : {'ols'}
130 Estimation method to use
131 ic : {'aic', 'fpe', 'hqic', 'bic', None}
132 Information criterion to use for VAR order selection.
133 aic : Akaike
134 fpe : Final prediction error
135 hqic : Hannan-Quinn
136 bic : Bayesian a.k.a. Schwarz
137 verbose : bool, default False
138 Print order selection output to the screen
139 trend, str {"c", "ct", "ctt", "nc"}
140 "c" - add constant
141 "ct" - constant and trend
142 "ctt" - constant, linear and quadratic trend
143 "nc" - co constant, no trend
144 Note that these are prepended to the columns of the dataset.
145 s_method : {'mle'}
146 Estimation method for structural parameters
147 solver : {'nm', 'newton', 'bfgs', 'cg', 'ncg', 'powell'}
148 Solution method
149 See statsmodels.base for details
150 override : bool, default False
151 If True, returns estimates of A and B without checking
152 order or rank condition
153 maxiter : int, default 500
154 Number of iterations to perform in solution method
155 maxfun : int
156 Number of function evaluations to perform
158 Notes
159 -----
160 Lütkepohl pp. 146-153
161 Hamilton pp. 324-336
163 Returns
164 -------
165 est : SVARResults
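
        Examples
        --------
        A hedged sketch, assuming ``model`` is an ``SVAR`` instance built as
        in the class docstring; the lag length is selected by AIC and the
        structural parameters are then estimated by MLE:

        >>> res = model.fit(maxlags=12, ic='aic', trend='c')
        >>> res.A, res.B  # estimated structural matrices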
166 """
167 lags = maxlags
169 if ic is not None:
170 selections = self.select_order(maxlags=maxlags, verbose=verbose)
171 if ic not in selections:
172 raise ValueError("%s not recognized, must be among %s"
173 % (ic, sorted(selections)))
174 lags = selections[ic]
175 if verbose:
176 print('Using %d based on %s criterion' % (lags, ic))
177 else:
178 if lags is None:
179 lags = 1
181 self.nobs = len(self.endog) - lags
183 # initialize starting parameters
184 start_params = self._get_init_params(A_guess, B_guess)
186 return self._estimate_svar(start_params, lags, trend=trend,
187 solver=solver, override=override,
188 maxiter=maxiter, maxfun=maxfun)

    def _get_init_params(self, A_guess, B_guess):
        """
        Returns either the given starting values or arrays of 0.1 for any
        parameters left unspecified.
        """
        var_type = self.svar_type.lower()

        n_masked_a = self.A_mask.sum()
        if var_type in ['ab', 'a']:
            if A_guess is None:
                A_guess = np.array([.1] * n_masked_a)
            else:
                if len(A_guess) != n_masked_a:
                    msg = 'len(A_guess) = %s, there are %s parameters in A'
                    raise ValueError(msg % (len(A_guess), n_masked_a))
        else:
            A_guess = []

        n_masked_b = self.B_mask.sum()
        if var_type in ['ab', 'b']:
            if B_guess is None:
                B_guess = np.array([.1] * n_masked_b)
            else:
                if len(B_guess) != n_masked_b:
                    msg = 'len(B_guess) = %s, there are %s parameters in B'
                    raise ValueError(msg % (len(B_guess), n_masked_b))
        else:
            B_guess = []

        return np.r_[A_guess, B_guess]

    def _estimate_svar(self, start_params, lags, maxiter, maxfun,
                       trend='c', solver="nm", override=False):
        """
        Estimate the reduced-form VAR by OLS, then solve for the
        structural parameters.

        Parameters
        ----------
        lags : int
        trend : {str, None}
            As per `fit`
        """
        k_trend = util.get_trendorder(trend)
        y = self.endog
        z = util.get_var_endog(y, lags, trend=trend, has_constant='raise')
        y_sample = y[lags:]

        # Lutkepohl p75, about 5x faster than stated formula
        var_params = np.linalg.lstsq(z, y_sample, rcond=-1)[0]
        resid = y_sample - np.dot(z, var_params)

        # Unbiased estimate of covariance matrix $\Sigma_u$ of the white noise
        # process $u$
        # equivalent definition
        # .. math:: \frac{1}{T - Kp - 1} Y^\prime (I_T - Z (Z^\prime Z)^{-1}
        # Z^\prime) Y
        # Ref: Lutkepohl p.75
        # df_resid right now is T - Kp - 1, which is a suggested correction

        avobs = len(y_sample)

        df_resid = avobs - (self.neqs * lags + k_trend)

        sse = np.dot(resid.T, resid)
        #TODO: should give users the option to use a dof correction or not
        omega = sse / df_resid
        self.sigma_u = omega

        A, B = self._solve_AB(start_params, override=override,
                              solver=solver,
                              maxiter=maxiter,
                              maxfun=maxfun)
        A_mask = self.A_mask
        B_mask = self.B_mask

        return SVARResults(y, z, var_params, omega, lags,
                           names=self.endog_names, trend=trend,
                           dates=self.data.dates, model=self,
                           A=A, B=B, A_mask=A_mask, B_mask=B_mask)

    def loglike(self, params):
        """
        Loglikelihood for SVAR model

        Notes
        -----
        This method assumes that the autoregressive parameters are
        estimated first; the likelihood is then maximized over the
        structural parameters alone.
        """
        #TODO: this does not look robust if A or B is None
        A = self.A
        B = self.B
        A_mask = self.A_mask
        B_mask = self.B_mask
        A_len = len(A[A_mask])
        B_len = len(B[B_mask])

        if A is not None:
            A[A_mask] = params[:A_len]
        if B is not None:
            B[B_mask] = params[A_len:A_len + B_len]

        nobs = self.nobs
        neqs = self.neqs
        sigma_u = self.sigma_u

        W = np.dot(npl.inv(B), A)
        trc_in = np.dot(np.dot(W.T, W), sigma_u)
        sign, b_logdet = slogdet(B**2)  # numpy 1.4 compat
        b_slogdet = sign * b_logdet

        likl = -nobs/2. * (neqs * np.log(2 * np.pi) -
                           np.log(npl.det(A)**2) + b_slogdet +
                           np.trace(trc_in))

        return likl

    def score(self, AB_mask):
        """
        Return the gradient of the loglike at AB_mask.

        Parameters
        ----------
        AB_mask : ndarray
            The concatenated unknown elements of the A and B matrices.

        Notes
        -----
        Returns the numerical gradient
        """
        loglike = self.loglike
        return approx_fprime(AB_mask, loglike, epsilon=1e-8)

    def hessian(self, AB_mask):
        """
        Returns numerical hessian.
        """
        loglike = self.loglike
        return approx_hess(AB_mask, loglike)

    def _solve_AB(self, start_params, maxiter, maxfun, override=False,
                  solver='bfgs'):
        """
        Solves for MLE estimate of structural parameters

        Parameters
        ----------
        override : bool, default False
            If True, returns estimates of A and B without checking
            order or rank condition
        solver : str or None, optional
            Solver to be used. The default is 'bfgs'. Other choices are
            'nm' (Nelder-Mead), 'newton' (Newton-Raphson), 'cg' (conjugate
            gradient), 'ncg' (Newton conjugate gradient), and 'powell'.
        maxiter : int, optional
            The maximum number of iterations. Default is 500.
        maxfun : int, optional
            The maximum number of function evaluations.

        Returns
        -------
        A_solve, B_solve : ndarray
            ML solutions for the A and B matrices.
        """
        #TODO: this could stand a refactor
        A_mask = self.A_mask
        B_mask = self.B_mask
        A = self.A
        B = self.B
        A_len = len(A[A_mask])

        A[A_mask] = start_params[:A_len]
        B[B_mask] = start_params[A_len:]

        if not override:
            J = self._compute_J(A, B)
            self.check_order(J)
            self.check_rank(J)
        else:  #TODO: change to a warning?
            print("Order/rank conditions have not been checked")

        retvals = super(SVAR, self).fit(start_params=start_params,
                                        method=solver, maxiter=maxiter,
                                        maxfun=maxfun, ftol=1e-20,
                                        disp=0).params

        A[A_mask] = retvals[:A_len]
        B[B_mask] = retvals[A_len:]

        return A, B

    def _compute_J(self, A_solve, B_solve):

        #first compute appropriate duplication matrix
        #taken from Magnus and Neudecker (1980),
        #"The Elimination Matrix: Some Lemmas and Applications"
        #the creation of the D_n matrix follows MN (1980) directly,
        #while the rest follows Hamilton (1994)

        neqs = self.neqs
        sigma_u = self.sigma_u
        A_mask = self.A_mask
        B_mask = self.B_mask

        #first generate duplication matrix, see MN (1980) for notation
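        # D_n is the neqs^2 x (neqs*(neqs+1)/2) duplication matrix, defined
        # by D_n vech(S) = vec(S) for any symmetric S; it is built column by
        # column below following MN (1980).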

        D_nT = np.zeros([int((1.0 / 2) * neqs * (neqs + 1)), neqs**2])
        for j in range(neqs):
            i = j
            while j <= i < neqs:
                u = np.zeros([int((1.0 / 2) * neqs * (neqs + 1)), 1])
                u[int(j * neqs + (i + 1) - (1.0 / 2) * (j + 1) * j - 1)] = 1
                Tij = np.zeros([neqs, neqs])
                Tij[i, j] = 1
                Tij[j, i] = 1
                D_nT = D_nT + np.dot(u, (Tij.ravel('F')[:, None]).T)
                i = i + 1

        D_n = D_nT.T
        D_pl = npl.pinv(D_n)

        #generate S_B and S_D
        S_B = np.zeros((neqs**2, len(A_solve[A_mask])))
        S_D = np.zeros((neqs**2, len(B_solve[B_mask])))

        j = 0
        j_d = 0
        if len(A_solve[A_mask]) != 0:
            A_vec = np.ravel(A_mask, order='F')
            for k in range(neqs**2):
                if A_vec[k]:
                    S_B[k, j] = -1
                    j += 1
        if len(B_solve[B_mask]) != 0:
            B_vec = np.ravel(B_mask, order='F')
            for k in range(neqs**2):
                if B_vec[k]:
                    S_D[k, j_d] = 1
                    j_d += 1

        #now compute J
        invA = npl.inv(A_solve)
        J_p1i = np.dot(np.dot(D_pl, np.kron(sigma_u, invA)), S_B)
        J_p1 = -2.0 * J_p1i
        J_p2 = np.dot(np.dot(D_pl, np.kron(invA, invA)), S_D)

        J = np.append(J_p1, J_p2, axis=1)

        return J

    def check_order(self, J):
        if np.size(J, axis=0) < np.size(J, axis=1):
            raise ValueError("Order condition not met: "
                             "solution may not be unique")

    def check_rank(self, J):
        rank = np.linalg.matrix_rank(J)
        if rank < np.size(J, axis=1):
            raise ValueError("Rank condition not met: "
                             "solution may not be unique.")


class SVARProcess(VARProcess):
    """
    Class represents a known SVAR(p) process

    Parameters
    ----------
    coefs : ndarray (p x k x k)
    intercept : ndarray (length k)
    sigma_u : ndarray (k x k)
    names : sequence (length k)
    A : neqs x neqs np.ndarray with unknown parameters marked with 'E'
    A_mask : neqs x neqs mask array with known parameters masked
    B : neqs x neqs np.ndarray with unknown parameters marked with 'E'
    B_mask : neqs x neqs mask array with known parameters masked
    """
    def __init__(self, coefs, intercept, sigma_u, A_solve, B_solve,
                 names=None):
        self.k_ar = len(coefs)
        self.neqs = coefs.shape[1]
        self.coefs = coefs
        self.intercept = intercept
        self.sigma_u = sigma_u
        self.A_solve = A_solve
        self.B_solve = B_solve
        self.names = names

    def orth_ma_rep(self, maxn=10, P=None):
        """
        Unavailable for SVAR
        """
        raise NotImplementedError

    def svar_ma_rep(self, maxn=10, P=None):
        """
        Compute structural MA coefficient matrices using the MLE
        of A and B
        """
        if P is None:
            A_solve = self.A_solve
            B_solve = self.B_solve
            P = np.dot(npl.inv(A_solve), B_solve)

        ma_mats = self.ma_rep(maxn=maxn)
        return np.array([np.dot(coefs, P) for coefs in ma_mats])


class SVARResults(SVARProcess, VARResults):
    """
    Estimate of a structural VAR(p) process with a fixed number of lags

    Parameters
    ----------
    endog : ndarray
    endog_lagged : ndarray
    params : ndarray
    sigma_u : ndarray
    lag_order : int
    model : VAR model instance
    trend : str {'nc', 'c', 'ct'}
    names : array_like
        List of names of the endogenous variables in order of appearance
        in `endog`.
    dates

    Attributes
    ----------
    aic
    bic
    bse
    coefs : ndarray (p x K x K)
        Estimated A_i matrices, A_i = coefs[i-1]
    cov_params
    dates
    detomega
    df_model : int
    df_resid : int
    endog
    endog_lagged
    fittedvalues
    fpe
    intercept
    info_criteria
    k_ar : int
        Order of VAR process
    k_trend : int
    llf
    model
    names : list
        variables names
    neqs : int
        Number of variables (equations)
    nobs : int
    n_totobs : int
    params : ndarray (Kp + 1) x K
        A_i matrices and intercept in stacked form [int A_1 ... A_p]
    pvalues
    resid
    sigma_u : ndarray (K x K)
        Estimate of white noise process variance Var[u_t]
    sigma_u_mle
    stderr
    trendorder
    tvalues
    y
    ys_lagged
    """

    _model_type = 'SVAR'

    y = deprecated_alias("y", "endog", remove_version="0.11.0")
    ys_lagged = deprecated_alias("ys_lagged", "endog_lagged",
                                 remove_version="0.11.0")

    def __init__(self, endog, endog_lagged, params, sigma_u, lag_order,
                 A=None, B=None, A_mask=None, B_mask=None, model=None,
                 trend='c', names=None, dates=None):

        self.model = model
        self.endog = endog
        self.endog_lagged = endog_lagged
        self.dates = dates

        self.n_totobs, self.neqs = self.endog.shape
        self.nobs = self.n_totobs - lag_order
        k_trend = util.get_trendorder(trend)
        if k_trend > 0:  # make this the polynomial trend order
            trendorder = k_trend - 1
        else:
            trendorder = None
        self.k_trend = k_trend
        self.k_exog = k_trend  # now (0.9) required by VARProcess
        self.trendorder = trendorder

        self.exog_names = util.make_lag_names(names, lag_order, k_trend)
        self.params = params
        self.sigma_u = sigma_u

        # Each coefficient matrix needs to be transposed
        reshaped = self.params[self.k_trend:]
        reshaped = reshaped.reshape((lag_order, self.neqs, self.neqs))

        intercept = self.params[0]
        coefs = reshaped.swapaxes(1, 2).copy()

        #SVAR components
        #TODO: if you define these here, you do not also have to define
        #them in SVARProcess, but I left them for now -ss
        self.A = A
        self.B = B
        self.A_mask = A_mask
        self.B_mask = B_mask

        super(SVARResults, self).__init__(coefs, intercept, sigma_u, A, B,
                                          names=names)

    def irf(self, periods=10, var_order=None):
        """
        Analyze structural impulse responses to shocks in system

        Parameters
        ----------
        periods : int

        Returns
        -------
        irf : IRAnalysis
        """
        A = self.A
        B = self.B
        P = np.dot(npl.inv(A), B)

        return IRAnalysis(self, P=P, periods=periods, svar=True)

    @deprecate_kwarg('T', 'steps')
    def sirf_errband_mc(self, orth=False, repl=1000, steps=10,
                        signif=0.05, seed=None, burn=100, cum=False):
        """
        Compute Monte Carlo integrated error bands for the impulse
        response functions, assuming normally distributed errors

        Parameters
        ----------
        orth : bool, default False
            Compute orthogonalized impulse response error bands
        repl : int
            Number of Monte Carlo replications to perform
        steps : int, default 10
            Number of impulse response periods
        signif : float (0 < signif < 1)
            Significance level for error bars, defaults to 95% CI
        seed : int
            np.random.seed for replications
        burn : int
            Number of initial observations to discard for simulation
        cum : bool, default False
            Produce cumulative irf error bands

        Returns
        -------
        Tuple of lower and upper arrays of ma_rep monte carlo standard errors

        Notes
        -----
        Lütkepohl (2005) Appendix D
        """
        neqs = self.neqs
        mean = self.mean()
        k_ar = self.k_ar
        coefs = self.coefs
        sigma_u = self.sigma_u
        intercept = self.intercept
        df_model = self.df_model
        nobs = self.nobs

        ma_coll = np.zeros((repl, steps + 1, neqs, neqs))
        A = self.A
        B = self.B
        A_mask = self.A_mask
        B_mask = self.B_mask
        A_pass = self.model.A_original
        B_pass = self.model.B_original
        s_type = self.model.svar_type

        g_list = []

        def agg(impulses):
            if cum:
                return impulses.cumsum(axis=0)
            return impulses

        opt_A = A[A_mask]
        opt_B = B[B_mask]
        for i in range(repl):
            # discard the first `burn` observations to correct for
            # starting bias
            sim = util.varsim(coefs, intercept, sigma_u, seed=seed,
                              steps=nobs + burn)
            sim = sim[burn:]

            smod = SVAR(sim, svar_type=s_type, A=A_pass, B=B_pass)
            if i == 10:
                # use the first 10 draws to update the starting values
                # for the remaining fits
                mean_AB = np.mean(g_list, axis=0)
                split = len(A[A_mask])
                opt_A = mean_AB[:split]
                opt_B = mean_AB[split:]

            sres = smod.fit(maxlags=k_ar, A_guess=opt_A, B_guess=opt_B)

            if i < 10:
                # save estimates for starting values if in the first 10
                g_list.append(np.append(sres.A[A_mask].tolist(),
                                        sres.B[B_mask].tolist()))
            ma_coll[i] = agg(sres.svar_ma_rep(maxn=steps))

        ma_sort = np.sort(ma_coll, axis=0)  # sort to get quantiles
        index = (int(round(signif / 2 * repl) - 1),
                 int(round((1 - signif / 2) * repl) - 1))
        lower = ma_sort[index[0], :, :, :]
        upper = ma_sort[index[1], :, :, :]
        return lower, upper