Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/statsmodels/genmod/bayes_mixed_glm.py : 12%

1r"""
2Bayesian inference for generalized linear mixed models.
4Currently only families without additional scale or shape parameters
5are supported (binomial and Poisson).
7Two estimation approaches are supported: Laplace approximation
8('maximum a posteriori'), and variational Bayes (mean field
9approximation to the posterior distribution).
11All realizations of random effects are modeled to be mutually
12independent in this implementation.
14The `exog_vc` matrix is the design matrix for the random effects.
15Every column of `exog_vc` corresponds to an independent realization of
16a random effect. These random effects have mean zero and an unknown
17standard deviation. The standard deviation parameters are constrained
18to be equal within subsets of the columns. When not using formulas,
19these subsets are specified through the parameter `ident`. `ident`
20must have the same length as the number of columns of `exog_vc`, and
21two columns whose `ident` values are equal have the same standard
22deviation. When formulas are used, the columns of `exog_vc` derived
23from a common formula are constrained to have the same standard
24deviation.
26In many applications, `exog_vc` will be sparse. A sparse matrix may
27be passed when constructing a model class. If a dense matrix is
28passed, it will be converted internally to a sparse matrix. There
29currently is no way to avoid creating a temporary dense version of
30`exog_vc` when using formulas.
32Model and parameterization
33--------------------------
34The joint density of data and parameters factors as:
36.. math::
38 p(y | vc, fep) p(vc | vcp) p(vcp) p(fe)
40The terms :math:`p(vcp)` and :math:`p(fe)` are prior distributions
41that are taken to be Gaussian (the :math:`vcp` parameters are log
42standard deviations so the standard deviations have log-normal
43distributions). The random effects distribution :math:`p(vc | vcp)`
44is independent Gaussian (random effect realizations are independent
45within and between values of the `ident` array). The model
46:math:`p(y | vc, fep)` depends on the specific GLM being fit.
47"""

import numpy as np
from scipy.optimize import minimize
from scipy import sparse
import statsmodels.base.model as base
from statsmodels.iolib import summary2
from statsmodels.genmod import families
import pandas as pd
import warnings
import patsy

# Gauss-Legendre weights
glw = [
    [0.2955242247147529, -0.1488743389816312],
    [0.2955242247147529, 0.1488743389816312],
    [0.2692667193099963, -0.4333953941292472],
    [0.2692667193099963, 0.4333953941292472],
    [0.2190863625159820, -0.6794095682990244],
    [0.2190863625159820, 0.6794095682990244],
    [0.1494513491505806, -0.8650633666889845],
    [0.1494513491505806, 0.8650633666889845],
    [0.0666713443086881, -0.9739065285171717],
    [0.0666713443086881, 0.9739065285171717],
]
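
# Each row of `glw` is a (weight, abscissa) pair for 10-point Gauss-Legendre
# quadrature on [-1, 1].  In `_VariationalBayesMixedGLM.vb_elbo_base` below,
# the abscissas are rescaled by `rng`, so Gaussian expectations in the ELBO
# are approximated roughly as
#
#     E[h(z)] ~= rng * sum_i w_i * h(rng * x_i) * phi(rng * x_i)
#
# where phi is the standard normal density and z ~ N(0, 1) is effectively
# truncated to (-rng, rng).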

_init_doc = r"""
    Generalized Linear Mixed Model with Bayesian estimation

    The class implements the Laplace approximation to the posterior
    distribution (`fit_map`) and a variational Bayes approximation to
    the posterior (`fit_vb`).  See the two fit method docstrings for
    more information about the fitting approaches.

    Parameters
    ----------
    endog : array_like
        Vector of response values.
    exog : array_like
        Array of covariates for the fixed effects part of the mean
        structure.
    exog_vc : array_like
        Array of covariates for the random part of the model.  A
        scipy.sparse array may be provided, or else the passed
        array will be converted to sparse internally.
    ident : array_like
        Array of integer labels showing which random terms (columns
        of `exog_vc`) have a common variance.
    vcp_p : float
        Prior standard deviation for variance component parameters
        (the prior standard deviation of log(s) is vcp_p, where s is
        the standard deviation of a random effect).
    fe_p : float
        Prior standard deviation for fixed effects parameters.
    family : statsmodels.genmod.families instance
        The GLM family.
    fep_names : list[str]
        The names of the fixed effects parameters (corresponding to
        columns of exog).  If None, default names are constructed.
    vcp_names : list[str]
        The names of the variance component parameters (corresponding
        to distinct labels in ident).  If None, default names are
        constructed.
    vc_names : list[str]
        The names of the random effect realizations.

    Returns
    -------
    MixedGLMResults object

    Notes
    -----
    There are three types of values in the posterior distribution:
    fixed effects parameters (fep), corresponding to the columns of
    `exog`, random effects realizations (vc), corresponding to the
    columns of `exog_vc`, and the standard deviations of the random
    effects realizations (vcp), corresponding to the unique integer
    labels in `ident`.

    All random effects are modeled as being independent Gaussian
    values (given the variance structure parameters).  Every column of
    `exog_vc` has a distinct realized random effect that is used to
    form the linear predictors.  The elements of `ident` determine the
    distinct variance structure parameters.  Two random effect
    realizations that have the same value in `ident` have the same
    variance.  When fitting with a formula, `ident` is constructed
    internally (each element of `vc_formulas` yields a distinct label
    in `ident`).

    The random effect standard deviation parameters (`vcp`) have
    log-normal prior distributions with mean 0 and standard deviation
    `vcp_p`.

    Note that for some families, e.g. Binomial, the posterior mode may
    be difficult to find numerically if `vcp_p` is set to too large a
    value.  Setting `vcp_p` to 0.5 seems to work well.

    The prior for the fixed effects parameters is Gaussian with mean 0
    and standard deviation `fe_p`.

    Examples
    --------{example}

    References
    ----------
    Introduction to generalized linear mixed models:
    https://stats.idre.ucla.edu/other/mult-pkg/introduction-to-generalized-linear-mixed-models

    SAS documentation:
    https://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm#statug_intromix_a0000000215.htm

    An assessment of estimation methods for generalized linear mixed
    models with binary outcomes
    https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3866838/
    """

# The code in the example should be identical to what appears in
# the test_doc_examples unit test
_logit_example = """
    A binomial (logistic) random effects model with random intercepts
    for villages and random slopes for each year within each village:

    >>> random = {"a": '0 + C(Village)', "b": '0 + C(Village)*year_cen'}
    >>> model = BinomialBayesMixedGLM.from_formula(
                    'y ~ year_cen', random, data)
    >>> result = model.fit_vb()
"""

# The code in the example should be identical to what appears in
# the test_doc_examples unit test
_poisson_example = """
    A Poisson random effects model with random intercepts for villages
    and random slopes for each year within each village:

    >>> random = {"a": '0 + C(Village)', "b": '0 + C(Village)*year_cen'}
    >>> model = PoissonBayesMixedGLM.from_formula(
                    'y ~ year_cen', random, data)
    >>> result = model.fit_vb()
"""


class _BayesMixedGLM(base.Model):
    def __init__(self,
                 endog,
                 exog,
                 exog_vc=None,
                 ident=None,
                 family=None,
                 vcp_p=1,
                 fe_p=2,
                 fep_names=None,
                 vcp_names=None,
                 vc_names=None,
                 **kwargs):

        if exog.ndim == 1:
            if isinstance(exog, np.ndarray):
                exog = exog[:, None]
            else:
                exog = pd.DataFrame(exog)

        if exog.ndim != 2:
            msg = "'exog' must have one or two columns"
            raise ValueError(msg)

        if exog_vc.ndim == 1:
            if isinstance(exog_vc, np.ndarray):
                exog_vc = exog_vc[:, None]
            else:
                exog_vc = pd.DataFrame(exog_vc)

        if exog_vc.ndim != 2:
            msg = "'exog_vc' must have one or two columns"
            raise ValueError(msg)

        ident = np.asarray(ident)
        if ident.ndim != 1:
            msg = "ident must be a one-dimensional array"
            raise ValueError(msg)

        if len(ident) != exog_vc.shape[1]:
            msg = "len(ident) should match the number of columns of exog_vc"
            raise ValueError(msg)

        if not np.issubdtype(ident.dtype, np.integer):
            msg = "ident must have an integer dtype"
            raise ValueError(msg)

        # Get the fixed effects parameter names
        if fep_names is None:
            if hasattr(exog, "columns"):
                fep_names = exog.columns.tolist()
            else:
                fep_names = ["FE_%d" % (k + 1) for k in range(exog.shape[1])]

        # Get the variance parameter names
        if vcp_names is None:
            vcp_names = ["VC_%d" % (k + 1) for k in range(int(max(ident)) + 1)]
        else:
            if len(vcp_names) != len(set(ident)):
                msg = ("The length of vcp_names should match the number of "
                       "distinct values in ident")
                raise ValueError(msg)

        if not sparse.issparse(exog_vc):
            exog_vc = sparse.csr_matrix(exog_vc)

        ident = ident.astype(int)
        vcp_p = float(vcp_p)
        fe_p = float(fe_p)

        # Number of fixed effects parameters
        if exog is None:
            k_fep = 0
        else:
            k_fep = exog.shape[1]

        # Number of variance component structure parameters and
        # variance component realizations.
        if exog_vc is None:
            k_vc = 0
            k_vcp = 0
        else:
            k_vc = exog_vc.shape[1]
            k_vcp = max(ident) + 1

        # power might be better but not available in older scipy
        exog_vc2 = exog_vc.multiply(exog_vc)

        super(_BayesMixedGLM, self).__init__(endog, exog, **kwargs)

        self.exog_vc = exog_vc
        self.exog_vc2 = exog_vc2
        self.ident = ident
        self.family = family
        self.k_fep = k_fep
        self.k_vc = k_vc
        self.k_vcp = k_vcp
        self.fep_names = fep_names
        self.vcp_names = vcp_names
        self.vc_names = vc_names
        self.fe_p = fe_p
        self.vcp_p = vcp_p
        self.names = fep_names + vcp_names
        if vc_names is not None:
            self.names += vc_names

    def _unpack(self, vec):

        ii = 0

        # Fixed effects parameters
        fep = vec[:ii + self.k_fep]
        ii += self.k_fep

        # Variance component structure parameters (standard
        # deviations).  These are on the log scale.  The standard
        # deviation for random effect j is exp(vcp[ident[j]]).
        vcp = vec[ii:ii + self.k_vcp]
        ii += self.k_vcp

        # Random effect realizations
        vc = vec[ii:]

        return fep, vcp, vc
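
    # The packed parameter vector used throughout this class therefore has
    # the layout [fep (k_fep values), vcp (k_vcp values), vc (k_vc values)].
    # For example, with k_fep=2, k_vcp=1 and k_vc=3, _unpack(np.arange(6.))
    # returns (array([0., 1.]), array([2.]), array([3., 4., 5.])).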

    def logposterior(self, params):
        """
        The overall log-density: log p(y, fe, vc, vcp).

        This differs by an additive constant from the log posterior
        log p(fe, vc, vcp | y).
        """

        fep, vcp, vc = self._unpack(params)

        # Contributions from p(y | x, vc)
        lp = 0
        if self.k_fep > 0:
            lp += np.dot(self.exog, fep)
        if self.k_vc > 0:
            lp += self.exog_vc.dot(vc)

        mu = self.family.link.inverse(lp)
        ll = self.family.loglike(self.endog, mu)

        if self.k_vc > 0:

            # Contributions from p(vc | vcp)
            vcp0 = vcp[self.ident]
            s = np.exp(vcp0)
            ll -= 0.5 * np.sum(vc**2 / s**2) + np.sum(vcp0)

            # Contributions from p(vcp)
            ll -= 0.5 * np.sum(vcp**2 / self.vcp_p**2)

        # Contributions from p(fep)
        if self.k_fep > 0:
            ll -= 0.5 * np.sum(fep**2 / self.fe_p**2)

        return ll

    def logposterior_grad(self, params):
        """
        The gradient of the log posterior.
        """

        fep, vcp, vc = self._unpack(params)

        lp = 0
        if self.k_fep > 0:
            lp += np.dot(self.exog, fep)
        if self.k_vc > 0:
            lp += self.exog_vc.dot(vc)

        mu = self.family.link.inverse(lp)

        score_factor = (self.endog - mu) / self.family.link.deriv(mu)
        score_factor /= self.family.variance(mu)

        te = [None, None, None]

        # Contributions from p(y | x, z, vc)
        if self.k_fep > 0:
            te[0] = np.dot(score_factor, self.exog)
        if self.k_vc > 0:
            te[2] = self.exog_vc.transpose().dot(score_factor)

        if self.k_vc > 0:
            # Contributions from p(vc | vcp)
            # vcp0 = vcp[self.ident]
            # s = np.exp(vcp0)
            # ll -= 0.5 * np.sum(vc**2 / s**2) + np.sum(vcp0)
            vcp0 = vcp[self.ident]
            s = np.exp(vcp0)
            u = vc**2 / s**2 - 1
            te[1] = np.bincount(self.ident, weights=u)
            te[2] -= vc / s**2

            # Contributions from p(vcp)
            # ll -= 0.5 * np.sum(vcp**2 / self.vcp_p**2)
            te[1] -= vcp / self.vcp_p**2

        # Contributions from p(fep)
        if self.k_fep > 0:
            te[0] -= fep / self.fe_p**2

        te = [x for x in te if x is not None]

        return np.concatenate(te)
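
    # The analytic gradient can be checked numerically against
    # `logposterior`, e.g. (a rough sketch; `model` is any instance of
    # this class):
    #
    #     from statsmodels.tools.numdiff import approx_fprime
    #     p0 = model._get_start()
    #     assert np.allclose(model.logposterior_grad(p0),
    #                        approx_fprime(p0, model.logposterior), atol=1e-4)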

    def _get_start(self):
        start_fep = np.zeros(self.k_fep)
        start_vcp = np.ones(self.k_vcp)
        start_vc = np.random.normal(size=self.k_vc)
        start = np.concatenate((start_fep, start_vcp, start_vc))
        return start

    @classmethod
    def from_formula(cls,
                     formula,
                     vc_formulas,
                     data,
                     family=None,
                     vcp_p=1,
                     fe_p=2):
        """
        Construct a BayesMixedGLM from a formula.

        Parameters
        ----------
        formula : str
            Formula for the endog and fixed effects terms (use ~ to
            separate dependent and independent expressions).
        vc_formulas : dictionary
            vc_formulas[name] is a one-sided formula that creates one
            collection of random effects with a common variance
            parameter.  If using categorical (factor) variables to
            produce variance components, note that generally `0 + ...`
            should be used so that an intercept is not included.
        data : data frame
            The data to which the formulas are applied.
        family : genmod.families instance
            A GLM family.
        vcp_p : float
            The prior standard deviation for the logarithms of the standard
            deviations of the random effects.
        fe_p : float
            The prior standard deviation for the fixed effects parameters.
        """

        ident = []
        exog_vc = []
        vcp_names = []
        j = 0
        for na, fml in vc_formulas.items():
            mat = patsy.dmatrix(fml, data, return_type='dataframe')
            exog_vc.append(mat)
            vcp_names.append(na)
            ident.append(j * np.ones(mat.shape[1], dtype=np.integer))
            j += 1
        exog_vc = pd.concat(exog_vc, axis=1)
        vc_names = exog_vc.columns.tolist()

        ident = np.concatenate(ident)

        model = super(_BayesMixedGLM, cls).from_formula(
            formula,
            data=data,
            family=family,
            subset=None,
            exog_vc=exog_vc,
            ident=ident,
            vc_names=vc_names,
            vcp_names=vcp_names,
            fe_p=fe_p,
            vcp_p=vcp_p)

        return model
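
    # Note that `ident` is built here from the order of iteration over
    # `vc_formulas`: every column produced by the same formula gets the same
    # integer label.  For instance, two formulas producing 3 and 2 columns
    # respectively would give ident = [0, 0, 0, 1, 1].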

    def fit(self, method="BFGS", minim_opts=None):
        """
        fit is equivalent to fit_map.

        See fit_map for parameter information.

        Use `fit_vb` to fit the model using variational Bayes.
        """
        return self.fit_map(method, minim_opts)

    def fit_map(self, method="BFGS", minim_opts=None, scale_fe=False):
        """
        Construct the Laplace approximation to the posterior distribution.

        Parameters
        ----------
        method : str
            Optimization method for finding the posterior mode.
        minim_opts : dict
            Options passed to scipy.minimize.
        scale_fe : bool
            If True, the columns of the fixed effects design matrix
            are centered and scaled to unit variance before fitting
            the model.  The results are back-transformed so that they
            are presented on the original scale.

        Returns
        -------
        BayesMixedGLMResults instance.
        """

        if scale_fe:
            mn = self.exog.mean(0)
            sc = self.exog.std(0)
            self._exog_save = self.exog
            self.exog = self.exog.copy()
            ixs = np.flatnonzero(sc > 1e-8)
            self.exog[:, ixs] -= mn[ixs]
            self.exog[:, ixs] /= sc[ixs]

        def fun(params):
            return -self.logposterior(params)

        def grad(params):
            return -self.logposterior_grad(params)

        start = self._get_start()

        r = minimize(fun, start, method=method, jac=grad, options=minim_opts)
        if not r.success:
            msg = ("Laplace fitting did not converge, |gradient|=%.6f" %
                   np.sqrt(np.sum(r.jac**2)))
            warnings.warn(msg)

        from statsmodels.tools.numdiff import approx_fprime
        hess = approx_fprime(r.x, grad)
        cov = np.linalg.inv(hess)

        params = r.x

        if scale_fe:
            self.exog = self._exog_save
            del self._exog_save
            params[ixs] /= sc[ixs]
            # np.ix_ is needed so the scaling is applied to `cov` in place
            # (chained fancy indexing would only modify a copy).
            cov[np.ix_(ixs, ixs)] /= np.outer(sc[ixs], sc[ixs])

        return BayesMixedGLMResults(self, params, cov, optim_retvals=r)
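
    # In `fit_map`, `grad` returns the negative score, so `hess` is the
    # negative Hessian of the log posterior at the mode and `cov` is its
    # inverse: the covariance matrix of the Gaussian (Laplace) approximation
    # reported by the results object.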

    def predict(self, params, exog=None, linear=False):
        """
        Return the fitted mean structure.

        Parameters
        ----------
        params : array_like
            The parameter vector, may be the full parameter vector, or may
            be truncated to include only the mean parameters.
        exog : array_like
            The design matrix for the mean structure.  If omitted, use the
            model's design matrix.
        linear : bool
            If True, return the linear predictor without passing through the
            link function.

        Returns
        -------
        A 1-dimensional array of predicted values
        """

        if exog is None:
            exog = self.exog

        q = exog.shape[1]
        pr = np.dot(exog, params[0:q])

        if not linear:
            pr = self.family.link.inverse(pr)

        return pr


class _VariationalBayesMixedGLM(object):
    """
    A mixin providing generic (not family-specific) methods for
    variational Bayes mean field fitting.
    """

    # Integration range (from -rng to +rng).  The integrals are with
    # respect to a standard Gaussian distribution so (-5, 5) will be
    # sufficient in many cases.
    rng = 5

    verbose = False

    # Returns the mean and variance of the linear predictor under the
    # given distribution parameters.
    def _lp_stats(self, fep_mean, fep_sd, vc_mean, vc_sd):

        tm = np.dot(self.exog, fep_mean)
        tv = np.dot(self.exog**2, fep_sd**2)
        tm += self.exog_vc.dot(vc_mean)
        tv += self.exog_vc2.dot(vc_sd**2)

        return tm, tv

    def vb_elbo_base(self, h, tm, fep_mean, vcp_mean, vc_mean, fep_sd, vcp_sd,
                     vc_sd):
        """
        Returns the evidence lower bound (ELBO) for the model.

        This function calculates the family-specific ELBO function
        based on information provided from a subclass.

        Parameters
        ----------
        h : function mapping 1d vector to 1d vector
            The contribution of the model to the ELBO function can be
            expressed as y_i*lp_i + Eh_i(z), where y_i and lp_i are
            the response and linear predictor for observation i, and z
            is a standard normal random variable.  This formulation
            can be achieved for any GLM with a canonical link
            function.
        """

        # p(y | vc) contributions
        iv = 0
        for w in glw:
            z = self.rng * w[1]
            iv += w[0] * h(z) * np.exp(-z**2 / 2)
        iv /= np.sqrt(2 * np.pi)
        iv *= self.rng
        iv += self.endog * tm
        iv = iv.sum()

        # p(vc | vcp) * p(vcp) * p(fep) contributions
        iv += self._elbo_common(fep_mean, fep_sd, vcp_mean, vcp_sd, vc_mean,
                                vc_sd)

        r = (iv + np.sum(np.log(fep_sd)) + np.sum(np.log(vcp_sd)) + np.sum(
            np.log(vc_sd)))

        return r
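
    # The trailing sum-of-log-sd terms in `r` are the entropy of the factored
    # Gaussian approximation q, up to an additive constant, so `r` is the ELBO
    # E_q[log p(y, fep, vcp, vc)] - E_q[log q] up to that constant.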

    def vb_elbo_grad_base(self, h, tm, tv, fep_mean, vcp_mean, vc_mean, fep_sd,
                          vcp_sd, vc_sd):
        """
        Return the gradient of the ELBO function.

        See vb_elbo_base for parameters.
        """

        fep_mean_grad = 0.
        fep_sd_grad = 0.
        vcp_mean_grad = 0.
        vcp_sd_grad = 0.
        vc_mean_grad = 0.
        vc_sd_grad = 0.

        # p(y | vc) contributions
        for w in glw:
            z = self.rng * w[1]
            u = h(z) * np.exp(-z**2 / 2) / np.sqrt(2 * np.pi)
            r = u / np.sqrt(tv)
            fep_mean_grad += w[0] * np.dot(u, self.exog)
            vc_mean_grad += w[0] * self.exog_vc.transpose().dot(u)
            fep_sd_grad += w[0] * z * np.dot(r, self.exog**2 * fep_sd)
            v = self.exog_vc2.multiply(vc_sd).transpose().dot(r)
            v = np.squeeze(np.asarray(v))
            vc_sd_grad += w[0] * z * v

        fep_mean_grad *= self.rng
        vc_mean_grad *= self.rng
        fep_sd_grad *= self.rng
        vc_sd_grad *= self.rng
        fep_mean_grad += np.dot(self.endog, self.exog)
        vc_mean_grad += self.exog_vc.transpose().dot(self.endog)

        (fep_mean_grad_i, fep_sd_grad_i, vcp_mean_grad_i, vcp_sd_grad_i,
         vc_mean_grad_i, vc_sd_grad_i) = self._elbo_grad_common(
             fep_mean, fep_sd, vcp_mean, vcp_sd, vc_mean, vc_sd)

        fep_mean_grad += fep_mean_grad_i
        fep_sd_grad += fep_sd_grad_i
        vcp_mean_grad += vcp_mean_grad_i
        vcp_sd_grad += vcp_sd_grad_i
        vc_mean_grad += vc_mean_grad_i
        vc_sd_grad += vc_sd_grad_i

        fep_sd_grad += 1 / fep_sd
        vcp_sd_grad += 1 / vcp_sd
        vc_sd_grad += 1 / vc_sd

        mean_grad = np.concatenate((fep_mean_grad, vcp_mean_grad,
                                    vc_mean_grad))
        sd_grad = np.concatenate((fep_sd_grad, vcp_sd_grad, vc_sd_grad))

        if self.verbose:
            print(
                "|G|=%f" % np.sqrt(np.sum(mean_grad**2) + np.sum(sd_grad**2)))

        return mean_grad, sd_grad

    def fit_vb(self,
               mean=None,
               sd=None,
               fit_method="BFGS",
               minim_opts=None,
               scale_fe=False,
               verbose=False):
        """
        Fit a model using the variational Bayes mean field approximation.

        Parameters
        ----------
        mean : array_like
            Starting value for VB mean vector
        sd : array_like
            Starting value for VB standard deviation vector
        fit_method : str
            Algorithm for scipy.minimize
        minim_opts : dict
            Options passed to scipy.minimize
        scale_fe : bool
            If True, the columns of the fixed effects design matrix
            are centered and scaled to unit variance before fitting
            the model.  The results are back-transformed so that they
            are presented on the original scale.
        verbose : bool
            If True, print the gradient norm to the screen each time
            it is calculated.

        Notes
        -----
        The goal is to find a factored Gaussian approximation
        q1*q2*... to the posterior distribution, approximately
        minimizing the KL divergence from the factored approximation
        to the actual posterior.  This is done by maximizing the ELBO
        (evidence lower bound), which differs from the negated KL
        divergence only by a constant that does not depend on q.  The
        ELBO has the form

        E* log p(y, fe, vcp, vc) - E* log q

        where E* is expectation with respect to the product of the qj.

        References
        ----------
        Blei, Kucukelbir, McAuliffe (2017).  Variational Inference: A
        Review for Statisticians
        https://arxiv.org/pdf/1601.00670.pdf
        """

        self.verbose = verbose

        if scale_fe:
            mn = self.exog.mean(0)
            sc = self.exog.std(0)
            self._exog_save = self.exog
            self.exog = self.exog.copy()
            ixs = np.flatnonzero(sc > 1e-8)
            self.exog[:, ixs] -= mn[ixs]
            self.exog[:, ixs] /= sc[ixs]

        n = self.k_fep + self.k_vcp + self.k_vc
        ml = self.k_fep + self.k_vcp + self.k_vc
        if mean is None:
            m = np.zeros(n)
        else:
            if len(mean) != ml:
                raise ValueError(
                    "mean has incorrect length, %d != %d" % (len(mean), ml))
            m = mean.copy()
        if sd is None:
            s = -0.5 + 0.1 * np.random.normal(size=n)
        else:
            if len(sd) != ml:
                raise ValueError(
                    "sd has incorrect length, %d != %d" % (len(sd), ml))

            # s is parametrized on the log-scale internally when
            # optimizing the ELBO function (this is transparent to the
            # caller)
            s = np.log(sd)

        # Do not allow the variance parameter starting mean values to
        # be too small.
        i1, i2 = self.k_fep, self.k_fep + self.k_vcp
        m[i1:i2] = np.where(m[i1:i2] < -1, -1, m[i1:i2])

        # Do not allow the posterior standard deviation starting values
        # to be too small.
        s = np.where(s < -1, -1, s)

        def elbo(x):
            n = len(x) // 2
            return -self.vb_elbo(x[:n], np.exp(x[n:]))

        def elbo_grad(x):
            n = len(x) // 2
            gm, gs = self.vb_elbo_grad(x[:n], np.exp(x[n:]))
            gs *= np.exp(x[n:])
            return -np.concatenate((gm, gs))

        start = np.concatenate((m, s))
        mm = minimize(
            elbo, start, jac=elbo_grad, method=fit_method, options=minim_opts)
        if not mm.success:
            warnings.warn("VB fitting did not converge")

        n = len(mm.x) // 2
        params = mm.x[0:n]
        va = np.exp(2 * mm.x[n:])

        if scale_fe:
            self.exog = self._exog_save
            del self._exog_save
            params[ixs] /= sc[ixs]
            va[ixs] /= sc[ixs]**2

        return BayesMixedGLMResults(self, params, va, mm)
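
    # Note that for VB fits the results object receives the vector of
    # posterior variances `va` (rather than a full covariance matrix), so the
    # reported posterior standard deviations come directly from the factored
    # mean field approximation.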

    # Handle terms in the ELBO that are common to all models.
    def _elbo_common(self, fep_mean, fep_sd, vcp_mean, vcp_sd, vc_mean, vc_sd):

        iv = 0

        # p(vc | vcp) contributions
        m = vcp_mean[self.ident]
        s = vcp_sd[self.ident]
        iv -= np.sum((vc_mean**2 + vc_sd**2) * np.exp(2 * (s**2 - m))) / 2
        iv -= np.sum(m)

        # p(vcp) contributions
        iv -= 0.5 * (vcp_mean**2 + vcp_sd**2).sum() / self.vcp_p**2

        # p(b) contributions
        iv -= 0.5 * (fep_mean**2 + fep_sd**2).sum() / self.fe_p**2

        return iv

    def _elbo_grad_common(self, fep_mean, fep_sd, vcp_mean, vcp_sd, vc_mean,
                          vc_sd):

        # p(vc | vcp) contributions
        m = vcp_mean[self.ident]
        s = vcp_sd[self.ident]
        u = vc_mean**2 + vc_sd**2
        ve = np.exp(2 * (s**2 - m))
        dm = u * ve - 1
        ds = -2 * u * ve * s
        vcp_mean_grad = np.bincount(self.ident, weights=dm)
        vcp_sd_grad = np.bincount(self.ident, weights=ds)

        vc_mean_grad = -vc_mean.copy() * ve
        vc_sd_grad = -vc_sd.copy() * ve

        # p(vcp) contributions
        vcp_mean_grad -= vcp_mean / self.vcp_p**2
        vcp_sd_grad -= vcp_sd / self.vcp_p**2

        # p(b) contributions
        fep_mean_grad = -fep_mean.copy() / self.fe_p**2
        fep_sd_grad = -fep_sd.copy() / self.fe_p**2

        return (fep_mean_grad, fep_sd_grad, vcp_mean_grad, vcp_sd_grad,
                vc_mean_grad, vc_sd_grad)


class BayesMixedGLMResults(object):
    """
    Class to hold results from a Bayesian estimation of a Mixed GLM model.

    Attributes
    ----------
    fe_mean : array_like
        Posterior mean of the fixed effects coefficients.
    fe_sd : array_like
        Posterior standard deviation of the fixed effects coefficients.
    vcp_mean : array_like
        Posterior mean of the logged variance component standard
        deviations.
    vcp_sd : array_like
        Posterior standard deviation of the logged variance component
        standard deviations.
    vc_mean : array_like
        Posterior mean of the random coefficients.
    vc_sd : array_like
        Posterior standard deviation of the random coefficients.
    """

    def __init__(self, model, params, cov_params, optim_retvals=None):

        self.model = model
        self.params = params
        self._cov_params = cov_params
        self.optim_retvals = optim_retvals

        self.fe_mean, self.vcp_mean, self.vc_mean = (model._unpack(params))

        if cov_params.ndim == 2:
            cp = np.diag(cov_params)
        else:
            cp = cov_params
        self.fe_sd, self.vcp_sd, self.vc_sd = model._unpack(cp)
        self.fe_sd = np.sqrt(self.fe_sd)
        self.vcp_sd = np.sqrt(self.vcp_sd)
        self.vc_sd = np.sqrt(self.vc_sd)

    def cov_params(self):

        if hasattr(self.model.data, "frame"):
            # Return the covariance matrix as a dataframe or series
            na = (self.model.fep_names + self.model.vcp_names +
                  self.model.vc_names)
            if self._cov_params.ndim == 2:
                return pd.DataFrame(self._cov_params, index=na, columns=na)
            else:
                return pd.Series(self._cov_params, index=na)

        # Return the covariance matrix as a ndarray
        return self._cov_params

    def summary(self):

        df = pd.DataFrame()
        m = self.model.k_fep + self.model.k_vcp
        df["Type"] = (["M" for k in range(self.model.k_fep)] +
                      ["V" for k in range(self.model.k_vcp)])

        df["Post. Mean"] = self.params[0:m]

        if self._cov_params.ndim == 2:
            v = np.diag(self._cov_params)[0:m]
            df["Post. SD"] = np.sqrt(v)
        else:
            df["Post. SD"] = np.sqrt(self._cov_params[0:m])

        # Convert variance parameters to natural scale
        df["SD"] = np.exp(df["Post. Mean"])
        df["SD (LB)"] = np.exp(df["Post. Mean"] - 2 * df["Post. SD"])
        df["SD (UB)"] = np.exp(df["Post. Mean"] + 2 * df["Post. SD"])
        df["SD"] = ["%.3f" % x for x in df.SD]
        df["SD (LB)"] = ["%.3f" % x for x in df["SD (LB)"]]
        df["SD (UB)"] = ["%.3f" % x for x in df["SD (UB)"]]
        df.loc[df.index < self.model.k_fep, "SD"] = ""
        df.loc[df.index < self.model.k_fep, "SD (LB)"] = ""
        df.loc[df.index < self.model.k_fep, "SD (UB)"] = ""

        df.index = self.model.fep_names + self.model.vcp_names

        summ = summary2.Summary()
        summ.add_title(self.model.family.__class__.__name__ +
                       " Mixed GLM Results")
        summ.add_df(df)

        summ.add_text("Parameter types are mean structure (M) and "
                      "variance structure (V)")
        summ.add_text("Variance parameters are modeled as log "
                      "standard deviations")

        return summ
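
    # The "SD", "SD (LB)" and "SD (UB)" columns exponentiate the posterior
    # mean and the posterior mean +/- 2 posterior SD of the log-scale variance
    # parameters, giving each random effect's standard deviation together with
    # an approximate 95% interval on the natural (standard deviation) scale.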

    def random_effects(self, term=None):
        """
        Posterior mean and standard deviation of random effects.

        Parameters
        ----------
        term : str or None
            If None, results for all random effects are returned.  If
            a string, returns results for the variance component with
            that name, i.e. one of the labels in `vcp_names` (a key of
            `vc_formulas` when the model was built from formulas).

        Returns
        -------
        Data frame of posterior means and posterior standard
        deviations of random effects.
        """

        z = self.vc_mean
        s = self.vc_sd
        na = self.model.vc_names

        if term is not None:
            termix = self.model.vcp_names.index(term)
            ii = np.flatnonzero(self.model.ident == termix)
            z = z[ii]
            s = s[ii]
            na = [na[i] for i in ii]

        x = pd.DataFrame({"Mean": z, "SD": s})

        if na is not None:
            x.index = na

        return x
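
    # For example, for a model built with vc_formulas={"a": ..., "b": ...}
    # (hypothetical names), result.random_effects("a") would return only the
    # realizations whose `ident` label corresponds to the "a" formula.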

    def predict(self, exog=None, linear=False):
        """
        Return predicted values for the mean structure.

        Parameters
        ----------
        exog : array_like
            The design matrix for the mean structure.  If None,
            use the model's design matrix.
        linear : bool
            If True, returns the linear predictor, otherwise
            transform the linear predictor using the link function.

        Returns
        -------
        A one-dimensional array of fitted values.
        """

        return self.model.predict(self.params, exog, linear)


class BinomialBayesMixedGLM(_VariationalBayesMixedGLM, _BayesMixedGLM):

    __doc__ = _init_doc.format(example=_logit_example)

    def __init__(self,
                 endog,
                 exog,
                 exog_vc,
                 ident,
                 vcp_p=1,
                 fe_p=2,
                 fep_names=None,
                 vcp_names=None,
                 vc_names=None):

        super(BinomialBayesMixedGLM, self).__init__(
            endog,
            exog,
            exog_vc=exog_vc,
            ident=ident,
            vcp_p=vcp_p,
            fe_p=fe_p,
            family=families.Binomial(),
            fep_names=fep_names,
            vcp_names=vcp_names,
            vc_names=vc_names)

        if not np.all(np.unique(endog) == np.r_[0, 1]):
            msg = "endog values must be 0 and 1, and not all identical"
            raise ValueError(msg)

    @classmethod
    def from_formula(cls, formula, vc_formulas, data, vcp_p=1, fe_p=2):

        fam = families.Binomial()
        x = _BayesMixedGLM.from_formula(
            formula, vc_formulas, data, family=fam, vcp_p=vcp_p, fe_p=fe_p)

        # Copy over to the intended class structure
        mod = BinomialBayesMixedGLM(
            x.endog,
            x.exog,
            exog_vc=x.exog_vc,
            ident=x.ident,
            vcp_p=x.vcp_p,
            fe_p=x.fe_p,
            fep_names=x.fep_names,
            vcp_names=x.vcp_names,
            vc_names=x.vc_names)
        mod.data = x.data

        return mod

    def vb_elbo(self, vb_mean, vb_sd):
        """
        Returns the evidence lower bound (ELBO) for the model.
        """

        fep_mean, vcp_mean, vc_mean = self._unpack(vb_mean)
        fep_sd, vcp_sd, vc_sd = self._unpack(vb_sd)
        tm, tv = self._lp_stats(fep_mean, fep_sd, vc_mean, vc_sd)

        def h(z):
            return -np.log(1 + np.exp(tm + np.sqrt(tv) * z))

        return self.vb_elbo_base(h, tm, fep_mean, vcp_mean, vc_mean, fep_sd,
                                 vcp_sd, vc_sd)

    def vb_elbo_grad(self, vb_mean, vb_sd):
        """
        Returns the gradient of the model's evidence lower bound (ELBO).
        """

        fep_mean, vcp_mean, vc_mean = self._unpack(vb_mean)
        fep_sd, vcp_sd, vc_sd = self._unpack(vb_sd)
        tm, tv = self._lp_stats(fep_mean, fep_sd, vc_mean, vc_sd)

        def h(z):
            u = tm + np.sqrt(tv) * z
            x = np.zeros_like(u)
            ii = np.flatnonzero(u > 0)
            uu = u[ii]
            x[ii] = 1 / (1 + np.exp(-uu))
            ii = np.flatnonzero(u <= 0)
            uu = u[ii]
            x[ii] = np.exp(uu) / (1 + np.exp(uu))
            return -x

        return self.vb_elbo_grad_base(h, tm, tv, fep_mean, vcp_mean, vc_mean,
                                      fep_sd, vcp_sd, vc_sd)
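

# In `BinomialBayesMixedGLM` above and `PoissonBayesMixedGLM` below, `h` is
# the part of the canonical-link log-likelihood that does not multiply y: the
# per-observation Binomial (logit) log-likelihood is y*lp - log(1 + exp(lp))
# and the Poisson (log link) log-likelihood is y*lp - exp(lp) - log(y!), so
# h(z) returns -log(1 + exp(.)) and -exp(.) respectively, evaluated at
# lp = tm + sqrt(tv)*z; the y*lp term is handled separately in `vb_elbo_base`.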


class PoissonBayesMixedGLM(_VariationalBayesMixedGLM, _BayesMixedGLM):

    __doc__ = _init_doc.format(example=_poisson_example)

    def __init__(self,
                 endog,
                 exog,
                 exog_vc,
                 ident,
                 vcp_p=1,
                 fe_p=2,
                 fep_names=None,
                 vcp_names=None,
                 vc_names=None):

        super(PoissonBayesMixedGLM, self).__init__(
            endog=endog,
            exog=exog,
            exog_vc=exog_vc,
            ident=ident,
            vcp_p=vcp_p,
            fe_p=fe_p,
            family=families.Poisson(),
            fep_names=fep_names,
            vcp_names=vcp_names,
            vc_names=vc_names)

    @classmethod
    def from_formula(cls,
                     formula,
                     vc_formulas,
                     data,
                     vcp_p=1,
                     fe_p=2,
                     vcp_names=None,
                     vc_names=None):

        fam = families.Poisson()
        x = _BayesMixedGLM.from_formula(
            formula,
            vc_formulas,
            data,
            family=fam,
            vcp_p=vcp_p,
            fe_p=fe_p)

        # Copy over to the intended class structure
        mod = PoissonBayesMixedGLM(
            endog=x.endog,
            exog=x.exog,
            exog_vc=x.exog_vc,
            ident=x.ident,
            vcp_p=x.vcp_p,
            fe_p=x.fe_p,
            fep_names=x.fep_names,
            vcp_names=x.vcp_names,
            vc_names=x.vc_names)
        mod.data = x.data

        return mod

    def vb_elbo(self, vb_mean, vb_sd):
        """
        Returns the evidence lower bound (ELBO) for the model.
        """

        fep_mean, vcp_mean, vc_mean = self._unpack(vb_mean)
        fep_sd, vcp_sd, vc_sd = self._unpack(vb_sd)
        tm, tv = self._lp_stats(fep_mean, fep_sd, vc_mean, vc_sd)

        def h(z):
            return -np.exp(tm + np.sqrt(tv) * z)

        return self.vb_elbo_base(h, tm, fep_mean, vcp_mean, vc_mean, fep_sd,
                                 vcp_sd, vc_sd)

    def vb_elbo_grad(self, vb_mean, vb_sd):
        """
        Returns the gradient of the model's evidence lower bound (ELBO).
        """

        fep_mean, vcp_mean, vc_mean = self._unpack(vb_mean)
        fep_sd, vcp_sd, vc_sd = self._unpack(vb_sd)
        tm, tv = self._lp_stats(fep_mean, fep_sd, vc_mean, vc_sd)

        def h(z):
            y = -np.exp(tm + np.sqrt(tv) * z)
            return y

        return self.vb_elbo_grad_base(h, tm, tv, fep_mean, vcp_mean, vc_mean,
                                      fep_sd, vcp_sd, vc_sd)