Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2Covariance models and estimators for GEE. 

3 

4Some details for the covariance calculations can be found in the Stata 

5docs: 

6 

7http://www.stata.com/manuals13/xtxtgee.pdf 

8""" 

9from statsmodels.compat.python import iterkeys, itervalues 

10from statsmodels.compat.pandas import Appender 

11 

12from statsmodels.stats.correlation_tools import cov_nearest 

13import numpy as np 

14import pandas as pd 

15from scipy import linalg as spl 

16from collections import defaultdict 

17from statsmodels.tools.sm_exceptions import (ConvergenceWarning, OutputWarning, 

18 NotImplementedWarning) 

19import warnings 

20 

21 

22class CovStruct(object): 

23 """ 

24 Base class for correlation and covariance structures. 

25 

26 An implementation of this class takes the residuals from a 

27 regression model that has been fit to grouped data, and uses 

28 them to estimate the within-group dependence structure of the 

29 random errors in the model. 

30 

31 The current state of the covariance structure is represented 

32 through the value of the `dep_params` attribute. 

33 

34 The default state of a newly-created instance should always be 

35 the identity correlation matrix. 

36 """ 

37 

38 def __init__(self, cov_nearest_method="clipped"): 

39 

40 # Parameters describing the dependency structure 

41 self.dep_params = None 

42 

43 # Keep track of the number of times that the covariance was 

44 # adjusted. 

45 self.cov_adjust = [] 

46 

47 # Method for projecting the covariance matrix if it is not 

48 # PSD. 

49 self.cov_nearest_method = cov_nearest_method 

50 

51 def initialize(self, model): 

52 """ 

53 Called by GEE, used by implementations that need additional 

54 setup prior to running `fit`. 

55 

56 Parameters 

57 ---------- 

58 model : GEE class 

59 A reference to the parent GEE class instance. 

60 """ 

61 self.model = model 

62 

63 def update(self, params): 

64 """ 

65 Update the association parameter values based on the current 

66 regression coefficients. 

67 

68 Parameters 

69 ---------- 

70 params : array_like 

71 Working values for the regression parameters. 

72 """ 

73 raise NotImplementedError 

74 

75 def covariance_matrix(self, endog_expval, index): 

76 """ 

77 Returns the working covariance or correlation matrix for a 

78 given cluster of data. 

79 

80 Parameters 

81 ---------- 

82 endog_expval: array_like 

83 The expected values of endog for the cluster for which the 

84 covariance or correlation matrix will be returned 

85 index: int 

86 The index of the cluster for which the covariance or 

87 correlation matrix will be returned 

88 

89 Returns 

90 ------- 

91 M: matrix 

92 The covariance or correlation matrix of endog 

93 is_cor: bool 

94 True if M is a correlation matrix, False if M is a 

95 covariance matrix 

96 """ 

97 raise NotImplementedError 

98 

99 def covariance_matrix_solve(self, expval, index, stdev, rhs): 

100 """ 

101 Solves matrix equations of the form `covmat * soln = rhs` and 

102 returns the values of `soln`, where `covmat` is the covariance 

103 matrix represented by this class. 

104 

105 Parameters 

106 ---------- 

107 expval: array_like 

108 The expected value of endog for each observed value in the 

109 group. 

110 index: int 

111 The group index. 

112 stdev : array_like 

113 The standard deviation of endog for each observation in 

114 the group. 

115 rhs : list/tuple of array_like 

116 A set of right-hand sides; each defines a matrix equation 

117 to be solved. 

118 

119 Returns 

120 ------- 

121 soln : list/tuple of array_like 

122 The solutions to the matrix equations. 

123 

124 Notes 

125 ----- 

126 Returns None if the solver fails. 

127 

128 Some dependence structures do not use `expval` and/or `index` 

129 to determine the correlation matrix. Some families 

130 (e.g. binomial) do not use the `stdev` parameter when forming 

131 the covariance matrix. 

132 

133 If the covariance matrix is singular or not SPD, it is 

134 projected to the nearest such matrix. These projection events 

135 are recorded in the fit_history attribute of the GEE model. 

136 

137 Systems of linear equations with the covariance matrix as the 

138 left hand side (LHS) are solved for different right hand sides 

139 (RHS); the LHS is only factorized once to save time. 

140 

141 This is a default implementation, it can be reimplemented in 

142 subclasses to optimize the linear algebra according to the 

143 structure of the covariance matrix. 

144 """ 

145 

146 vmat, is_cor = self.covariance_matrix(expval, index) 

147 if is_cor: 

148 vmat *= np.outer(stdev, stdev) 

149 

150 # Factor the covariance matrix. If the factorization fails, 

151 # attempt to condition it into a factorizable matrix. 

152 threshold = 1e-2 

153 success = False 

154 cov_adjust = 0 

155 for itr in range(20): 

156 try: 

157 vco = spl.cho_factor(vmat) 

158 success = True 

159 break 

160 except np.linalg.LinAlgError: 

161 vmat = cov_nearest(vmat, method=self.cov_nearest_method, 

162 threshold=threshold) 

163 threshold *= 2 

164 cov_adjust += 1 

165 msg = "At least one covariance matrix was not PSD " 

166 msg += "and required projection." 

167 warnings.warn(msg) 

168 

169 self.cov_adjust.append(cov_adjust) 

170 

171 # Last resort if we still cannot factor the covariance matrix. 

172 if not success: 

173 warnings.warn( 

174 "Unable to condition covariance matrix to an SPD " 

175 "matrix using cov_nearest", ConvergenceWarning) 

176 vmat = np.diag(np.diag(vmat)) 

177 vco = spl.cho_factor(vmat) 

178 

179 soln = [spl.cho_solve(vco, x) for x in rhs] 

180 return soln 

181 

182 def summary(self): 

183 """ 

184 Returns a text summary of the current estimate of the 

185 dependence structure. 

186 """ 

187 raise NotImplementedError 

188 

189 

190class Independence(CovStruct): 

191 """ 

192 An independence working dependence structure. 

193 """ 

194 

195 @Appender(CovStruct.update.__doc__) 

196 def update(self, params): 

197 # Nothing to update 

198 return 

199 

200 @Appender(CovStruct.covariance_matrix.__doc__) 

201 def covariance_matrix(self, expval, index): 

202 dim = len(expval) 

203 return np.eye(dim, dtype=np.float64), True 

204 

205 @Appender(CovStruct.covariance_matrix_solve.__doc__) 

206 def covariance_matrix_solve(self, expval, index, stdev, rhs): 

207 v = stdev ** 2 

208 rslt = [] 

209 for x in rhs: 

210 if x.ndim == 1: 

211 rslt.append(x / v) 

212 else: 

213 rslt.append(x / v[:, None]) 

214 return rslt 

215 

216 def summary(self): 

217 return ("Observations within a cluster are modeled " 

218 "as being independent.") 

219 

220 

221class Exchangeable(CovStruct): 

222 """ 

223 An exchangeable working dependence structure. 

224 """ 

225 

226 def __init__(self): 

227 

228 super(Exchangeable, self).__init__() 

229 

230 # The correlation between any two values in the same cluster 

231 self.dep_params = 0. 

232 

233 @Appender(CovStruct.update.__doc__) 

234 def update(self, params): 

235 

236 endog = self.model.endog_li 

237 

238 nobs = self.model.nobs 

239 

240 varfunc = self.model.family.variance 

241 

242 cached_means = self.model.cached_means 

243 

244 has_weights = self.model.weights is not None 

245 weights_li = self.model.weights 

246 

247 residsq_sum, scale = 0, 0 

248 fsum1, fsum2, n_pairs = 0., 0., 0. 

249 for i in range(self.model.num_group): 

250 expval, _ = cached_means[i] 

251 stdev = np.sqrt(varfunc(expval)) 

252 resid = (endog[i] - expval) / stdev 

253 f = weights_li[i] if has_weights else 1. 

254 

255 ssr = np.sum(resid * resid) 

256 scale += f * ssr 

257 fsum1 += f * len(endog[i]) 

258 

259 residsq_sum += f * (resid.sum() ** 2 - ssr) / 2 

260 ngrp = len(resid) 

261 npr = 0.5 * ngrp * (ngrp - 1) 

262 fsum2 += f * npr 

263 n_pairs += npr 

264 

265 ddof = self.model.ddof_scale 

266 scale /= (fsum1 * (nobs - ddof) / float(nobs)) 

267 residsq_sum /= scale 

268 self.dep_params = residsq_sum / \ 

269 (fsum2 * (n_pairs - ddof) / float(n_pairs)) 

270 

271 @Appender(CovStruct.covariance_matrix.__doc__) 

272 def covariance_matrix(self, expval, index): 

273 dim = len(expval) 

274 dp = self.dep_params * np.ones((dim, dim), dtype=np.float64) 

275 np.fill_diagonal(dp, 1) 

276 return dp, True 

277 

278 @Appender(CovStruct.covariance_matrix_solve.__doc__) 

279 def covariance_matrix_solve(self, expval, index, stdev, rhs): 

280 

281 k = len(expval) 

282 c = self.dep_params / (1. - self.dep_params) 

283 c /= 1. + self.dep_params * (k - 1) 

284 

285 rslt = [] 

286 for x in rhs: 

287 if x.ndim == 1: 

288 x1 = x / stdev 

289 y = x1 / (1. - self.dep_params) 

290 y -= c * sum(x1) 

291 y /= stdev 

292 else: 

293 x1 = x / stdev[:, None] 

294 y = x1 / (1. - self.dep_params) 

295 y -= c * x1.sum(0) 

296 y /= stdev[:, None] 

297 rslt.append(y) 

298 

299 return rslt 

300 

301 def summary(self): 

302 return ("The correlation between two observations in the " + 

303 "same cluster is %.3f" % self.dep_params) 

304 

305 

306class Nested(CovStruct): 

307 """ 

308 A nested working dependence structure. 

309 

310 A nested working dependence structure captures unique variance 

311 associated with each level in a hierarchy of partitions of the 

312 cases. For each level of the hierarchy, there is a set of iid 

313 random effects with mean zero, and with variance that is specific 

314 to the level. These variance parameters are estimated from the 

315 data using the method of moments. 

316 

317 The top level of the hierarchy is always defined by the required 

318 `groups` argument to GEE. 

319 

320 The `dep_data` argument used to create the GEE defines the 

321 remaining levels of the hierarchy. it should be either an array, 

322 or if using the formula interface, a string that contains a 

323 formula. If an array, it should contain a `n_obs x k` matrix of 

324 labels, corresponding to the k levels of partitioning that are 

325 nested under the top-level `groups` of the GEE instance. These 

326 subgroups should be nested from left to right, so that two 

327 observations with the same label for column j of `dep_data` should 

328 also have the same label for all columns j' < j (this only applies 

329 to observations in the same top-level cluster given by the 

330 `groups` argument to GEE). 

331 

332 If `dep_data` is a formula, it should usually be of the form `0 + 

333 a + b + ...`, where `a`, `b`, etc. contain labels defining group 

334 membership. The `0 + ` should be included to prevent creation of 

335 an intercept. The variable values are interpreted as labels for 

336 group membership, but the variables should not be explicitly coded 

337 as categorical, i.e. use `0 + a` not `0 + C(a)`. 

338 

339 Notes 

340 ----- 

341 The calculations for the nested structure involve all pairs of 

342 observations within the top level `group` passed to GEE. Large 

343 group sizes will result in slow iterations. 

344 """ 

345 

346 def initialize(self, model): 

347 """ 

348 Called on the first call to update 

349 

350 `ilabels` is a list of n_i x n_i matrices containing integer 

351 labels that correspond to specific correlation parameters. 

352 Two elements of ilabels[i] with the same label share identical 

353 variance components. 

354 

355 `designx` is a matrix, with each row containing dummy 

356 variables indicating which variance components are associated 

357 with the corresponding element of QY. 

358 """ 

359 

360 super(Nested, self).initialize(model) 

361 

362 if self.model.weights is not None: 

363 warnings.warn("weights not implemented for nested cov_struct, " 

364 "using unweighted covariance estimate", 

365 NotImplementedWarning) 

366 

367 # A bit of processing of the nest data 

368 id_matrix = np.asarray(self.model.dep_data) 

369 if id_matrix.ndim == 1: 

370 id_matrix = id_matrix[:, None] 

371 self.id_matrix = id_matrix 

372 

373 endog = self.model.endog_li 

374 designx, ilabels = [], [] 

375 

376 # The number of layers of nesting 

377 n_nest = self.id_matrix.shape[1] 

378 

379 for i in range(self.model.num_group): 

380 ngrp = len(endog[i]) 

381 glab = self.model.group_labels[i] 

382 rix = self.model.group_indices[glab] 

383 

384 # Determine the number of common variance components 

385 # shared by each pair of observations. 

386 ix1, ix2 = np.tril_indices(ngrp, -1) 

387 ncm = (self.id_matrix[rix[ix1], :] == 

388 self.id_matrix[rix[ix2], :]).sum(1) 

389 

390 # This is used to construct the working correlation 

391 # matrix. 

392 ilabel = np.zeros((ngrp, ngrp), dtype=np.int32) 

393 ilabel[(ix1, ix2)] = ncm + 1 

394 ilabel[(ix2, ix1)] = ncm + 1 

395 ilabels.append(ilabel) 

396 

397 # This is used to estimate the variance components. 

398 dsx = np.zeros((len(ix1), n_nest + 1), dtype=np.float64) 

399 dsx[:, 0] = 1 

400 for k in np.unique(ncm): 

401 ii = np.flatnonzero(ncm == k) 

402 dsx[ii, 1:k + 1] = 1 

403 designx.append(dsx) 

404 

405 self.designx = np.concatenate(designx, axis=0) 

406 self.ilabels = ilabels 

407 

408 svd = np.linalg.svd(self.designx, 0) 

409 self.designx_u = svd[0] 

410 self.designx_s = svd[1] 

411 self.designx_v = svd[2].T 

412 

413 @Appender(CovStruct.update.__doc__) 

414 def update(self, params): 

415 

416 endog = self.model.endog_li 

417 

418 nobs = self.model.nobs 

419 dim = len(params) 

420 

421 if self.designx is None: 

422 self._compute_design(self.model) 

423 

424 cached_means = self.model.cached_means 

425 

426 varfunc = self.model.family.variance 

427 

428 dvmat = [] 

429 scale = 0. 

430 for i in range(self.model.num_group): 

431 

432 expval, _ = cached_means[i] 

433 

434 stdev = np.sqrt(varfunc(expval)) 

435 resid = (endog[i] - expval) / stdev 

436 

437 ix1, ix2 = np.tril_indices(len(resid), -1) 

438 dvmat.append(resid[ix1] * resid[ix2]) 

439 

440 scale += np.sum(resid ** 2) 

441 

442 dvmat = np.concatenate(dvmat) 

443 scale /= (nobs - dim) 

444 

445 # Use least squares regression to estimate the variance 

446 # components 

447 vcomp_coeff = np.dot(self.designx_v, np.dot(self.designx_u.T, 

448 dvmat) / self.designx_s) 

449 

450 self.vcomp_coeff = np.clip(vcomp_coeff, 0, np.inf) 

451 self.scale = scale 

452 

453 self.dep_params = self.vcomp_coeff.copy() 

454 

455 @Appender(CovStruct.covariance_matrix.__doc__) 

456 def covariance_matrix(self, expval, index): 

457 

458 dim = len(expval) 

459 

460 # First iteration 

461 if self.dep_params is None: 

462 return np.eye(dim, dtype=np.float64), True 

463 

464 ilabel = self.ilabels[index] 

465 

466 c = np.r_[self.scale, np.cumsum(self.vcomp_coeff)] 

467 vmat = c[ilabel] 

468 vmat /= self.scale 

469 return vmat, True 

470 

471 def summary(self): 

472 """ 

473 Returns a summary string describing the state of the 

474 dependence structure. 

475 """ 

476 

477 dep_names = ["Groups"] 

478 if hasattr(self.model, "_dep_data_names"): 

479 dep_names.extend(self.model._dep_data_names) 

480 else: 

481 dep_names.extend(["Component %d:" % (k + 1) for k in range(len(self.vcomp_coeff) - 1)]) 

482 if hasattr(self.model, "_groups_name"): 

483 dep_names[0] = self.model._groups_name 

484 dep_names.append("Residual") 

485 

486 vc = self.vcomp_coeff.tolist() 

487 vc.append(self.scale - np.sum(vc)) 

488 

489 smry = pd.DataFrame({"Variance": vc}, index=dep_names) 

490 

491 return smry 

492 

493 

494class Stationary(CovStruct): 

495 """ 

496 A stationary covariance structure. 

497 

498 The correlation between two observations is an arbitrary function 

499 of the distance between them. Distances up to a given maximum 

500 value are included in the covariance model. 

501 

502 Parameters 

503 ---------- 

504 max_lag : float 

505 The largest distance that is included in the covariance model. 

506 grid : bool 

507 If True, the index positions in the data (after dropping missing 

508 values) are used to define distances, and the `time` variable is 

509 ignored. 

510 """ 

511 

512 def __init__(self, max_lag=1, grid=False): 

513 

514 super(Stationary, self).__init__() 

515 self.max_lag = max_lag 

516 self.grid = grid 

517 self.dep_params = np.zeros(max_lag + 1) 

518 

519 def initialize(self, model): 

520 

521 super(Stationary, self).initialize(model) 

522 

523 # Time used as an index needs to be integer type. 

524 if not self.grid: 

525 time = self.model.time[:, 0].astype(np.int32) 

526 self.time = self.model.cluster_list(time) 

527 

528 @Appender(CovStruct.update.__doc__) 

529 def update(self, params): 

530 

531 if self.grid: 

532 self.update_grid(params) 

533 else: 

534 self.update_nogrid(params) 

535 

536 def update_grid(self, params): 

537 

538 endog = self.model.endog_li 

539 cached_means = self.model.cached_means 

540 varfunc = self.model.family.variance 

541 

542 dep_params = np.zeros(self.max_lag + 1) 

543 for i in range(self.model.num_group): 

544 

545 expval, _ = cached_means[i] 

546 stdev = np.sqrt(varfunc(expval)) 

547 resid = (endog[i] - expval) / stdev 

548 

549 dep_params[0] += np.sum(resid * resid) / len(resid) 

550 for j in range(1, self.max_lag + 1): 

551 v = resid[j:] 

552 dep_params[j] += np.sum(resid[0:-j] * v) / len(v) 

553 

554 dep_params /= dep_params[0] 

555 self.dep_params = dep_params 

556 

557 def update_nogrid(self, params): 

558 

559 endog = self.model.endog_li 

560 cached_means = self.model.cached_means 

561 varfunc = self.model.family.variance 

562 

563 dep_params = np.zeros(self.max_lag + 1) 

564 dn = np.zeros(self.max_lag + 1) 

565 resid_ssq = 0 

566 resid_ssq_n = 0 

567 for i in range(self.model.num_group): 

568 

569 expval, _ = cached_means[i] 

570 stdev = np.sqrt(varfunc(expval)) 

571 resid = (endog[i] - expval) / stdev 

572 

573 j1, j2 = np.tril_indices(len(expval), -1) 

574 dx = np.abs(self.time[i][j1] - self.time[i][j2]) 

575 ii = np.flatnonzero(dx <= self.max_lag) 

576 j1 = j1[ii] 

577 j2 = j2[ii] 

578 dx = dx[ii] 

579 

580 vs = np.bincount(dx, weights=resid[j1] * resid[j2], 

581 minlength=self.max_lag + 1) 

582 vd = np.bincount(dx, minlength=self.max_lag + 1) 

583 

584 resid_ssq += np.sum(resid**2) 

585 resid_ssq_n += len(resid) 

586 

587 ii = np.flatnonzero(vd > 0) 

588 if len(ii) > 0: 

589 dn[ii] += 1 

590 dep_params[ii] += vs[ii] / vd[ii] 

591 

592 i0 = np.flatnonzero(dn > 0) 

593 dep_params[i0] /= dn[i0] 

594 resid_msq = resid_ssq / resid_ssq_n 

595 dep_params /= resid_msq 

596 self.dep_params = dep_params 

597 

598 @Appender(CovStruct.covariance_matrix.__doc__) 

599 def covariance_matrix(self, endog_expval, index): 

600 

601 if self.grid: 

602 return self.covariance_matrix_grid(endog_expval, index) 

603 

604 j1, j2 = np.tril_indices(len(endog_expval), -1) 

605 dx = np.abs(self.time[index][j1] - self.time[index][j2]) 

606 ii = np.flatnonzero(dx <= self.max_lag) 

607 j1 = j1[ii] 

608 j2 = j2[ii] 

609 dx = dx[ii] 

610 

611 cmat = np.eye(len(endog_expval)) 

612 cmat[j1, j2] = self.dep_params[dx] 

613 cmat[j2, j1] = self.dep_params[dx] 

614 

615 return cmat, True 

616 

617 def covariance_matrix_grid(self, endog_expval, index): 

618 

619 from scipy.linalg import toeplitz 

620 r = np.zeros(len(endog_expval)) 

621 r[0] = 1 

622 r[1:self.max_lag + 1] = self.dep_params[1:] 

623 return toeplitz(r), True 

624 

625 @Appender(CovStruct.covariance_matrix_solve.__doc__) 

626 def covariance_matrix_solve(self, expval, index, stdev, rhs): 

627 

628 if not self.grid: 

629 return super(Stationary, self).covariance_matrix_solve( 

630 expval, index, stdev, rhs) 

631 

632 from statsmodels.tools.linalg import stationary_solve 

633 r = np.zeros(len(expval)) 

634 r[0:self.max_lag] = self.dep_params[1:] 

635 return [stationary_solve(r, x) for x in rhs] 

636 

637 def summary(self): 

638 

639 lag = np.arange(self.max_lag + 1) 

640 return pd.DataFrame({"Lag": lag, "Cov": self.dep_params}) 

641 

642 

643class Autoregressive(CovStruct): 

644 """ 

645 A first-order autoregressive working dependence structure. 

646 

647 The dependence is defined in terms of the `time` component of the 

648 parent GEE class, which defaults to the index position of each 

649 value within its cluster, based on the order of values in the 

650 input data set. Time represents a potentially multidimensional 

651 index from which distances between pairs of observations can be 

652 determined. 

653 

654 The correlation between two observations in the same cluster is 

655 dep_params^distance, where `dep_params` contains the (scalar) 

656 autocorrelation parameter to be estimated, and `distance` is the 

657 distance between the two observations, calculated from their 

658 corresponding time values. `time` is stored as an n_obs x k 

659 matrix, where `k` represents the number of dimensions in the time 

660 index. 

661 

662 The autocorrelation parameter is estimated using weighted 

663 nonlinear least squares, regressing each value within a cluster on 

664 each preceding value in the same cluster. 

665 

666 Parameters 

667 ---------- 

668 dist_func : function from R^k x R^k to R^+, optional 

669 A function that computes the distance between the two 

670 observations based on their `time` values. 

671 

672 References 

673 ---------- 

674 B Rosner, A Munoz. Autoregressive modeling for the analysis of 

675 longitudinal data with unequally spaced examinations. Statistics 

676 in medicine. Vol 7, 59-71, 1988. 

677 """ 

678 

679 def __init__(self, dist_func=None): 

680 

681 super(Autoregressive, self).__init__() 

682 

683 # The function for determining distances based on time 

684 if dist_func is None: 

685 self.dist_func = lambda x, y: np.abs(x - y).sum() 

686 else: 

687 self.dist_func = dist_func 

688 

689 self.designx = None 

690 

691 # The autocorrelation parameter 

692 self.dep_params = 0. 

693 

694 @Appender(CovStruct.update.__doc__) 

695 def update(self, params): 

696 

697 if self.model.weights is not None: 

698 warnings.warn("weights not implemented for autoregressive " 

699 "cov_struct, using unweighted covariance estimate", 

700 NotImplementedWarning) 

701 

702 endog = self.model.endog_li 

703 time = self.model.time_li 

704 

705 # Only need to compute this once 

706 if self.designx is not None: 

707 designx = self.designx 

708 else: 

709 designx = [] 

710 for i in range(self.model.num_group): 

711 

712 ngrp = len(endog[i]) 

713 if ngrp == 0: 

714 continue 

715 

716 # Loop over pairs of observations within a cluster 

717 for j1 in range(ngrp): 

718 for j2 in range(j1): 

719 designx.append(self.dist_func(time[i][j1, :], 

720 time[i][j2, :])) 

721 

722 designx = np.array(designx) 

723 self.designx = designx 

724 

725 scale = self.model.estimate_scale() 

726 varfunc = self.model.family.variance 

727 cached_means = self.model.cached_means 

728 

729 # Weights 

730 var = 1. - self.dep_params ** (2 * designx) 

731 var /= 1. - self.dep_params ** 2 

732 wts = 1. / var 

733 wts /= wts.sum() 

734 

735 residmat = [] 

736 for i in range(self.model.num_group): 

737 

738 expval, _ = cached_means[i] 

739 stdev = np.sqrt(scale * varfunc(expval)) 

740 resid = (endog[i] - expval) / stdev 

741 

742 ngrp = len(resid) 

743 for j1 in range(ngrp): 

744 for j2 in range(j1): 

745 residmat.append([resid[j1], resid[j2]]) 

746 

747 residmat = np.array(residmat) 

748 

749 # Need to minimize this 

750 def fitfunc(a): 

751 dif = residmat[:, 0] - (a ** designx) * residmat[:, 1] 

752 return np.dot(dif ** 2, wts) 

753 

754 # Left bracket point 

755 b_lft, f_lft = 0., fitfunc(0.) 

756 

757 # Center bracket point 

758 b_ctr, f_ctr = 0.5, fitfunc(0.5) 

759 while f_ctr > f_lft: 

760 b_ctr /= 2 

761 f_ctr = fitfunc(b_ctr) 

762 if b_ctr < 1e-8: 

763 self.dep_params = 0 

764 return 

765 

766 # Right bracket point 

767 b_rgt, f_rgt = 0.75, fitfunc(0.75) 

768 while f_rgt < f_ctr: 

769 b_rgt = b_rgt + (1. - b_rgt) / 2 

770 f_rgt = fitfunc(b_rgt) 

771 if b_rgt > 1. - 1e-6: 

772 raise ValueError( 

773 "Autoregressive: unable to find right bracket") 

774 

775 from scipy.optimize import brent 

776 self.dep_params = brent(fitfunc, brack=[b_lft, b_ctr, b_rgt]) 

777 

778 @Appender(CovStruct.covariance_matrix.__doc__) 

779 def covariance_matrix(self, endog_expval, index): 

780 ngrp = len(endog_expval) 

781 if self.dep_params == 0: 

782 return np.eye(ngrp, dtype=np.float64), True 

783 idx = np.arange(ngrp) 

784 cmat = self.dep_params ** np.abs(idx[:, None] - idx[None, :]) 

785 return cmat, True 

786 

787 @Appender(CovStruct.covariance_matrix_solve.__doc__) 

788 def covariance_matrix_solve(self, expval, index, stdev, rhs): 

789 # The inverse of an AR(1) covariance matrix is tri-diagonal. 

790 

791 k = len(expval) 

792 soln = [] 

793 

794 # LHS has 1 column 

795 if k == 1: 

796 return [x / stdev ** 2 for x in rhs] 

797 

798 # LHS has 2 columns 

799 if k == 2: 

800 mat = np.array([[1, -self.dep_params], [-self.dep_params, 1]]) 

801 mat /= (1. - self.dep_params ** 2) 

802 for x in rhs: 

803 if x.ndim == 1: 

804 x1 = x / stdev 

805 else: 

806 x1 = x / stdev[:, None] 

807 x1 = np.dot(mat, x1) 

808 if x.ndim == 1: 

809 x1 /= stdev 

810 else: 

811 x1 /= stdev[:, None] 

812 soln.append(x1) 

813 return soln 

814 

815 # LHS has >= 3 columns: values c0, c1, c2 defined below give 

816 # the inverse. c0 is on the diagonal, except for the first 

817 # and last position. c1 is on the first and last position of 

818 # the diagonal. c2 is on the sub/super diagonal. 

819 c0 = (1. + self.dep_params ** 2) / (1. - self.dep_params ** 2) 

820 c1 = 1. / (1. - self.dep_params ** 2) 

821 c2 = -self.dep_params / (1. - self.dep_params ** 2) 

822 soln = [] 

823 for x in rhs: 

824 flatten = False 

825 if x.ndim == 1: 

826 x = x[:, None] 

827 flatten = True 

828 x1 = x / stdev[:, None] 

829 

830 z0 = np.zeros((1, x.shape[1])) 

831 rhs1 = np.concatenate((x[1:, :], z0), axis=0) 

832 rhs2 = np.concatenate((z0, x[0:-1, :]), axis=0) 

833 

834 y = c0 * x + c2 * rhs1 + c2 * rhs2 

835 y[0, :] = c1 * x[0, :] + c2 * x[1, :] 

836 y[-1, :] = c1 * x[-1, :] + c2 * x[-2, :] 

837 

838 y /= stdev[:, None] 

839 

840 if flatten: 

841 y = np.squeeze(y) 

842 

843 soln.append(y) 

844 

845 return soln 

846 

847 def summary(self): 

848 

849 return ("Autoregressive(1) dependence parameter: %.3f\n" % 

850 self.dep_params) 

851 

852 

853class CategoricalCovStruct(CovStruct): 

854 """ 

855 Parent class for covariance structure for categorical data models. 

856 

857 Attributes 

858 ---------- 

859 nlevel : int 

860 The number of distinct levels for the outcome variable. 

861 ibd : list 

862 A list whose i^th element ibd[i] is an array whose rows 

863 contain integer pairs (a,b), where endog_li[i][a:b] is the 

864 subvector of binary indicators derived from the same ordinal 

865 value. 

866 """ 

867 

868 def initialize(self, model): 

869 

870 super(CategoricalCovStruct, self).initialize(model) 

871 

872 self.nlevel = len(model.endog_values) 

873 self._ncut = self.nlevel - 1 

874 

875 from numpy.lib.stride_tricks import as_strided 

876 b = np.dtype(np.int64).itemsize 

877 

878 ibd = [] 

879 for v in model.endog_li: 

880 jj = np.arange(0, len(v) + 1, self._ncut, dtype=np.int64) 

881 jj = as_strided(jj, shape=(len(jj) - 1, 2), strides=(b, b)) 

882 ibd.append(jj) 

883 

884 self.ibd = ibd 

885 

886 

887class GlobalOddsRatio(CategoricalCovStruct): 

888 """ 

889 Estimate the global odds ratio for a GEE with ordinal or nominal 

890 data. 

891 

892 References 

893 ---------- 

894 PJ Heagerty and S Zeger. "Marginal Regression Models for Clustered 

895 Ordinal Measurements". Journal of the American Statistical 

896 Association Vol. 91, Issue 435 (1996). 

897 

898 Thomas Lumley. Generalized Estimating Equations for Ordinal Data: 

899 A Note on Working Correlation Structures. Biometrics Vol. 52, 

900 No. 1 (Mar., 1996), pp. 354-361 

901 http://www.jstor.org/stable/2533173 

902 

903 Notes 

904 ----- 

905 The following data structures are calculated in the class: 

906 

907 'ibd' is a list whose i^th element ibd[i] is a sequence of integer 

908 pairs (a,b), where endog_li[i][a:b] is the subvector of binary 

909 indicators derived from the same ordinal value. 

910 

911 `cpp` is a dictionary where cpp[group] is a map from cut-point 

912 pairs (c,c') to the indices of all between-subject pairs derived 

913 from the given cut points. 

914 """ 

915 

916 def __init__(self, endog_type): 

917 super(GlobalOddsRatio, self).__init__() 

918 self.endog_type = endog_type 

919 self.dep_params = 0. 

920 

921 def initialize(self, model): 

922 

923 super(GlobalOddsRatio, self).initialize(model) 

924 

925 if self.model.weights is not None: 

926 warnings.warn("weights not implemented for GlobalOddsRatio " 

927 "cov_struct, using unweighted covariance estimate", 

928 NotImplementedWarning) 

929 

930 # Need to restrict to between-subject pairs 

931 cpp = [] 

932 for v in model.endog_li: 

933 

934 # Number of subjects in this group 

935 m = int(len(v) / self._ncut) 

936 i1, i2 = np.tril_indices(m, -1) 

937 

938 cpp1 = {} 

939 for k1 in range(self._ncut): 

940 for k2 in range(k1 + 1): 

941 jj = np.zeros((len(i1), 2), dtype=np.int64) 

942 jj[:, 0] = i1 * self._ncut + k1 

943 jj[:, 1] = i2 * self._ncut + k2 

944 cpp1[(k2, k1)] = jj 

945 

946 cpp.append(cpp1) 

947 

948 self.cpp = cpp 

949 

950 # Initialize the dependence parameters 

951 self.crude_or = self.observed_crude_oddsratio() 

952 if self.model.update_dep: 

953 self.dep_params = self.crude_or 

954 

955 def pooled_odds_ratio(self, tables): 

956 """ 

957 Returns the pooled odds ratio for a list of 2x2 tables. 

958 

959 The pooled odds ratio is the inverse variance weighted average 

960 of the sample odds ratios of the tables. 

961 """ 

962 

963 if len(tables) == 0: 

964 return 1. 

965 

966 # Get the sampled odds ratios and variances 

967 log_oddsratio, var = [], [] 

968 for table in tables: 

969 lor = np.log(table[1, 1]) + np.log(table[0, 0]) -\ 

970 np.log(table[0, 1]) - np.log(table[1, 0]) 

971 log_oddsratio.append(lor) 

972 var.append((1 / table.astype(np.float64)).sum()) 

973 

974 # Calculate the inverse variance weighted average 

975 wts = [1 / v for v in var] 

976 wtsum = sum(wts) 

977 wts = [w / wtsum for w in wts] 

978 log_pooled_or = sum([w * e for w, e in zip(wts, log_oddsratio)]) 

979 

980 return np.exp(log_pooled_or) 

981 

982 @Appender(CovStruct.covariance_matrix.__doc__) 

983 def covariance_matrix(self, expected_value, index): 

984 

985 vmat = self.get_eyy(expected_value, index) 

986 vmat -= np.outer(expected_value, expected_value) 

987 return vmat, False 

988 

989 def observed_crude_oddsratio(self): 

990 """ 

991 To obtain the crude (global) odds ratio, first pool all binary 

992 indicators corresponding to a given pair of cut points (c,c'), 

993 then calculate the odds ratio for this 2x2 table. The crude 

994 odds ratio is the inverse variance weighted average of these 

995 odds ratios. Since the covariate effects are ignored, this OR 

996 will generally be greater than the stratified OR. 

997 """ 

998 

999 cpp = self.cpp 

1000 endog = self.model.endog_li 

1001 

1002 # Storage for the contingency tables for each (c,c') 

1003 tables = {} 

1004 for ii in iterkeys(cpp[0]): 

1005 tables[ii] = np.zeros((2, 2), dtype=np.float64) 

1006 

1007 # Get the observed crude OR 

1008 for i in range(len(endog)): 

1009 

1010 # The observed joint values for the current cluster 

1011 yvec = endog[i] 

1012 endog_11 = np.outer(yvec, yvec) 

1013 endog_10 = np.outer(yvec, 1. - yvec) 

1014 endog_01 = np.outer(1. - yvec, yvec) 

1015 endog_00 = np.outer(1. - yvec, 1. - yvec) 

1016 

1017 cpp1 = cpp[i] 

1018 for ky in iterkeys(cpp1): 

1019 ix = cpp1[ky] 

1020 tables[ky][1, 1] += endog_11[ix[:, 0], ix[:, 1]].sum() 

1021 tables[ky][1, 0] += endog_10[ix[:, 0], ix[:, 1]].sum() 

1022 tables[ky][0, 1] += endog_01[ix[:, 0], ix[:, 1]].sum() 

1023 tables[ky][0, 0] += endog_00[ix[:, 0], ix[:, 1]].sum() 

1024 

1025 return self.pooled_odds_ratio(list(itervalues(tables))) 

1026 

1027 def get_eyy(self, endog_expval, index): 

1028 """ 

1029 Returns a matrix V such that V[i,j] is the joint probability 

1030 that endog[i] = 1 and endog[j] = 1, based on the marginal 

1031 probabilities of endog and the global odds ratio `current_or`. 

1032 """ 

1033 

1034 current_or = self.dep_params 

1035 ibd = self.ibd[index] 

1036 

1037 # The between-observation joint probabilities 

1038 if current_or == 1.0: 

1039 vmat = np.outer(endog_expval, endog_expval) 

1040 else: 

1041 psum = endog_expval[:, None] + endog_expval[None, :] 

1042 pprod = endog_expval[:, None] * endog_expval[None, :] 

1043 pfac = np.sqrt((1. + psum * (current_or - 1.)) ** 2 + 

1044 4 * current_or * (1. - current_or) * pprod) 

1045 vmat = 1. + psum * (current_or - 1.) - pfac 

1046 vmat /= 2. * (current_or - 1) 

1047 

1048 # Fix E[YY'] for elements that belong to same observation 

1049 for bdl in ibd: 

1050 evy = endog_expval[bdl[0]:bdl[1]] 

1051 if self.endog_type == "ordinal": 

1052 vmat[bdl[0]:bdl[1], bdl[0]:bdl[1]] =\ 

1053 np.minimum.outer(evy, evy) 

1054 else: 

1055 vmat[bdl[0]:bdl[1], bdl[0]:bdl[1]] = np.diag(evy) 

1056 

1057 return vmat 

1058 

1059 @Appender(CovStruct.update.__doc__) 

1060 def update(self, params): 

1061 """ 

1062 Update the global odds ratio based on the current value of 

1063 params. 

1064 """ 

1065 

1066 cpp = self.cpp 

1067 cached_means = self.model.cached_means 

1068 

1069 # This will happen if all the clusters have only 

1070 # one observation 

1071 if len(cpp[0]) == 0: 

1072 return 

1073 

1074 tables = {} 

1075 for ii in cpp[0]: 

1076 tables[ii] = np.zeros((2, 2), dtype=np.float64) 

1077 

1078 for i in range(self.model.num_group): 

1079 

1080 endog_expval, _ = cached_means[i] 

1081 

1082 emat_11 = self.get_eyy(endog_expval, i) 

1083 emat_10 = endog_expval[:, None] - emat_11 

1084 emat_01 = -emat_11 + endog_expval 

1085 emat_00 = 1. - (emat_11 + emat_10 + emat_01) 

1086 

1087 cpp1 = cpp[i] 

1088 for ky in iterkeys(cpp1): 

1089 ix = cpp1[ky] 

1090 tables[ky][1, 1] += emat_11[ix[:, 0], ix[:, 1]].sum() 

1091 tables[ky][1, 0] += emat_10[ix[:, 0], ix[:, 1]].sum() 

1092 tables[ky][0, 1] += emat_01[ix[:, 0], ix[:, 1]].sum() 

1093 tables[ky][0, 0] += emat_00[ix[:, 0], ix[:, 1]].sum() 

1094 

1095 cor_expval = self.pooled_odds_ratio(list(itervalues(tables))) 

1096 

1097 self.dep_params *= self.crude_or / cor_expval 

1098 if not np.isfinite(self.dep_params): 

1099 self.dep_params = 1. 

1100 warnings.warn("dep_params became inf, resetting to 1", 

1101 ConvergenceWarning) 

1102 

1103 def summary(self): 

1104 return "Global odds ratio: %.3f\n" % self.dep_params 

1105 

1106 

1107class OrdinalIndependence(CategoricalCovStruct): 

1108 """ 

1109 An independence covariance structure for ordinal models. 

1110 

1111 The working covariance between indicators derived from different 

1112 observations is zero. The working covariance between indicators 

1113 derived form a common observation is determined from their current 

1114 mean values. 

1115 

1116 There are no parameters to estimate in this covariance structure. 

1117 """ 

1118 

1119 def covariance_matrix(self, expected_value, index): 

1120 

1121 ibd = self.ibd[index] 

1122 n = len(expected_value) 

1123 vmat = np.zeros((n, n)) 

1124 

1125 for bdl in ibd: 

1126 ev = expected_value[bdl[0]:bdl[1]] 

1127 vmat[bdl[0]:bdl[1], bdl[0]:bdl[1]] =\ 

1128 np.minimum.outer(ev, ev) - np.outer(ev, ev) 

1129 

1130 return vmat, False 

1131 

1132 # Nothing to update 

1133 def update(self, params): 

1134 pass 

1135 

1136 

1137class NominalIndependence(CategoricalCovStruct): 

1138 """ 

1139 An independence covariance structure for nominal models. 

1140 

1141 The working covariance between indicators derived from different 

1142 observations is zero. The working covariance between indicators 

1143 derived form a common observation is determined from their current 

1144 mean values. 

1145 

1146 There are no parameters to estimate in this covariance structure. 

1147 """ 

1148 

1149 def covariance_matrix(self, expected_value, index): 

1150 

1151 ibd = self.ibd[index] 

1152 n = len(expected_value) 

1153 vmat = np.zeros((n, n)) 

1154 

1155 for bdl in ibd: 

1156 ev = expected_value[bdl[0]:bdl[1]] 

1157 vmat[bdl[0]:bdl[1], bdl[0]:bdl[1]] =\ 

1158 np.diag(ev) - np.outer(ev, ev) 

1159 

1160 return vmat, False 

1161 

1162 # Nothing to update 

1163 def update(self, params): 

1164 pass 

1165 

1166 

1167class Equivalence(CovStruct): 

1168 """ 

1169 A covariance structure defined in terms of equivalence classes. 

1170 

1171 An 'equivalence class' is a set of pairs of observations such that 

1172 the covariance of every pair within the equivalence class has a 

1173 common value. 

1174 

1175 Parameters 

1176 ---------- 

1177 pairs : dict-like 

1178 A dictionary of dictionaries, where `pairs[group][label]` 

1179 provides the indices of all pairs of observations in the group 

1180 that have the same covariance value. Specifically, 

1181 `pairs[group][label]` is a tuple `(j1, j2)`, where `j1` and `j2` 

1182 are integer arrays of the same length. `j1[i], j2[i]` is one 

1183 index pair that belongs to the `label` equivalence class. Only 

1184 one triangle of each covariance matrix should be included. 

1185 Positions where j1 and j2 have the same value are variance 

1186 parameters. 

1187 labels : array_like 

1188 An array of labels such that every distinct pair of labels 

1189 defines an equivalence class. Either `labels` or `pairs` must 

1190 be provided. When the two labels in a pair are equal two 

1191 equivalence classes are defined: one for the diagonal elements 

1192 (corresponding to variances) and one for the off-diagonal 

1193 elements (corresponding to covariances). 

1194 return_cov : bool 

1195 If True, `covariance_matrix` returns an estimate of the 

1196 covariance matrix, otherwise returns an estimate of the 

1197 correlation matrix. 

1198 

1199 Notes 

1200 ----- 

1201 Using `labels` to define the class is much easier than using 

1202 `pairs`, but is less general. 

1203 

1204 Any pair of values not contained in `pairs` will be assigned zero 

1205 covariance. 

1206 

1207 The index values in `pairs` are row indices into the `exog` 

1208 matrix. They are not updated if missing data are present. When 

1209 using this covariance structure, missing data should be removed 

1210 before constructing the model. 

1211 

1212 If using `labels`, after a model is defined using the covariance 

1213 structure it is possible to remove a label pair from the second 

1214 level of the `pairs` dictionary to force the corresponding 

1215 covariance to be zero. 

1216 

1217 Examples 

1218 -------- 

1219 The following sets up the `pairs` dictionary for a model with two 

1220 groups, equal variance for all observations, and constant 

1221 covariance for all pairs of observations within each group. 

1222 

1223 >> pairs = {0: {}, 1: {}} 

1224 >> pairs[0][0] = (np.r_[0, 1, 2], np.r_[0, 1, 2]) 

1225 >> pairs[0][1] = np.tril_indices(3, -1) 

1226 >> pairs[1][0] = (np.r_[3, 4, 5], np.r_[3, 4, 5]) 

1227 >> pairs[1][2] = 3 + np.tril_indices(3, -1) 

1228 """ 

1229 

1230 def __init__(self, pairs=None, labels=None, return_cov=False): 

1231 

1232 super(Equivalence, self).__init__() 

1233 

1234 if (pairs is None) and (labels is None): 

1235 raise ValueError( 

1236 "Equivalence cov_struct requires either `pairs` or `labels`") 

1237 

1238 if (pairs is not None) and (labels is not None): 

1239 raise ValueError( 

1240 "Equivalence cov_struct accepts only one of `pairs` " 

1241 "and `labels`") 

1242 

1243 if pairs is not None: 

1244 import copy 

1245 self.pairs = copy.deepcopy(pairs) 

1246 

1247 if labels is not None: 

1248 self.labels = np.asarray(labels) 

1249 

1250 self.return_cov = return_cov 

1251 

1252 def _make_pairs(self, i, j): 

1253 """ 

1254 Create arrays containing all unique ordered pairs of i, j. 

1255 

1256 The arrays i and j must be one-dimensional containing non-negative 

1257 integers. 

1258 """ 

1259 

1260 mat = np.zeros((len(i) * len(j), 2), dtype=np.int32) 

1261 

1262 # Create the pairs and order them 

1263 f = np.ones(len(j)) 

1264 mat[:, 0] = np.kron(f, i).astype(np.int32) 

1265 f = np.ones(len(i)) 

1266 mat[:, 1] = np.kron(j, f).astype(np.int32) 

1267 mat.sort(1) 

1268 

1269 # Remove repeated rows 

1270 try: 

1271 dtype = np.dtype((np.void, mat.dtype.itemsize * mat.shape[1])) 

1272 bmat = np.ascontiguousarray(mat).view(dtype) 

1273 _, idx = np.unique(bmat, return_index=True) 

1274 except TypeError: 

1275 # workaround for old numpy that cannot call unique with complex 

1276 # dtypes 

1277 rs = np.random.RandomState(4234) 

1278 bmat = np.dot(mat, rs.uniform(size=mat.shape[1])) 

1279 _, idx = np.unique(bmat, return_index=True) 

1280 mat = mat[idx, :] 

1281 

1282 return mat[:, 0], mat[:, 1] 

1283 

1284 def _pairs_from_labels(self): 

1285 

1286 from collections import defaultdict 

1287 pairs = defaultdict(lambda: defaultdict(lambda: None)) 

1288 

1289 model = self.model 

1290 

1291 df = pd.DataFrame({"labels": self.labels, "groups": model.groups}) 

1292 gb = df.groupby(["groups", "labels"]) 

1293 

1294 ulabels = np.unique(self.labels) 

1295 

1296 for g_ix, g_lb in enumerate(model.group_labels): 

1297 

1298 # Loop over label pairs 

1299 for lx1 in range(len(ulabels)): 

1300 for lx2 in range(lx1 + 1): 

1301 

1302 lb1 = ulabels[lx1] 

1303 lb2 = ulabels[lx2] 

1304 

1305 try: 

1306 i1 = gb.groups[(g_lb, lb1)] 

1307 i2 = gb.groups[(g_lb, lb2)] 

1308 except KeyError: 

1309 continue 

1310 

1311 i1, i2 = self._make_pairs(i1, i2) 

1312 

1313 clabel = str(lb1) + "/" + str(lb2) 

1314 

1315 # Variance parameters belong in their own equiv class. 

1316 jj = np.flatnonzero(i1 == i2) 

1317 if len(jj) > 0: 

1318 clabelv = clabel + "/v" 

1319 pairs[g_lb][clabelv] = (i1[jj], i2[jj]) 

1320 

1321 # Covariance parameters 

1322 jj = np.flatnonzero(i1 != i2) 

1323 if len(jj) > 0: 

1324 i1 = i1[jj] 

1325 i2 = i2[jj] 

1326 pairs[g_lb][clabel] = (i1, i2) 

1327 

1328 self.pairs = pairs 

1329 

1330 def initialize(self, model): 

1331 

1332 super(Equivalence, self).initialize(model) 

1333 

1334 if self.model.weights is not None: 

1335 warnings.warn("weights not implemented for equalence cov_struct, " 

1336 "using unweighted covariance estimate", 

1337 NotImplementedWarning) 

1338 

1339 if not hasattr(self, 'pairs'): 

1340 self._pairs_from_labels() 

1341 

1342 # Initialize so that any equivalence class containing a 

1343 # variance parameter has value 1. 

1344 self.dep_params = defaultdict(lambda: 0.) 

1345 self._var_classes = set([]) 

1346 for gp in self.model.group_labels: 

1347 for lb in self.pairs[gp]: 

1348 j1, j2 = self.pairs[gp][lb] 

1349 if np.any(j1 == j2): 

1350 if not np.all(j1 == j2): 

1351 warnings.warn( 

1352 "equivalence class contains both variance " 

1353 "and covariance parameters", OutputWarning) 

1354 self._var_classes.add(lb) 

1355 self.dep_params[lb] = 1 

1356 

1357 # Need to start indexing at 0 within each group. 

1358 # rx maps olds indices to new indices 

1359 rx = -1 * np.ones(len(self.model.endog), dtype=np.int32) 

1360 for g_ix, g_lb in enumerate(self.model.group_labels): 

1361 ii = self.model.group_indices[g_lb] 

1362 rx[ii] = np.arange(len(ii), dtype=np.int32) 

1363 

1364 # Reindex 

1365 for gp in self.model.group_labels: 

1366 for lb in self.pairs[gp].keys(): 

1367 a, b = self.pairs[gp][lb] 

1368 self.pairs[gp][lb] = (rx[a], rx[b]) 

1369 

1370 @Appender(CovStruct.update.__doc__) 

1371 def update(self, params): 

1372 

1373 endog = self.model.endog_li 

1374 varfunc = self.model.family.variance 

1375 cached_means = self.model.cached_means 

1376 dep_params = defaultdict(lambda: [0., 0., 0.]) 

1377 n_pairs = defaultdict(lambda: 0) 

1378 dim = len(params) 

1379 

1380 for k, gp in enumerate(self.model.group_labels): 

1381 expval, _ = cached_means[k] 

1382 stdev = np.sqrt(varfunc(expval)) 

1383 resid = (endog[k] - expval) / stdev 

1384 for lb in self.pairs[gp].keys(): 

1385 if (not self.return_cov) and lb in self._var_classes: 

1386 continue 

1387 jj = self.pairs[gp][lb] 

1388 dep_params[lb][0] += np.sum(resid[jj[0]] * resid[jj[1]]) 

1389 if not self.return_cov: 

1390 dep_params[lb][1] += np.sum(resid[jj[0]] ** 2) 

1391 dep_params[lb][2] += np.sum(resid[jj[1]] ** 2) 

1392 n_pairs[lb] += len(jj[0]) 

1393 

1394 if self.return_cov: 

1395 for lb in dep_params.keys(): 

1396 dep_params[lb] = dep_params[lb][0] / (n_pairs[lb] - dim) 

1397 else: 

1398 for lb in dep_params.keys(): 

1399 den = np.sqrt(dep_params[lb][1] * dep_params[lb][2]) 

1400 dep_params[lb] = dep_params[lb][0] / den 

1401 for lb in self._var_classes: 

1402 dep_params[lb] = 1. 

1403 

1404 self.dep_params = dep_params 

1405 self.n_pairs = n_pairs 

1406 

1407 @Appender(CovStruct.covariance_matrix.__doc__) 

1408 def covariance_matrix(self, expval, index): 

1409 dim = len(expval) 

1410 cmat = np.zeros((dim, dim)) 

1411 g_lb = self.model.group_labels[index] 

1412 

1413 for lb in self.pairs[g_lb].keys(): 

1414 j1, j2 = self.pairs[g_lb][lb] 

1415 cmat[j1, j2] = self.dep_params[lb] 

1416 

1417 cmat = cmat + cmat.T 

1418 np.fill_diagonal(cmat, cmat.diagonal() / 2) 

1419 

1420 return cmat, not self.return_cov